git.notmuchmail.org Git - tar/blob - gnu/regexec.c

   1 /* -*- buffer-read-only: t -*- vi: set ro: */
   2 /* DO NOT EDIT! GENERATED AUTOMATICALLY! */
   3 /* Extended regular expression matching and search library.
   4    Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 Free
   5    Software Foundation, Inc.
   6    This file is part of the GNU C Library.
   7    Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
   8
   9    This program is free software; you can redistribute it and/or modify
  10    it under the terms of the GNU General Public License as published by
  11    the Free Software Foundation; either version 3, or (at your option)
  12    any later version.
  13
  14    This program is distributed in the hope that it will be useful,
  15    but WITHOUT ANY WARRANTY; without even the implied warranty of
  16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17    GNU General Public License for more details.
  18
  19    You should have received a copy of the GNU General Public License along
  20    with this program; if not, write to the Free Software Foundation,
  21    Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
  22
  23 static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags,
  24                                      Idx n) internal_function;
  25 static void match_ctx_clean (re_match_context_t *mctx) internal_function;
  26 static void match_ctx_free (re_match_context_t *cache) internal_function;
  27 static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, Idx node,
  28                                           Idx str_idx, Idx from, Idx to)
  29      internal_function;
  30 static Idx search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx)
  31      internal_function;
  32 static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, Idx node,
  33                                            Idx str_idx) internal_function;
  34 static re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop,
  35                                                     Idx node, Idx str_idx)
  36      internal_function;
  37 static void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
  38                            re_dfastate_t **limited_sts, Idx last_node,
  39                            Idx last_str_idx)
  40      internal_function;
  41 static reg_errcode_t re_search_internal (const regex_t *preg,
  42                                          const char *string, Idx length,
  43                                          Idx start, Idx last_start, Idx stop,
  44                                          size_t nmatch, regmatch_t pmatch[],
  45                                          int eflags) internal_function;
  46 static regoff_t re_search_2_stub (struct re_pattern_buffer *bufp,
  47                                   const char *string1, Idx length1,
  48                                   const char *string2, Idx length2,
  49                                   Idx start, regoff_t range,
  50                                   struct re_registers *regs,
  51                                   Idx stop, bool ret_len) internal_function;
  52 static regoff_t re_search_stub (struct re_pattern_buffer *bufp,
  53                                 const char *string, Idx length, Idx start,
  54                                 regoff_t range, Idx stop,
  55                                 struct re_registers *regs,
  56                                 bool ret_len) internal_function;
  57 static unsigned int re_copy_regs (struct re_registers *regs, regmatch_t *pmatch,
  58                                   Idx nregs, int regs_allocated)
  59      internal_function;
  60 static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx)
  61      internal_function;
  62 static Idx check_matching (re_match_context_t *mctx, bool fl_longest_match,
  63                            Idx *p_match_first) internal_function;
  64 static Idx check_halt_state_context (const re_match_context_t *mctx,
  65                                      const re_dfastate_t *state, Idx idx)
  66      internal_function;
  67 static void update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
  68                          regmatch_t *prev_idx_match, Idx cur_node,
  69                          Idx cur_idx, Idx nmatch) internal_function;
  70 static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs,
  71                                       Idx str_idx, Idx dest_node, Idx nregs,
  72                                       regmatch_t *regs,
  73                                       re_node_set *eps_via_nodes)
  74      internal_function;
  75 static reg_errcode_t set_regs (const regex_t *preg,
  76                                const re_match_context_t *mctx,
  77                                size_t nmatch, regmatch_t *pmatch,
  78                                bool fl_backtrack) internal_function;
  79 static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs)
  80      internal_function;
  81
  82 #ifdef RE_ENABLE_I18N
  83 static int sift_states_iter_mb (const re_match_context_t *mctx,
  84                                 re_sift_context_t *sctx,
  85                                 Idx node_idx, Idx str_idx, Idx max_str_idx)
  86      internal_function;
  87 #endif /* RE_ENABLE_I18N */
  88 static reg_errcode_t sift_states_backward (const re_match_context_t *mctx,
  89                                            re_sift_context_t *sctx)
  90      internal_function;
  91 static reg_errcode_t build_sifted_states (const re_match_context_t *mctx,
  92                                           re_sift_context_t *sctx, Idx str_idx,
  93                                           re_node_set *cur_dest)
  94      internal_function;
  95 static reg_errcode_t update_cur_sifted_state (const re_match_context_t *mctx,
  96                                               re_sift_context_t *sctx,
  97                                               Idx str_idx,
  98                                               re_node_set *dest_nodes)
  99      internal_function;
 100 static reg_errcode_t add_epsilon_src_nodes (const re_dfa_t *dfa,
 101                                             re_node_set *dest_nodes,
 102                                             const re_node_set *candidates)
 103      internal_function;
 104 static bool check_dst_limits (const re_match_context_t *mctx,
 105                               const re_node_set *limits,
 106                               Idx dst_node, Idx dst_idx, Idx src_node,
 107                               Idx src_idx) internal_function;
 108 static int check_dst_limits_calc_pos_1 (const re_match_context_t *mctx,
 109                                         int boundaries, Idx subexp_idx,
 110                                         Idx from_node, Idx bkref_idx)
 111      internal_function;
 112 static int check_dst_limits_calc_pos (const re_match_context_t *mctx,
 113                                       Idx limit, Idx subexp_idx,
 114                                       Idx node, Idx str_idx,
 115                                       Idx bkref_idx) internal_function;
 116 static reg_errcode_t check_subexp_limits (const re_dfa_t *dfa,
 117                                           re_node_set *dest_nodes,
 118                                           const re_node_set *candidates,
 119                                           re_node_set *limits,
 120                                           struct re_backref_cache_entry *bkref_ents,
 121                                           Idx str_idx) internal_function;
 122 static reg_errcode_t sift_states_bkref (const re_match_context_t *mctx,
 123                                         re_sift_context_t *sctx,
 124                                         Idx str_idx, const re_node_set *candidates)
 125      internal_function;
 126 static reg_errcode_t merge_state_array (const re_dfa_t *dfa,
 127                                         re_dfastate_t **dst,
 128                                         re_dfastate_t **src, Idx num)
 129      internal_function;
 130 static re_dfastate_t *find_recover_state (reg_errcode_t *err,
 131                                          re_match_context_t *mctx) internal_function;
 132 static re_dfastate_t *transit_state (reg_errcode_t *err,
 133                                      re_match_context_t *mctx,
 134                                      re_dfastate_t *state) internal_function;
 135 static re_dfastate_t *merge_state_with_log (reg_errcode_t *err,
 136                                             re_match_context_t *mctx,
 137                                             re_dfastate_t *next_state)
 138      internal_function;
 139 static reg_errcode_t check_subexp_matching_top (re_match_context_t *mctx,
 140                                                 re_node_set *cur_nodes,
 141                                                 Idx str_idx) internal_function;
 142 #if 0
 143 static re_dfastate_t *transit_state_sb (reg_errcode_t *err,
 144                                         re_match_context_t *mctx,
 145                                         re_dfastate_t *pstate)
 146      internal_function;
 147 #endif
 148 #ifdef RE_ENABLE_I18N
 149 static reg_errcode_t transit_state_mb (re_match_context_t *mctx,
 150                                        re_dfastate_t *pstate)
 151      internal_function;
 152 #endif /* RE_ENABLE_I18N */
 153 static reg_errcode_t transit_state_bkref (re_match_context_t *mctx,
 154                                           const re_node_set *nodes)
 155      internal_function;
 156 static reg_errcode_t get_subexp (re_match_context_t *mctx,
 157                                  Idx bkref_node, Idx bkref_str_idx)
 158      internal_function;
 159 static reg_errcode_t get_subexp_sub (re_match_context_t *mctx,
 160                                      const re_sub_match_top_t *sub_top,
 161                                      re_sub_match_last_t *sub_last,
 162                                      Idx bkref_node, Idx bkref_str)
 163      internal_function;
 164 static Idx find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
 165                              Idx subexp_idx, int type) internal_function;
 166 static reg_errcode_t check_arrival (re_match_context_t *mctx,
 167                                     state_array_t *path, Idx top_node,
 168                                     Idx top_str, Idx last_node, Idx last_str,
 169                                     int type) internal_function;
 170 static reg_errcode_t check_arrival_add_next_nodes (re_match_context_t *mctx,
 171                                                    Idx str_idx,
 172                                                    re_node_set *cur_nodes,
 173                                                    re_node_set *next_nodes)
 174      internal_function;
 175 static reg_errcode_t check_arrival_expand_ecl (const re_dfa_t *dfa,
 176                                                re_node_set *cur_nodes,
 177                                                Idx ex_subexp, int type)
 178      internal_function;
 179 static reg_errcode_t check_arrival_expand_ecl_sub (const re_dfa_t *dfa,
 180                                                    re_node_set *dst_nodes,
 181                                                    Idx target, Idx ex_subexp,
 182                                                    int type) internal_function;
 183 static reg_errcode_t expand_bkref_cache (re_match_context_t *mctx,
 184                                          re_node_set *cur_nodes, Idx cur_str,
 185                                          Idx subexp_num, int type)
 186      internal_function;
 187 static bool build_trtable (const re_dfa_t *dfa,
 188                            re_dfastate_t *state) internal_function;
 189 #ifdef RE_ENABLE_I18N
 190 static int check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,
 191                                     const re_string_t *input, Idx idx)
 192      internal_function;
 193 # ifdef _LIBC
 194 static unsigned int find_collation_sequence_value (const unsigned char *mbs,
 195                                                    size_t name_len)
 196      internal_function;
 197 # endif /* _LIBC */
 198 #endif /* RE_ENABLE_I18N */
 199 static Idx group_nodes_into_DFAstates (const re_dfa_t *dfa,
 200                                        const re_dfastate_t *state,
 201                                        re_node_set *states_node,
 202                                        bitset_t *states_ch) internal_function;
 203 static bool check_node_accept (const re_match_context_t *mctx,
 204                                const re_token_t *node, Idx idx)
 205      internal_function;
 206 static reg_errcode_t extend_buffers (re_match_context_t *mctx)
 207      internal_function;
 208 \f
 209 /* Entry point for POSIX code.  */
 210
 211 /* regexec searches for a given pattern, specified by PREG, in the
 212    string STRING.
 213
 214    If NMATCH is zero or REG_NOSUB was set in the cflags argument to
 215    `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
 216    least NMATCH elements, and we set them to the offsets of the
 217    corresponding matched substrings.
 218
 219    EFLAGS specifies `execution flags' which affect matching: if
 220    REG_NOTBOL is set, then ^ does not match at the beginning of the
 221    string; if REG_NOTEOL is set, then $ does not match at the end.
 222
 223    We return 0 if we find a match and REG_NOMATCH if not.  */
 224
 225 int
 226 regexec (preg, string, nmatch, pmatch, eflags)
 227     const regex_t *_Restrict_ preg;
 228     const char *_Restrict_ string;
 229     size_t nmatch;
 230     regmatch_t pmatch[_Restrict_arr_];
 231     int eflags;
 232 {
 233   reg_errcode_t err;
 234   Idx start, length;
 235 #ifdef _LIBC
 236   re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
 237 #endif
 238
 239   if (eflags & ~(REG_NOTBOL | REG_NOTEOL | REG_STARTEND))
 240     return REG_BADPAT;
 241
 242   if (eflags & REG_STARTEND)
 243     {
 244       start = pmatch[0].rm_so;
 245       length = pmatch[0].rm_eo;
 246     }
 247   else
 248     {
 249       start = 0;
 250       length = strlen (string);
 251     }
 252
 253   __libc_lock_lock (dfa->lock);
 254   if (preg->no_sub)
 255     err = re_search_internal (preg, string, length, start, length,
 256                               length, 0, NULL, eflags);
 257   else
 258     err = re_search_internal (preg, string, length, start, length,
 259                               length, nmatch, pmatch, eflags);
 260   __libc_lock_unlock (dfa->lock);
 261   return err != REG_NOERROR;
 262 }
 263
 264 #ifdef _LIBC
 265 # include <shlib-compat.h>
 266 versioned_symbol (libc, __regexec, regexec, GLIBC_2_3_4);
 267
 268 # if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4)
 269 __typeof__ (__regexec) __compat_regexec;
 270
 271 int
 272 attribute_compat_text_section
 273 __compat_regexec (const regex_t *_Restrict_ preg,
 274                   const char *_Restrict_ string, size_t nmatch,
 275                   regmatch_t pmatch[], int eflags)
 276 {
 277   return regexec (preg, string, nmatch, pmatch,
 278                   eflags & (REG_NOTBOL | REG_NOTEOL));
 279 }
 280 compat_symbol (libc, __compat_regexec, regexec, GLIBC_2_0);
 281 # endif
 282 #endif
 283
 284 /* Entry points for GNU code.  */
 285
 286 /* re_match, re_search, re_match_2, re_search_2
 287
 288    The former two functions operate on STRING with length LENGTH,
 289    while the later two operate on concatenation of STRING1 and STRING2
 290    with lengths LENGTH1 and LENGTH2, respectively.
 291
 292    re_match() matches the compiled pattern in BUFP against the string,
 293    starting at index START.
 294
 295    re_search() first tries matching at index START, then it tries to match
 296    starting from index START + 1, and so on.  The last start position tried
 297    is START + RANGE.  (Thus RANGE = 0 forces re_search to operate the same
 298    way as re_match().)
 299
 300    The parameter STOP of re_{match,search}_2 specifies that no match exceeding
 301    the first STOP characters of the concatenation of the strings should be
 302    concerned.
 303
 304    If REGS is not NULL, and BUFP->no_sub is not set, the offsets of the match
 305    and all groups is stored in REGS.  (For the "_2" variants, the offsets are
 306    computed relative to the concatenation, not relative to the individual
 307    strings.)
 308
 309    On success, re_match* functions return the length of the match, re_search*
 310    return the position of the start of the match.  Return value -1 means no
 311    match was found and -2 indicates an internal error.  */
 312
 313 regoff_t
 314 re_match (bufp, string, length, start, regs)
 315     struct re_pattern_buffer *bufp;
 316     const char *string;
 317     Idx length, start;
 318     struct re_registers *regs;
 319 {
 320   return re_search_stub (bufp, string, length, start, 0, length, regs, true);
 321 }
 322 #ifdef _LIBC
 323 weak_alias (__re_match, re_match)
 324 #endif
 325
 326 regoff_t
 327 re_search (bufp, string, length, start, range, regs)
 328     struct re_pattern_buffer *bufp;
 329     const char *string;
 330     Idx length, start;
 331     regoff_t range;
 332     struct re_registers *regs;
 333 {
 334   return re_search_stub (bufp, string, length, start, range, length, regs,
 335                          false);
 336 }
 337 #ifdef _LIBC
 338 weak_alias (__re_search, re_search)
 339 #endif
 340
 341 regoff_t
 342 re_match_2 (bufp, string1, length1, string2, length2, start, regs, stop)
 343     struct re_pattern_buffer *bufp;
 344     const char *string1, *string2;
 345     Idx length1, length2, start, stop;
 346     struct re_registers *regs;
 347 {
 348   return re_search_2_stub (bufp, string1, length1, string2, length2,
 349                            start, 0, regs, stop, true);
 350 }
 351 #ifdef _LIBC
 352 weak_alias (__re_match_2, re_match_2)
 353 #endif
 354
 355 regoff_t
 356 re_search_2 (bufp, string1, length1, string2, length2, start, range, regs, stop)
 357     struct re_pattern_buffer *bufp;
 358     const char *string1, *string2;
 359     Idx length1, length2, start, stop;
 360     regoff_t range;
 361     struct re_registers *regs;
 362 {
 363   return re_search_2_stub (bufp, string1, length1, string2, length2,
 364                            start, range, regs, stop, false);
 365 }
 366 #ifdef _LIBC
 367 weak_alias (__re_search_2, re_search_2)
 368 #endif
 369
 370 static regoff_t
 371 internal_function
 372 re_search_2_stub (struct re_pattern_buffer *bufp,
 373                   const char *string1, Idx length1,
 374                   const char *string2, Idx length2,
 375                   Idx start, regoff_t range, struct re_registers *regs,
 376                   Idx stop, bool ret_len)
 377 {
 378   const char *str;
 379   regoff_t rval;
 380   Idx len = length1 + length2;
 381   char *s = NULL;
 382
 383   if (BE (length1 < 0 || length2 < 0 || stop < 0 || len < length1, 0))
 384     return -2;
 385
 386   /* Concatenate the strings.  */
 387   if (length2 > 0)
 388     if (length1 > 0)
 389       {
 390         s = re_malloc (char, len);
 391
 392         if (BE (s == NULL, 0))
 393           return -2;
 394 #ifdef _LIBC
 395         memcpy (__mempcpy (s, string1, length1), string2, length2);
 396 #else
 397         memcpy (s, string1, length1);
 398         memcpy (s + length1, string2, length2);
 399 #endif
 400         str = s;
 401       }
 402     else
 403       str = string2;
 404   else
 405     str = string1;
 406
 407   rval = re_search_stub (bufp, str, len, start, range, stop, regs,
 408                          ret_len);
 409   re_free (s);
 410   return rval;
 411 }
 412
 413 /* The parameters have the same meaning as those of re_search.
 414    Additional parameters:
 415    If RET_LEN is true the length of the match is returned (re_match style);
 416    otherwise the position of the match is returned.  */
 417
 418 static regoff_t
 419 internal_function
 420 re_search_stub (struct re_pattern_buffer *bufp,
 421                 const char *string, Idx length,
 422                 Idx start, regoff_t range, Idx stop, struct re_registers *regs,
 423                 bool ret_len)
 424 {
 425   reg_errcode_t result;
 426   regmatch_t *pmatch;
 427   Idx nregs;
 428   regoff_t rval;
 429   int eflags = 0;
 430 #ifdef _LIBC
 431   re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
 432 #endif
 433   Idx last_start = start + range;
 434
 435   /* Check for out-of-range.  */
 436   if (BE (start < 0 || start > length, 0))
 437     return -1;
 438   if (BE (length < last_start || (0 <= range && last_start < start), 0))
 439     last_start = length;
 440   else if (BE (last_start < 0 || (range < 0 && start <= last_start), 0))
 441     last_start = 0;
 442
 443   __libc_lock_lock (dfa->lock);
 444
 445   eflags |= (bufp->not_bol) ? REG_NOTBOL : 0;
 446   eflags |= (bufp->not_eol) ? REG_NOTEOL : 0;
 447
 448   /* Compile fastmap if we haven't yet.  */
 449   if (start < last_start && bufp->fastmap != NULL && !bufp->fastmap_accurate)
 450     re_compile_fastmap (bufp);
 451
 452   if (BE (bufp->no_sub, 0))
 453     regs = NULL;
 454
 455   /* We need at least 1 register.  */
 456   if (regs == NULL)
 457     nregs = 1;
 458   else if (BE (bufp->regs_allocated == REGS_FIXED
 459                && regs->num_regs <= bufp->re_nsub, 0))
 460     {
 461       nregs = regs->num_regs;
 462       if (BE (nregs < 1, 0))
 463         {
 464           /* Nothing can be copied to regs.  */
 465           regs = NULL;
 466           nregs = 1;
 467         }
 468     }
 469   else
 470     nregs = bufp->re_nsub + 1;
 471   pmatch = re_malloc (regmatch_t, nregs);
 472   if (BE (pmatch == NULL, 0))
 473     {
 474       rval = -2;
 475       goto out;
 476     }
 477
 478   result = re_search_internal (bufp, string, length, start, last_start, stop,
 479                                nregs, pmatch, eflags);
 480
 481   rval = 0;
 482
 483   /* I hope we needn't fill ther regs with -1's when no match was found.  */
 484   if (result != REG_NOERROR)
 485     rval = -1;
 486   else if (regs != NULL)
 487     {
 488       /* If caller wants register contents data back, copy them.  */
 489       bufp->regs_allocated = re_copy_regs (regs, pmatch, nregs,
 490                                            bufp->regs_allocated);
 491       if (BE (bufp->regs_allocated == REGS_UNALLOCATED, 0))
 492         rval = -2;
 493     }
 494
 495   if (BE (rval == 0, 1))
 496     {
 497       if (ret_len)
 498         {
 499           assert (pmatch[0].rm_so == start);
 500           rval = pmatch[0].rm_eo - start;
 501         }
 502       else
 503         rval = pmatch[0].rm_so;
 504     }
 505   re_free (pmatch);
 506  out:
 507   __libc_lock_unlock (dfa->lock);
 508   return rval;
 509 }
 510
 511 static unsigned int
 512 internal_function
 513 re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, Idx nregs,
 514               int regs_allocated)
 515 {
 516   int rval = REGS_REALLOCATE;
 517   Idx i;
 518   Idx need_regs = nregs + 1;
 519   /* We need one extra element beyond `num_regs' for the `-1' marker GNU code
 520      uses.  */
 521
 522   /* Have the register data arrays been allocated?  */
 523   if (regs_allocated == REGS_UNALLOCATED)
 524     { /* No.  So allocate them with malloc.  */
 525       regs->start = re_malloc (regoff_t, need_regs);
 526       if (BE (regs->start == NULL, 0))
 527         return REGS_UNALLOCATED;
 528       regs->end = re_malloc (regoff_t, need_regs);
 529       if (BE (regs->end == NULL, 0))
 530         {
 531           re_free (regs->start);
 532           return REGS_UNALLOCATED;
 533         }
 534       regs->num_regs = need_regs;
 535     }
 536   else if (regs_allocated == REGS_REALLOCATE)
 537     { /* Yes.  If we need more elements than were already
 538          allocated, reallocate them.  If we need fewer, just
 539          leave it alone.  */
 540       if (BE (need_regs > regs->num_regs, 0))
 541         {
 542           regoff_t *new_start = re_realloc (regs->start, regoff_t, need_regs);
 543           regoff_t *new_end;
 544           if (BE (new_start == NULL, 0))
 545             return REGS_UNALLOCATED;
 546           new_end = re_realloc (regs->end, regoff_t, need_regs);
 547           if (BE (new_end == NULL, 0))
 548             {
 549               re_free (new_start);
 550               return REGS_UNALLOCATED;
 551             }
 552           regs->start = new_start;
 553           regs->end = new_end;
 554           regs->num_regs = need_regs;
 555         }
 556     }
 557   else
 558     {
 559       assert (regs_allocated == REGS_FIXED);
 560       /* This function may not be called with REGS_FIXED and nregs too big.  */
 561       assert (regs->num_regs >= nregs);
 562       rval = REGS_FIXED;
 563     }
 564
 565   /* Copy the regs.  */
 566   for (i = 0; i < nregs; ++i)
 567     {
 568       regs->start[i] = pmatch[i].rm_so;
 569       regs->end[i] = pmatch[i].rm_eo;
 570     }
 571   for ( ; i < regs->num_regs; ++i)
 572     regs->start[i] = regs->end[i] = -1;
 573
 574   return rval;
 575 }
 576
 577 /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
 578    ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
 579    this memory for recording register information.  STARTS and ENDS
 580    must be allocated using the malloc library routine, and must each
 581    be at least NUM_REGS * sizeof (regoff_t) bytes long.
 582
 583    If NUM_REGS == 0, then subsequent matches should allocate their own
 584    register data.
 585
 586    Unless this function is called, the first search or match using
 587    PATTERN_BUFFER will allocate its own register data, without
 588    freeing the old data.  */
 589
 590 void
 591 re_set_registers (bufp, regs, num_regs, starts, ends)
 592     struct re_pattern_buffer *bufp;
 593     struct re_registers *regs;
 594     __re_size_t num_regs;
 595     regoff_t *starts, *ends;
 596 {
 597   if (num_regs)
 598     {
 599       bufp->regs_allocated = REGS_REALLOCATE;
 600       regs->num_regs = num_regs;
 601       regs->start = starts;
 602       regs->end = ends;
 603     }
 604   else
 605     {
 606       bufp->regs_allocated = REGS_UNALLOCATED;
 607       regs->num_regs = 0;
 608       regs->start = regs->end = NULL;
 609     }
 610 }
 611 #ifdef _LIBC
 612 weak_alias (__re_set_registers, re_set_registers)
 613 #endif
 614 \f
 615 /* Entry points compatible with 4.2 BSD regex library.  We don't define
 616    them unless specifically requested.  */
 617
 618 #if defined _REGEX_RE_COMP || defined _LIBC
 619 int
 620 # ifdef _LIBC
 621 weak_function
 622 # endif
 623 re_exec (s)
 624      const char *s;
 625 {
 626   return 0 == regexec (&re_comp_buf, s, 0, NULL, 0);
 627 }
 628 #endif /* _REGEX_RE_COMP */
 629 \f
 630 /* Internal entry point.  */
 631
 632 /* Searches for a compiled pattern PREG in the string STRING, whose
 633    length is LENGTH.  NMATCH, PMATCH, and EFLAGS have the same
 634    meaning as with regexec.  LAST_START is START + RANGE, where
 635    START and RANGE have the same meaning as with re_search.
 636    Return REG_NOERROR if we find a match, and REG_NOMATCH if not,
 637    otherwise return the error code.
 638    Note: We assume front end functions already check ranges.
 639    (0 <= LAST_START && LAST_START <= LENGTH)  */
 640
 641 static reg_errcode_t
 642 internal_function __attribute_warn_unused_result__
 643 re_search_internal (const regex_t *preg,
 644                     const char *string, Idx length,
 645                     Idx start, Idx last_start, Idx stop,
 646                     size_t nmatch, regmatch_t pmatch[],
 647                     int eflags)
 648 {
 649   reg_errcode_t err;
 650   const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer;
 651   Idx left_lim, right_lim;
 652   int incr;
 653   bool fl_longest_match;
 654   int match_kind;
 655   Idx match_first;
 656   Idx match_last = REG_MISSING;
 657   Idx extra_nmatch;
 658   bool sb;
 659   int ch;
 660 #if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)
 661   re_match_context_t mctx = { .dfa = dfa };
 662 #else
 663   re_match_context_t mctx;
 664 #endif
 665   char *fastmap = ((preg->fastmap != NULL && preg->fastmap_accurate
 666                     && start != last_start && !preg->can_be_null)
 667                    ? preg->fastmap : NULL);
 668   RE_TRANSLATE_TYPE t = preg->translate;
 669
 670 #if !(defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L))
 671   memset (&mctx, '\0', sizeof (re_match_context_t));
 672   mctx.dfa = dfa;
 673 #endif
 674
 675   extra_nmatch = (nmatch > preg->re_nsub) ? nmatch - (preg->re_nsub + 1) : 0;
 676   nmatch -= extra_nmatch;
 677
 678   /* Check if the DFA haven't been compiled.  */
 679   if (BE (preg->used == 0 || dfa->init_state == NULL
 680           || dfa->init_state_word == NULL || dfa->init_state_nl == NULL
 681           || dfa->init_state_begbuf == NULL, 0))
 682     return REG_NOMATCH;
 683
 684 #ifdef DEBUG
 685   /* We assume front-end functions already check them.  */
 686   assert (0 <= last_start && last_start <= length);
 687 #endif
 688
 689   /* If initial states with non-begbuf contexts have no elements,
 690      the regex must be anchored.  If preg->newline_anchor is set,
 691      we'll never use init_state_nl, so do not check it.  */
 692   if (dfa->init_state->nodes.nelem == 0
 693       && dfa->init_state_word->nodes.nelem == 0
 694       && (dfa->init_state_nl->nodes.nelem == 0
 695           || !preg->newline_anchor))
 696     {
 697       if (start != 0 && last_start != 0)
 698         return REG_NOMATCH;
 699       start = last_start = 0;
 700     }
 701
 702   /* We must check the longest matching, if nmatch > 0.  */
 703   fl_longest_match = (nmatch != 0 || dfa->nbackref);
 704
 705   err = re_string_allocate (&mctx.input, string, length, dfa->nodes_len + 1,
 706                             preg->translate, (preg->syntax & RE_ICASE) != 0,
 707                             dfa);
 708   if (BE (err != REG_NOERROR, 0))
 709     goto free_return;
 710   mctx.input.stop = stop;
 711   mctx.input.raw_stop = stop;
 712   mctx.input.newline_anchor = preg->newline_anchor;
 713
 714   err = match_ctx_init (&mctx, eflags, dfa->nbackref * 2);
 715   if (BE (err != REG_NOERROR, 0))
 716     goto free_return;
 717
 718   /* We will log all the DFA states through which the dfa pass,
 719      if nmatch > 1, or this dfa has "multibyte node", which is a
 720      back-reference or a node which can accept multibyte character or
 721      multi character collating element.  */
 722   if (nmatch > 1 || dfa->has_mb_node)
 723     {
 724       /* Avoid overflow.  */
 725       if (BE (SIZE_MAX / sizeof (re_dfastate_t *) <= mctx.input.bufs_len, 0))
 726         {
 727           err = REG_ESPACE;
 728           goto free_return;
 729         }
 730
 731       mctx.state_log = re_malloc (re_dfastate_t *, mctx.input.bufs_len + 1);
 732       if (BE (mctx.state_log == NULL, 0))
 733         {
 734           err = REG_ESPACE;
 735           goto free_return;
 736         }
 737     }
 738   else
 739     mctx.state_log = NULL;
 740
 741   match_first = start;
 742   mctx.input.tip_context = (eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
 743                            : CONTEXT_NEWLINE | CONTEXT_BEGBUF;
 744
 745   /* Check incrementally whether of not the input string match.  */
 746   incr = (last_start < start) ? -1 : 1;
 747   left_lim = (last_start < start) ? last_start : start;
 748   right_lim = (last_start < start) ? start : last_start;
 749   sb = dfa->mb_cur_max == 1;
 750   match_kind =
 751     (fastmap
 752      ? ((sb || !(preg->syntax & RE_ICASE || t) ? 4 : 0)
 753         | (start <= last_start ? 2 : 0)
 754         | (t != NULL ? 1 : 0))
 755      : 8);
 756
 757   for (;; match_first += incr)
 758     {
 759       err = REG_NOMATCH;
 760       if (match_first < left_lim || right_lim < match_first)
 761         goto free_return;
 762
 763       /* Advance as rapidly as possible through the string, until we
 764          find a plausible place to start matching.  This may be done
 765          with varying efficiency, so there are various possibilities:
 766          only the most common of them are specialized, in order to
 767          save on code size.  We use a switch statement for speed.  */
 768       switch (match_kind)
 769         {
 770         case 8:
 771           /* No fastmap.  */
 772           break;
 773
 774         case 7:
 775           /* Fastmap with single-byte translation, match forward.  */
 776           while (BE (match_first < right_lim, 1)
 777                  && !fastmap[t[(unsigned char) string[match_first]]])
 778             ++match_first;
 779           goto forward_match_found_start_or_reached_end;
 780
 781         case 6:
 782           /* Fastmap without translation, match forward.  */
 783           while (BE (match_first < right_lim, 1)
 784                  && !fastmap[(unsigned char) string[match_first]])
 785             ++match_first;
 786
 787         forward_match_found_start_or_reached_end:
 788           if (BE (match_first == right_lim, 0))
 789             {
 790               ch = match_first >= length
 791                        ? 0 : (unsigned char) string[match_first];
 792               if (!fastmap[t ? t[ch] : ch])
 793                 goto free_return;
 794             }
 795           break;
 796
 797         case 4:
 798         case 5:
 799           /* Fastmap without multi-byte translation, match backwards.  */
 800           while (match_first >= left_lim)
 801             {
 802               ch = match_first >= length
 803                        ? 0 : (unsigned char) string[match_first];
 804               if (fastmap[t ? t[ch] : ch])
 805                 break;
 806               --match_first;
 807             }
 808           if (match_first < left_lim)
 809             goto free_return;
 810           break;
 811
 812         default:
 813           /* In this case, we can't determine easily the current byte,
 814              since it might be a component byte of a multibyte
 815              character.  Then we use the constructed buffer instead.  */
 816           for (;;)
 817             {
 818               /* If MATCH_FIRST is out of the valid range, reconstruct the
 819                  buffers.  */
 820               __re_size_t offset = match_first - mctx.input.raw_mbs_idx;
 821               if (BE (offset >= (__re_size_t) mctx.input.valid_raw_len, 0))
 822                 {
 823                   err = re_string_reconstruct (&mctx.input, match_first,
 824                                                eflags);
 825                   if (BE (err != REG_NOERROR, 0))
 826                     goto free_return;
 827
 828                   offset = match_first - mctx.input.raw_mbs_idx;
 829                 }
 830               /* If MATCH_FIRST is out of the buffer, leave it as '\0'.
 831                  Note that MATCH_FIRST must not be smaller than 0.  */
 832               ch = (match_first >= length
 833                     ? 0 : re_string_byte_at (&mctx.input, offset));
 834               if (fastmap[ch])
 835                 break;
 836               match_first += incr;
 837               if (match_first < left_lim || match_first > right_lim)
 838                 {
 839                   err = REG_NOMATCH;
 840                   goto free_return;
 841                 }
 842             }
 843           break;
 844         }
 845
 846       /* Reconstruct the buffers so that the matcher can assume that
 847          the matching starts from the beginning of the buffer.  */
 848       err = re_string_reconstruct (&mctx.input, match_first, eflags);
 849       if (BE (err != REG_NOERROR, 0))
 850         goto free_return;
 851
 852 #ifdef RE_ENABLE_I18N
 853      /* Don't consider this char as a possible match start if it part,
 854         yet isn't the head, of a multibyte character.  */
 855       if (!sb && !re_string_first_byte (&mctx.input, 0))
 856         continue;
 857 #endif
 858
 859       /* It seems to be appropriate one, then use the matcher.  */
 860       /* We assume that the matching starts from 0.  */
 861       mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0;
 862       match_last = check_matching (&mctx, fl_longest_match,
 863                                    start <= last_start ? &match_first : NULL);
 864       if (match_last != REG_MISSING)
 865         {
 866           if (BE (match_last == REG_ERROR, 0))
 867             {
 868               err = REG_ESPACE;
 869               goto free_return;
 870             }
 871           else
 872             {
 873               mctx.match_last = match_last;
 874               if ((!preg->no_sub && nmatch > 1) || dfa->nbackref)
 875                 {
 876                   re_dfastate_t *pstate = mctx.state_log[match_last];
 877                   mctx.last_node = check_halt_state_context (&mctx, pstate,
 878                                                              match_last);
 879                 }
 880               if ((!preg->no_sub && nmatch > 1 && dfa->has_plural_match)
 881                   || dfa->nbackref)
 882                 {
 883                   err = prune_impossible_nodes (&mctx);
 884                   if (err == REG_NOERROR)
 885                     break;
 886                   if (BE (err != REG_NOMATCH, 0))
 887                     goto free_return;
 888                   match_last = REG_MISSING;
 889                 }
 890               else
 891                 break; /* We found a match.  */
 892             }
 893         }
 894
 895       match_ctx_clean (&mctx);
 896     }
 897
 898 #ifdef DEBUG
 899   assert (match_last != REG_MISSING);
 900   assert (err == REG_NOERROR);
 901 #endif
 902
 903   /* Set pmatch[] if we need.  */
 904   if (nmatch > 0)
 905     {
 906       Idx reg_idx;
 907
 908       /* Initialize registers.  */
 909       for (reg_idx = 1; reg_idx < nmatch; ++reg_idx)
 910         pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1;
 911
 912       /* Set the points where matching start/end.  */
 913       pmatch[0].rm_so = 0;
 914       pmatch[0].rm_eo = mctx.match_last;
 915       /* FIXME: This function should fail if mctx.match_last exceeds
 916          the maximum possible regoff_t value.  We need a new error
 917          code REG_OVERFLOW.  */
 918
 919       if (!preg->no_sub && nmatch > 1)
 920         {
 921           err = set_regs (preg, &mctx, nmatch, pmatch,
 922                           dfa->has_plural_match && dfa->nbackref > 0);
 923           if (BE (err != REG_NOERROR, 0))
 924             goto free_return;
 925         }
 926
 927       /* At last, add the offset to the each registers, since we slided
 928          the buffers so that we could assume that the matching starts
 929          from 0.  */
 930       for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
 931         if (pmatch[reg_idx].rm_so != -1)
 932           {
 933 #ifdef RE_ENABLE_I18N
 934             if (BE (mctx.input.offsets_needed != 0, 0))
 935               {
 936                 pmatch[reg_idx].rm_so =
 937                   (pmatch[reg_idx].rm_so == mctx.input.valid_len
 938                    ? mctx.input.valid_raw_len
 939                    : mctx.input.offsets[pmatch[reg_idx].rm_so]);
 940                 pmatch[reg_idx].rm_eo =
 941                   (pmatch[reg_idx].rm_eo == mctx.input.valid_len
 942                    ? mctx.input.valid_raw_len
 943                    : mctx.input.offsets[pmatch[reg_idx].rm_eo]);
 944               }
 945 #else
 946             assert (mctx.input.offsets_needed == 0);
 947 #endif
 948             pmatch[reg_idx].rm_so += match_first;
 949             pmatch[reg_idx].rm_eo += match_first;
 950           }
 951       for (reg_idx = 0; reg_idx < extra_nmatch; ++reg_idx)
 952         {
 953           pmatch[nmatch + reg_idx].rm_so = -1;
 954           pmatch[nmatch + reg_idx].rm_eo = -1;
 955         }
 956
 957       if (dfa->subexp_map)
 958         for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++)
 959           if (dfa->subexp_map[reg_idx] != reg_idx)
 960             {
 961               pmatch[reg_idx + 1].rm_so
 962                 = pmatch[dfa->subexp_map[reg_idx] + 1].rm_so;
 963               pmatch[reg_idx + 1].rm_eo
 964                 = pmatch[dfa->subexp_map[reg_idx] + 1].rm_eo;
 965             }
 966     }
 967
 968  free_return:
 969   re_free (mctx.state_log);
 970   if (dfa->nbackref)
 971     match_ctx_free (&mctx);
 972   re_string_destruct (&mctx.input);
 973   return err;
 974 }
 975
 976 static reg_errcode_t
 977 internal_function __attribute_warn_unused_result__
 978 prune_impossible_nodes (re_match_context_t *mctx)
 979 {
 980   const re_dfa_t *const dfa = mctx->dfa;
 981   Idx halt_node, match_last;
 982   reg_errcode_t ret;
 983   re_dfastate_t **sifted_states;
 984   re_dfastate_t **lim_states = NULL;
 985   re_sift_context_t sctx;
 986 #ifdef DEBUG
 987   assert (mctx->state_log != NULL);
 988 #endif
 989   match_last = mctx->match_last;
 990   halt_node = mctx->last_node;
 991
 992   /* Avoid overflow.  */
 993   if (BE (SIZE_MAX / sizeof (re_dfastate_t *) <= match_last, 0))
 994     return REG_ESPACE;
 995
 996   sifted_states = re_malloc (re_dfastate_t *, match_last + 1);
 997   if (BE (sifted_states == NULL, 0))
 998     {
 999       ret = REG_ESPACE;
1000       goto free_return;
1001     }
1002   if (dfa->nbackref)
1003     {
1004       lim_states = re_malloc (re_dfastate_t *, match_last + 1);
1005       if (BE (lim_states == NULL, 0))
1006         {
1007           ret = REG_ESPACE;
1008           goto free_return;
1009         }
1010       while (1)
1011         {
1012           memset (lim_states, '\0',
1013                   sizeof (re_dfastate_t *) * (match_last + 1));
1014           sift_ctx_init (&sctx, sifted_states, lim_states, halt_node,
1015                          match_last);
1016           ret = sift_states_backward (mctx, &sctx);
1017           re_node_set_free (&sctx.limits);
1018           if (BE (ret != REG_NOERROR, 0))
1019               goto free_return;
1020           if (sifted_states[0] != NULL || lim_states[0] != NULL)
1021             break;
1022           do
1023             {
1024               --match_last;
1025               if (! REG_VALID_INDEX (match_last))
1026                 {
1027                   ret = REG_NOMATCH;
1028                   goto free_return;
1029                 }
1030             } while (mctx->state_log[match_last] == NULL
1031                      || !mctx->state_log[match_last]->halt);
1032           halt_node = check_halt_state_context (mctx,
1033                                                 mctx->state_log[match_last],
1034                                                 match_last);
1035         }
1036       ret = merge_state_array (dfa, sifted_states, lim_states,
1037                                match_last + 1);
1038       re_free (lim_states);
1039       lim_states = NULL;
1040       if (BE (ret != REG_NOERROR, 0))
1041         goto free_return;
1042     }
1043   else
1044     {
1045       sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, match_last);
1046       ret = sift_states_backward (mctx, &sctx);
1047       re_node_set_free (&sctx.limits);
1048       if (BE (ret != REG_NOERROR, 0))
1049         goto free_return;
1050       if (sifted_states[0] == NULL)
1051         {
1052           ret = REG_NOMATCH;
1053           goto free_return;
1054         }
1055     }
1056   re_free (mctx->state_log);
1057   mctx->state_log = sifted_states;
1058   sifted_states = NULL;
1059   mctx->last_node = halt_node;
1060   mctx->match_last = match_last;
1061   ret = REG_NOERROR;
1062  free_return:
1063   re_free (sifted_states);
1064   re_free (lim_states);
1065   return ret;
1066 }
1067
1068 /* Acquire an initial state and return it.
1069    We must select appropriate initial state depending on the context,
1070    since initial states may have constraints like "\<", "^", etc..  */
1071
1072 static inline re_dfastate_t *
1073 __attribute ((always_inline)) internal_function
1074 acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx,
1075                             Idx idx)
1076 {
1077   const re_dfa_t *const dfa = mctx->dfa;
1078   if (dfa->init_state->has_constraint)
1079     {
1080       unsigned int context;
1081       context = re_string_context_at (&mctx->input, idx - 1, mctx->eflags);
1082       if (IS_WORD_CONTEXT (context))
1083         return dfa->init_state_word;
1084       else if (IS_ORDINARY_CONTEXT (context))
1085         return dfa->init_state;
1086       else if (IS_BEGBUF_CONTEXT (context) && IS_NEWLINE_CONTEXT (context))
1087         return dfa->init_state_begbuf;
1088       else if (IS_NEWLINE_CONTEXT (context))
1089         return dfa->init_state_nl;
1090       else if (IS_BEGBUF_CONTEXT (context))
1091         {
1092           /* It is relatively rare case, then calculate on demand.  */
1093           return re_acquire_state_context (err, dfa,
1094                                            dfa->init_state->entrance_nodes,
1095                                            context);
1096         }
1097       else
1098         /* Must not happen?  */
1099         return dfa->init_state;
1100     }
1101   else
1102     return dfa->init_state;
1103 }
1104
1105 /* Check whether the regular expression match input string INPUT or not,
1106    and return the index where the matching end.  Return REG_MISSING if
1107    there is no match, and return REG_ERROR in case of an error.
1108    FL_LONGEST_MATCH means we want the POSIX longest matching.
1109    If P_MATCH_FIRST is not NULL, and the match fails, it is set to the
1110    next place where we may want to try matching.
1111    Note that the matcher assume that the maching starts from the current
1112    index of the buffer.  */
1113
1114 static Idx
1115 internal_function __attribute_warn_unused_result__
1116 check_matching (re_match_context_t *mctx, bool fl_longest_match,
1117                 Idx *p_match_first)
1118 {
1119   const re_dfa_t *const dfa = mctx->dfa;
1120   reg_errcode_t err;
1121   Idx match = 0;
1122   Idx match_last = REG_MISSING;
1123   Idx cur_str_idx = re_string_cur_idx (&mctx->input);
1124   re_dfastate_t *cur_state;
1125   bool at_init_state = p_match_first != NULL;
1126   Idx next_start_idx = cur_str_idx;
1127
1128   err = REG_NOERROR;
1129   cur_state = acquire_init_state_context (&err, mctx, cur_str_idx);
1130   /* An initial state must not be NULL (invalid).  */
1131   if (BE (cur_state == NULL, 0))
1132     {
1133       assert (err == REG_ESPACE);
1134       return REG_ERROR;
1135     }
1136
1137   if (mctx->state_log != NULL)
1138     {
1139       mctx->state_log[cur_str_idx] = cur_state;
1140
1141       /* Check OP_OPEN_SUBEXP in the initial state in case that we use them
1142          later.  E.g. Processing back references.  */
1143       if (BE (dfa->nbackref, 0))
1144         {
1145           at_init_state = false;
1146           err = check_subexp_matching_top (mctx, &cur_state->nodes, 0);
1147           if (BE (err != REG_NOERROR, 0))
1148             return err;
1149
1150           if (cur_state->has_backref)
1151             {
1152               err = transit_state_bkref (mctx, &cur_state->nodes);
1153               if (BE (err != REG_NOERROR, 0))
1154                 return err;
1155             }
1156         }
1157     }
1158
1159   /* If the RE accepts NULL string.  */
1160   if (BE (cur_state->halt, 0))
1161     {
1162       if (!cur_state->has_constraint
1163           || check_halt_state_context (mctx, cur_state, cur_str_idx))
1164         {
1165           if (!fl_longest_match)
1166             return cur_str_idx;
1167           else
1168             {
1169               match_last = cur_str_idx;
1170               match = 1;
1171             }
1172         }
1173     }
1174
1175   while (!re_string_eoi (&mctx->input))
1176     {
1177       re_dfastate_t *old_state = cur_state;
1178       Idx next_char_idx = re_string_cur_idx (&mctx->input) + 1;
1179
1180       if (BE (next_char_idx >= mctx->input.bufs_len, 0)
1181           || (BE (next_char_idx >= mctx->input.valid_len, 0)
1182               && mctx->input.valid_len < mctx->input.len))
1183         {
1184           err = extend_buffers (mctx);
1185           if (BE (err != REG_NOERROR, 0))
1186             {
1187               assert (err == REG_ESPACE);
1188               return REG_ERROR;
1189             }
1190         }
1191
1192       cur_state = transit_state (&err, mctx, cur_state);
1193       if (mctx->state_log != NULL)
1194         cur_state = merge_state_with_log (&err, mctx, cur_state);
1195
1196       if (cur_state == NULL)
1197         {
1198           /* Reached the invalid state or an error.  Try to recover a valid
1199              state using the state log, if available and if we have not
1200              already found a valid (even if not the longest) match.  */
1201           if (BE (err != REG_NOERROR, 0))
1202             return REG_ERROR;
1203
1204           if (mctx->state_log == NULL
1205               || (match && !fl_longest_match)
1206               || (cur_state = find_recover_state (&err, mctx)) == NULL)
1207             break;
1208         }
1209
1210       if (BE (at_init_state, 0))
1211         {
1212           if (old_state == cur_state)
1213             next_start_idx = next_char_idx;
1214           else
1215             at_init_state = false;
1216         }
1217
1218       if (cur_state->halt)
1219         {
1220           /* Reached a halt state.
1221              Check the halt state can satisfy the current context.  */
1222           if (!cur_state->has_constraint
1223               || check_halt_state_context (mctx, cur_state,
1224                                            re_string_cur_idx (&mctx->input)))
1225             {
1226               /* We found an appropriate halt state.  */
1227               match_last = re_string_cur_idx (&mctx->input);
1228               match = 1;
1229
1230               /* We found a match, do not modify match_first below.  */
1231               p_match_first = NULL;
1232               if (!fl_longest_match)
1233                 break;
1234             }
1235         }
1236     }
1237
1238   if (p_match_first)
1239     *p_match_first += next_start_idx;
1240
1241   return match_last;
1242 }
1243
1244 /* Check NODE match the current context.  */
1245
1246 static bool
1247 internal_function
1248 check_halt_node_context (const re_dfa_t *dfa, Idx node, unsigned int context)
1249 {
1250   re_token_type_t type = dfa->nodes[node].type;
1251   unsigned int constraint = dfa->nodes[node].constraint;
1252   if (type != END_OF_RE)
1253     return false;
1254   if (!constraint)
1255     return true;
1256   if (NOT_SATISFY_NEXT_CONSTRAINT (constraint, context))
1257     return false;
1258   return true;
1259 }
1260
1261 /* Check the halt state STATE match the current context.
1262    Return 0 if not match, if the node, STATE has, is a halt node and
1263    match the context, return the node.  */
1264
1265 static Idx
1266 internal_function
1267 check_halt_state_context (const re_match_context_t *mctx,
1268                           const re_dfastate_t *state, Idx idx)
1269 {
1270   Idx i;
1271   unsigned int context;
1272 #ifdef DEBUG
1273   assert (state->halt);
1274 #endif
1275   context = re_string_context_at (&mctx->input, idx, mctx->eflags);
1276   for (i = 0; i < state->nodes.nelem; ++i)
1277     if (check_halt_node_context (mctx->dfa, state->nodes.elems[i], context))
1278       return state->nodes.elems[i];
1279   return 0;
1280 }
1281
1282 /* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA
1283    corresponding to the DFA).
1284    Return the destination node, and update EPS_VIA_NODES;
1285    return REG_MISSING in case of errors.  */
1286
1287 static Idx
1288 internal_function
1289 proceed_next_node (const re_match_context_t *mctx, Idx nregs, regmatch_t *regs,
1290                    Idx *pidx, Idx node, re_node_set *eps_via_nodes,
1291                    struct re_fail_stack_t *fs)
1292 {
1293   const re_dfa_t *const dfa = mctx->dfa;
1294   Idx i;
1295   bool ok;
1296   if (IS_EPSILON_NODE (dfa->nodes[node].type))
1297     {
1298       re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes;
1299       re_node_set *edests = &dfa->edests[node];
1300       Idx dest_node;
1301       ok = re_node_set_insert (eps_via_nodes, node);
1302       if (BE (! ok, 0))
1303         return REG_ERROR;
1304       /* Pick up a valid destination, or return REG_MISSING if none
1305          is found.  */
1306       for (dest_node = REG_MISSING, i = 0; i < edests->nelem; ++i)
1307         {
1308           Idx candidate = edests->elems[i];
1309           if (!re_node_set_contains (cur_nodes, candidate))
1310             continue;
1311           if (dest_node == REG_MISSING)
1312             dest_node = candidate;
1313
1314           else
1315             {
1316               /* In order to avoid infinite loop like "(a*)*", return the second
1317                  epsilon-transition if the first was already considered.  */
1318               if (re_node_set_contains (eps_via_nodes, dest_node))
1319                 return candidate;
1320
1321               /* Otherwise, push the second epsilon-transition on the fail stack.  */
1322               else if (fs != NULL
1323                        && push_fail_stack (fs, *pidx, candidate, nregs, regs,
1324                                            eps_via_nodes))
1325                 return REG_ERROR;
1326
1327               /* We know we are going to exit.  */
1328               break;
1329             }
1330         }
1331       return dest_node;
1332     }
1333   else
1334     {
1335       Idx naccepted = 0;
1336       re_token_type_t type = dfa->nodes[node].type;
1337
1338 #ifdef RE_ENABLE_I18N
1339       if (dfa->nodes[node].accept_mb)
1340         naccepted = check_node_accept_bytes (dfa, node, &mctx->input, *pidx);
1341       else
1342 #endif /* RE_ENABLE_I18N */
1343       if (type == OP_BACK_REF)
1344         {
1345           Idx subexp_idx = dfa->nodes[node].opr.idx + 1;
1346           naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so;
1347           if (fs != NULL)
1348             {
1349               if (regs[subexp_idx].rm_so == -1 || regs[subexp_idx].rm_eo == -1)
1350                 return REG_MISSING;
1351               else if (naccepted)
1352                 {
1353                   char *buf = (char *) re_string_get_buffer (&mctx->input);
1354                   if (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx,
1355                               naccepted) != 0)
1356                     return REG_MISSING;
1357                 }
1358             }
1359
1360           if (naccepted == 0)
1361             {
1362               Idx dest_node;
1363               ok = re_node_set_insert (eps_via_nodes, node);
1364               if (BE (! ok, 0))
1365                 return REG_ERROR;
1366               dest_node = dfa->edests[node].elems[0];
1367               if (re_node_set_contains (&mctx->state_log[*pidx]->nodes,
1368                                         dest_node))
1369                 return dest_node;
1370             }
1371         }
1372
1373       if (naccepted != 0
1374           || check_node_accept (mctx, dfa->nodes + node, *pidx))
1375         {
1376           Idx dest_node = dfa->nexts[node];
1377           *pidx = (naccepted == 0) ? *pidx + 1 : *pidx + naccepted;
1378           if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] == NULL
1379                      || !re_node_set_contains (&mctx->state_log[*pidx]->nodes,
1380                                                dest_node)))
1381             return REG_MISSING;
1382           re_node_set_empty (eps_via_nodes);
1383           return dest_node;
1384         }
1385     }
1386   return REG_MISSING;
1387 }
1388
1389 static reg_errcode_t
1390 internal_function __attribute_warn_unused_result__
1391 push_fail_stack (struct re_fail_stack_t *fs, Idx str_idx, Idx dest_node,
1392                  Idx nregs, regmatch_t *regs, re_node_set *eps_via_nodes)
1393 {
1394   reg_errcode_t err;
1395   Idx num = fs->num++;
1396   if (fs->num == fs->alloc)
1397     {
1398       struct re_fail_stack_ent_t *new_array;
1399       new_array = realloc (fs->stack, (sizeof (struct re_fail_stack_ent_t)
1400                                        * fs->alloc * 2));
1401       if (new_array == NULL)
1402         return REG_ESPACE;
1403       fs->alloc *= 2;
1404       fs->stack = new_array;
1405     }
1406   fs->stack[num].idx = str_idx;
1407   fs->stack[num].node = dest_node;
1408   fs->stack[num].regs = re_malloc (regmatch_t, nregs);
1409   if (fs->stack[num].regs == NULL)
1410     return REG_ESPACE;
1411   memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs);
1412   err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes);
1413   return err;
1414 }
1415
1416 static Idx
1417 internal_function
1418 pop_fail_stack (struct re_fail_stack_t *fs, Idx *pidx, Idx nregs,
1419                 regmatch_t *regs, re_node_set *eps_via_nodes)
1420 {
1421   Idx num = --fs->num;
1422   assert (REG_VALID_INDEX (num));
1423   *pidx = fs->stack[num].idx;
1424   memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs);
1425   re_node_set_free (eps_via_nodes);
1426   re_free (fs->stack[num].regs);
1427   *eps_via_nodes = fs->stack[num].eps_via_nodes;
1428   return fs->stack[num].node;
1429 }
1430
1431 /* Set the positions where the subexpressions are starts/ends to registers
1432    PMATCH.
1433    Note: We assume that pmatch[0] is already set, and
1434    pmatch[i].rm_so == pmatch[i].rm_eo == -1 for 0 < i < nmatch.  */
1435
1436 static reg_errcode_t
1437 internal_function __attribute_warn_unused_result__
1438 set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch,
1439           regmatch_t *pmatch, bool fl_backtrack)
1440 {
1441   const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer;
1442   Idx idx, cur_node;
1443   re_node_set eps_via_nodes;
1444   struct re_fail_stack_t *fs;
1445   struct re_fail_stack_t fs_body = { 0, 2, NULL };
1446   regmatch_t *prev_idx_match;
1447   bool prev_idx_match_malloced = false;
1448
1449 #ifdef DEBUG
1450   assert (nmatch > 1);
1451   assert (mctx->state_log != NULL);
1452 #endif
1453   if (fl_backtrack)
1454     {
1455       fs = &fs_body;
1456       fs->stack = re_malloc (struct re_fail_stack_ent_t, fs->alloc);
1457       if (fs->stack == NULL)
1458         return REG_ESPACE;
1459     }
1460   else
1461     fs = NULL;
1462
1463   cur_node = dfa->init_node;
1464   re_node_set_init_empty (&eps_via_nodes);
1465
1466   if (__libc_use_alloca (nmatch * sizeof (regmatch_t)))
1467     prev_idx_match = (regmatch_t *) alloca (nmatch * sizeof (regmatch_t));
1468   else
1469     {
1470       prev_idx_match = re_malloc (regmatch_t, nmatch);
1471       if (prev_idx_match == NULL)
1472         {
1473           free_fail_stack_return (fs);
1474           return REG_ESPACE;
1475         }
1476       prev_idx_match_malloced = true;
1477     }
1478   memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
1479
1480   for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;)
1481     {
1482       update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, nmatch);
1483
1484       if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node)
1485         {
1486           Idx reg_idx;
1487           if (fs)
1488             {
1489               for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
1490                 if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1)
1491                   break;
1492               if (reg_idx == nmatch)
1493                 {
1494                   re_node_set_free (&eps_via_nodes);
1495                   if (prev_idx_match_malloced)
1496                     re_free (prev_idx_match);
1497                   return free_fail_stack_return (fs);
1498                 }
1499               cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1500                                          &eps_via_nodes);
1501             }
1502           else
1503             {
1504               re_node_set_free (&eps_via_nodes);
1505               if (prev_idx_match_malloced)
1506                 re_free (prev_idx_match);
1507               return REG_NOERROR;
1508             }
1509         }
1510
1511       /* Proceed to next node.  */
1512       cur_node = proceed_next_node (mctx, nmatch, pmatch, &idx, cur_node,
1513                                     &eps_via_nodes, fs);
1514
1515       if (BE (! REG_VALID_INDEX (cur_node), 0))
1516         {
1517           if (BE (cur_node == REG_ERROR, 0))
1518             {
1519               re_node_set_free (&eps_via_nodes);
1520               if (prev_idx_match_malloced)
1521                 re_free (prev_idx_match);
1522               free_fail_stack_return (fs);
1523               return REG_ESPACE;
1524             }
1525           if (fs)
1526             cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1527                                        &eps_via_nodes);
1528           else
1529             {
1530               re_node_set_free (&eps_via_nodes);
1531               if (prev_idx_match_malloced)
1532                 re_free (prev_idx_match);
1533               return REG_NOMATCH;
1534             }
1535         }
1536     }
1537   re_node_set_free (&eps_via_nodes);
1538   if (prev_idx_match_malloced)
1539     re_free (prev_idx_match);
1540   return free_fail_stack_return (fs);
1541 }
1542
1543 static reg_errcode_t
1544 internal_function
1545 free_fail_stack_return (struct re_fail_stack_t *fs)
1546 {
1547   if (fs)
1548     {
1549       Idx fs_idx;
1550       for (fs_idx = 0; fs_idx < fs->num; ++fs_idx)
1551         {
1552           re_node_set_free (&fs->stack[fs_idx].eps_via_nodes);
1553           re_free (fs->stack[fs_idx].regs);
1554         }
1555       re_free (fs->stack);
1556     }
1557   return REG_NOERROR;
1558 }
1559
1560 static void
1561 internal_function
1562 update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
1563              regmatch_t *prev_idx_match, Idx cur_node, Idx cur_idx, Idx nmatch)
1564 {
1565   int type = dfa->nodes[cur_node].type;
1566   if (type == OP_OPEN_SUBEXP)
1567     {
1568       Idx reg_num = dfa->nodes[cur_node].opr.idx + 1;
1569
1570       /* We are at the first node of this sub expression.  */
1571       if (reg_num < nmatch)
1572         {
1573           pmatch[reg_num].rm_so = cur_idx;
1574           pmatch[reg_num].rm_eo = -1;
1575         }
1576     }
1577   else if (type == OP_CLOSE_SUBEXP)
1578     {
1579       Idx reg_num = dfa->nodes[cur_node].opr.idx + 1;
1580       if (reg_num < nmatch)
1581         {
1582           /* We are at the last node of this sub expression.  */
1583           if (pmatch[reg_num].rm_so < cur_idx)
1584             {
1585               pmatch[reg_num].rm_eo = cur_idx;
1586               /* This is a non-empty match or we are not inside an optional
1587                  subexpression.  Accept this right away.  */
1588               memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
1589             }
1590           else
1591             {
1592               if (dfa->nodes[cur_node].opt_subexp
1593                   && prev_idx_match[reg_num].rm_so != -1)
1594                 /* We transited through an empty match for an optional
1595                    subexpression, like (a?)*, and this is not the subexp's
1596                    first match.  Copy back the old content of the registers
1597                    so that matches of an inner subexpression are undone as
1598                    well, like in ((a?))*.  */
1599                 memcpy (pmatch, prev_idx_match, sizeof (regmatch_t) * nmatch);
1600               else
1601                 /* We completed a subexpression, but it may be part of
1602                    an optional one, so do not update PREV_IDX_MATCH.  */
1603                 pmatch[reg_num].rm_eo = cur_idx;
1604             }
1605         }
1606     }
1607 }
1608
1609 /* This function checks the STATE_LOG from the SCTX->last_str_idx to 0
1610    and sift the nodes in each states according to the following rules.
1611    Updated state_log will be wrote to STATE_LOG.
1612
1613    Rules: We throw away the Node `a' in the STATE_LOG[STR_IDX] if...
1614      1. When STR_IDX == MATCH_LAST(the last index in the state_log):
1615         If `a' isn't the LAST_NODE and `a' can't epsilon transit to
1616         the LAST_NODE, we throw away the node `a'.
1617      2. When 0 <= STR_IDX < MATCH_LAST and `a' accepts
1618         string `s' and transit to `b':
1619         i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw
1620            away the node `a'.
1621         ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is
1622             thrown away, we throw away the node `a'.
1623      3. When 0 <= STR_IDX < MATCH_LAST and 'a' epsilon transit to 'b':
1624         i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the
1625            node `a'.
1626         ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is thrown away,
1627             we throw away the node `a'.  */
1628
1629 #define STATE_NODE_CONTAINS(state,node) \
1630   ((state) != NULL && re_node_set_contains (&(state)->nodes, node))
1631
1632 static reg_errcode_t
1633 internal_function
1634 sift_states_backward (const re_match_context_t *mctx, re_sift_context_t *sctx)
1635 {
1636   reg_errcode_t err;
1637   int null_cnt = 0;
1638   Idx str_idx = sctx->last_str_idx;
1639   re_node_set cur_dest;
1640
1641 #ifdef DEBUG
1642   assert (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL);
1643 #endif
1644
1645   /* Build sifted state_log[str_idx].  It has the nodes which can epsilon
1646      transit to the last_node and the last_node itself.  */
1647   err = re_node_set_init_1 (&cur_dest, sctx->last_node);
1648   if (BE (err != REG_NOERROR, 0))
1649     return err;
1650   err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
1651   if (BE (err != REG_NOERROR, 0))
1652     goto free_return;
1653
1654   /* Then check each states in the state_log.  */
1655   while (str_idx > 0)
1656     {
1657       /* Update counters.  */
1658       null_cnt = (sctx->sifted_states[str_idx] == NULL) ? null_cnt + 1 : 0;
1659       if (null_cnt > mctx->max_mb_elem_len)
1660         {
1661           memset (sctx->sifted_states, '\0',
1662                   sizeof (re_dfastate_t *) * str_idx);
1663           re_node_set_free (&cur_dest);
1664           return REG_NOERROR;
1665         }
1666       re_node_set_empty (&cur_dest);
1667       --str_idx;
1668
1669       if (mctx->state_log[str_idx])
1670         {
1671           err = build_sifted_states (mctx, sctx, str_idx, &cur_dest);
1672           if (BE (err != REG_NOERROR, 0))
1673             goto free_return;
1674         }
1675
1676       /* Add all the nodes which satisfy the following conditions:
1677          - It can epsilon transit to a node in CUR_DEST.
1678          - It is in CUR_SRC.
1679          And update state_log.  */
1680       err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
1681       if (BE (err != REG_NOERROR, 0))
1682         goto free_return;
1683     }
1684   err = REG_NOERROR;
1685  free_return:
1686   re_node_set_free (&cur_dest);
1687   return err;
1688 }
1689
1690 static reg_errcode_t
1691 internal_function __attribute_warn_unused_result__
1692 build_sifted_states (const re_match_context_t *mctx, re_sift_context_t *sctx,
1693                      Idx str_idx, re_node_set *cur_dest)
1694 {
1695   const re_dfa_t *const dfa = mctx->dfa;
1696   const re_node_set *cur_src = &mctx->state_log[str_idx]->non_eps_nodes;
1697   Idx i;
1698
1699   /* Then build the next sifted state.
1700      We build the next sifted state on `cur_dest', and update
1701      `sifted_states[str_idx]' with `cur_dest'.
1702      Note:
1703      `cur_dest' is the sifted state from `state_log[str_idx + 1]'.
1704      `cur_src' points the node_set of the old `state_log[str_idx]'
1705      (with the epsilon nodes pre-filtered out).  */
1706   for (i = 0; i < cur_src->nelem; i++)
1707     {
1708       Idx prev_node = cur_src->elems[i];
1709       int naccepted = 0;
1710       bool ok;
1711
1712 #ifdef DEBUG
1713       re_token_type_t type = dfa->nodes[prev_node].type;
1714       assert (!IS_EPSILON_NODE (type));
1715 #endif
1716 #ifdef RE_ENABLE_I18N
1717       /* If the node may accept `multi byte'.  */
1718       if (dfa->nodes[prev_node].accept_mb)
1719         naccepted = sift_states_iter_mb (mctx, sctx, prev_node,
1720                                          str_idx, sctx->last_str_idx);
1721 #endif /* RE_ENABLE_I18N */
1722
1723       /* We don't check backreferences here.
1724          See update_cur_sifted_state().  */
1725       if (!naccepted
1726           && check_node_accept (mctx, dfa->nodes + prev_node, str_idx)
1727           && STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + 1],
1728                                   dfa->nexts[prev_node]))
1729         naccepted = 1;
1730
1731       if (naccepted == 0)
1732         continue;
1733
1734       if (sctx->limits.nelem)
1735         {
1736           Idx to_idx = str_idx + naccepted;
1737           if (check_dst_limits (mctx, &sctx->limits,
1738                                 dfa->nexts[prev_node], to_idx,
1739                                 prev_node, str_idx))
1740             continue;
1741         }
1742       ok = re_node_set_insert (cur_dest, prev_node);
1743       if (BE (! ok, 0))
1744         return REG_ESPACE;
1745     }
1746
1747   return REG_NOERROR;
1748 }
1749
1750 /* Helper functions.  */
1751
1752 static reg_errcode_t
1753 internal_function
1754 clean_state_log_if_needed (re_match_context_t *mctx, Idx next_state_log_idx)
1755 {
1756   Idx top = mctx->state_log_top;
1757
1758   if (next_state_log_idx >= mctx->input.bufs_len
1759       || (next_state_log_idx >= mctx->input.valid_len
1760           && mctx->input.valid_len < mctx->input.len))
1761     {
1762       reg_errcode_t err;
1763       err = extend_buffers (mctx);
1764       if (BE (err != REG_NOERROR, 0))
1765         return err;
1766     }
1767
1768   if (top < next_state_log_idx)
1769     {
1770       memset (mctx->state_log + top + 1, '\0',
1771               sizeof (re_dfastate_t *) * (next_state_log_idx - top));
1772       mctx->state_log_top = next_state_log_idx;
1773     }
1774   return REG_NOERROR;
1775 }
1776
1777 static reg_errcode_t
1778 internal_function
1779 merge_state_array (const re_dfa_t *dfa, re_dfastate_t **dst,
1780                    re_dfastate_t **src, Idx num)
1781 {
1782   Idx st_idx;
1783   reg_errcode_t err;
1784   for (st_idx = 0; st_idx < num; ++st_idx)
1785     {
1786       if (dst[st_idx] == NULL)
1787         dst[st_idx] = src[st_idx];
1788       else if (src[st_idx] != NULL)
1789         {
1790           re_node_set merged_set;
1791           err = re_node_set_init_union (&merged_set, &dst[st_idx]->nodes,
1792                                         &src[st_idx]->nodes);
1793           if (BE (err != REG_NOERROR, 0))
1794             return err;
1795           dst[st_idx] = re_acquire_state (&err, dfa, &merged_set);
1796           re_node_set_free (&merged_set);
1797           if (BE (err != REG_NOERROR, 0))
1798             return err;
1799         }
1800     }
1801   return REG_NOERROR;
1802 }
1803
1804 static reg_errcode_t
1805 internal_function
1806 update_cur_sifted_state (const re_match_context_t *mctx,
1807                          re_sift_context_t *sctx, Idx str_idx,
1808                          re_node_set *dest_nodes)
1809 {
1810   const re_dfa_t *const dfa = mctx->dfa;
1811   reg_errcode_t err = REG_NOERROR;
1812   const re_node_set *candidates;
1813   candidates = ((mctx->state_log[str_idx] == NULL) ? NULL
1814                 : &mctx->state_log[str_idx]->nodes);
1815
1816   if (dest_nodes->nelem == 0)
1817     sctx->sifted_states[str_idx] = NULL;
1818   else
1819     {
1820       if (candidates)
1821         {
1822           /* At first, add the nodes which can epsilon transit to a node in
1823              DEST_NODE.  */
1824           err = add_epsilon_src_nodes (dfa, dest_nodes, candidates);
1825           if (BE (err != REG_NOERROR, 0))
1826             return err;
1827
1828           /* Then, check the limitations in the current sift_context.  */
1829           if (sctx->limits.nelem)
1830             {
1831               err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits,
1832                                          mctx->bkref_ents, str_idx);
1833               if (BE (err != REG_NOERROR, 0))
1834                 return err;
1835             }
1836         }
1837
1838       sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes);
1839       if (BE (err != REG_NOERROR, 0))
1840         return err;
1841     }
1842
1843   if (candidates && mctx->state_log[str_idx]->has_backref)
1844     {
1845       err = sift_states_bkref (mctx, sctx, str_idx, candidates);
1846       if (BE (err != REG_NOERROR, 0))
1847         return err;
1848     }
1849   return REG_NOERROR;
1850 }
1851
1852 static reg_errcode_t
1853 internal_function __attribute_warn_unused_result__
1854 add_epsilon_src_nodes (const re_dfa_t *dfa, re_node_set *dest_nodes,
1855                        const re_node_set *candidates)
1856 {
1857   reg_errcode_t err = REG_NOERROR;
1858   Idx i;
1859
1860   re_dfastate_t *state = re_acquire_state (&err, dfa, dest_nodes);
1861   if (BE (err != REG_NOERROR, 0))
1862     return err;
1863
1864   if (!state->inveclosure.alloc)
1865     {
1866       err = re_node_set_alloc (&state->inveclosure, dest_nodes->nelem);
1867       if (BE (err != REG_NOERROR, 0))
1868         return REG_ESPACE;
1869       for (i = 0; i < dest_nodes->nelem; i++)
1870         {
1871           err = re_node_set_merge (&state->inveclosure,
1872                                    dfa->inveclosures + dest_nodes->elems[i]);
1873           if (BE (err != REG_NOERROR, 0))
1874             return REG_ESPACE;
1875         }
1876     }
1877   return re_node_set_add_intersect (dest_nodes, candidates,
1878                                     &state->inveclosure);
1879 }
1880
1881 static reg_errcode_t
1882 internal_function
1883 sub_epsilon_src_nodes (const re_dfa_t *dfa, Idx node, re_node_set *dest_nodes,
1884                        const re_node_set *candidates)
1885 {
1886     Idx ecl_idx;
1887     reg_errcode_t err;
1888     re_node_set *inv_eclosure = dfa->inveclosures + node;
1889     re_node_set except_nodes;
1890     re_node_set_init_empty (&except_nodes);
1891     for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1892       {
1893         Idx cur_node = inv_eclosure->elems[ecl_idx];
1894         if (cur_node == node)
1895           continue;
1896         if (IS_EPSILON_NODE (dfa->nodes[cur_node].type))
1897           {
1898             Idx edst1 = dfa->edests[cur_node].elems[0];
1899             Idx edst2 = ((dfa->edests[cur_node].nelem > 1)
1900                          ? dfa->edests[cur_node].elems[1] : REG_MISSING);
1901             if ((!re_node_set_contains (inv_eclosure, edst1)
1902                  && re_node_set_contains (dest_nodes, edst1))
1903                 || (REG_VALID_NONZERO_INDEX (edst2)
1904                     && !re_node_set_contains (inv_eclosure, edst2)
1905                     && re_node_set_contains (dest_nodes, edst2)))
1906               {
1907                 err = re_node_set_add_intersect (&except_nodes, candidates,
1908                                                  dfa->inveclosures + cur_node);
1909                 if (BE (err != REG_NOERROR, 0))
1910                   {
1911                     re_node_set_free (&except_nodes);
1912                     return err;
1913                   }
1914               }
1915           }
1916       }
1917     for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1918       {
1919         Idx cur_node = inv_eclosure->elems[ecl_idx];
1920         if (!re_node_set_contains (&except_nodes, cur_node))
1921           {
1922             Idx idx = re_node_set_contains (dest_nodes, cur_node) - 1;
1923             re_node_set_remove_at (dest_nodes, idx);
1924           }
1925       }
1926     re_node_set_free (&except_nodes);
1927     return REG_NOERROR;
1928 }
1929
1930 static bool
1931 internal_function
1932 check_dst_limits (const re_match_context_t *mctx, const re_node_set *limits,
1933                   Idx dst_node, Idx dst_idx, Idx src_node, Idx src_idx)
1934 {
1935   const re_dfa_t *const dfa = mctx->dfa;
1936   Idx lim_idx, src_pos, dst_pos;
1937
1938   Idx dst_bkref_idx = search_cur_bkref_entry (mctx, dst_idx);
1939   Idx src_bkref_idx = search_cur_bkref_entry (mctx, src_idx);
1940   for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
1941     {
1942       Idx subexp_idx;
1943       struct re_backref_cache_entry *ent;
1944       ent = mctx->bkref_ents + limits->elems[lim_idx];
1945       subexp_idx = dfa->nodes[ent->node].opr.idx;
1946
1947       dst_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
1948                                            subexp_idx, dst_node, dst_idx,
1949                                            dst_bkref_idx);
1950       src_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
1951                                            subexp_idx, src_node, src_idx,
1952                                            src_bkref_idx);
1953
1954       /* In case of:
1955          <src> <dst> ( <subexp> )
1956          ( <subexp> ) <src> <dst>
1957          ( <subexp1> <src> <subexp2> <dst> <subexp3> )  */
1958       if (src_pos == dst_pos)
1959         continue; /* This is unrelated limitation.  */
1960       else
1961         return true;
1962     }
1963   return false;
1964 }
1965
1966 static int
1967 internal_function
1968 check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries,
1969                              Idx subexp_idx, Idx from_node, Idx bkref_idx)
1970 {
1971   const re_dfa_t *const dfa = mctx->dfa;
1972   const re_node_set *eclosures = dfa->eclosures + from_node;
1973   Idx node_idx;
1974
1975   /* Else, we are on the boundary: examine the nodes on the epsilon
1976      closure.  */
1977   for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx)
1978     {
1979       Idx node = eclosures->elems[node_idx];
1980       switch (dfa->nodes[node].type)
1981         {
1982         case OP_BACK_REF:
1983           if (bkref_idx != REG_MISSING)
1984             {
1985               struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx;
1986               do
1987                 {
1988                   Idx dst;
1989                   int cpos;
1990
1991                   if (ent->node != node)
1992                     continue;
1993
1994                   if (subexp_idx < BITSET_WORD_BITS
1995                       && !(ent->eps_reachable_subexps_map
1996                            & ((bitset_word_t) 1 << subexp_idx)))
1997                     continue;
1998
1999                   /* Recurse trying to reach the OP_OPEN_SUBEXP and
2000                      OP_CLOSE_SUBEXP cases below.  But, if the
2001                      destination node is the same node as the source
2002                      node, don't recurse because it would cause an
2003                      infinite loop: a regex that exhibits this behavior
2004                      is ()\1*\1*  */
2005                   dst = dfa->edests[node].elems[0];
2006                   if (dst == from_node)
2007                     {
2008                       if (boundaries & 1)
2009                         return -1;
2010                       else /* if (boundaries & 2) */
2011                         return 0;
2012                     }
2013
2014                   cpos =
2015                     check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
2016                                                  dst, bkref_idx);
2017                   if (cpos == -1 /* && (boundaries & 1) */)
2018                     return -1;
2019                   if (cpos == 0 && (boundaries & 2))
2020                     return 0;
2021
2022                   if (subexp_idx < BITSET_WORD_BITS)
2023                     ent->eps_reachable_subexps_map
2024                       &= ~((bitset_word_t) 1 << subexp_idx);
2025                 }
2026               while (ent++->more);
2027             }
2028           break;
2029
2030         case OP_OPEN_SUBEXP:
2031           if ((boundaries & 1) && subexp_idx == dfa->nodes[node].opr.idx)
2032             return -1;
2033           break;
2034
2035         case OP_CLOSE_SUBEXP:
2036           if ((boundaries & 2) && subexp_idx == dfa->nodes[node].opr.idx)
2037             return 0;
2038           break;
2039
2040         default:
2041             break;
2042         }
2043     }
2044
2045   return (boundaries & 2) ? 1 : 0;
2046 }
2047
2048 static int
2049 internal_function
2050 check_dst_limits_calc_pos (const re_match_context_t *mctx, Idx limit,
2051                            Idx subexp_idx, Idx from_node, Idx str_idx,
2052                            Idx bkref_idx)
2053 {
2054   struct re_backref_cache_entry *lim = mctx->bkref_ents + limit;
2055   int boundaries;
2056
2057   /* If we are outside the range of the subexpression, return -1 or 1.  */
2058   if (str_idx < lim->subexp_from)
2059     return -1;
2060
2061   if (lim->subexp_to < str_idx)
2062     return 1;
2063
2064   /* If we are within the subexpression, return 0.  */
2065   boundaries = (str_idx == lim->subexp_from);
2066   boundaries |= (str_idx == lim->subexp_to) << 1;
2067   if (boundaries == 0)
2068     return 0;
2069
2070   /* Else, examine epsilon closure.  */
2071   return check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
2072                                       from_node, bkref_idx);
2073 }
2074
2075 /* Check the limitations of sub expressions LIMITS, and remove the nodes
2076    which are against limitations from DEST_NODES. */
2077
2078 static reg_errcode_t
2079 internal_function
2080 check_subexp_limits (const re_dfa_t *dfa, re_node_set *dest_nodes,
2081                      const re_node_set *candidates, re_node_set *limits,
2082                      struct re_backref_cache_entry *bkref_ents, Idx str_idx)
2083 {
2084   reg_errcode_t err;
2085   Idx node_idx, lim_idx;
2086
2087   for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
2088     {
2089       Idx subexp_idx;
2090       struct re_backref_cache_entry *ent;
2091       ent = bkref_ents + limits->elems[lim_idx];
2092
2093       if (str_idx <= ent->subexp_from || ent->str_idx < str_idx)
2094         continue; /* This is unrelated limitation.  */
2095
2096       subexp_idx = dfa->nodes[ent->node].opr.idx;
2097       if (ent->subexp_to == str_idx)
2098         {
2099           Idx ops_node = REG_MISSING;
2100           Idx cls_node = REG_MISSING;
2101           for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2102             {
2103               Idx node = dest_nodes->elems[node_idx];
2104               re_token_type_t type = dfa->nodes[node].type;
2105               if (type == OP_OPEN_SUBEXP
2106                   && subexp_idx == dfa->nodes[node].opr.idx)
2107                 ops_node = node;
2108               else if (type == OP_CLOSE_SUBEXP
2109                        && subexp_idx == dfa->nodes[node].opr.idx)
2110                 cls_node = node;
2111             }
2112
2113           /* Check the limitation of the open subexpression.  */
2114           /* Note that (ent->subexp_to = str_idx != ent->subexp_from).  */
2115           if (REG_VALID_INDEX (ops_node))
2116             {
2117               err = sub_epsilon_src_nodes (dfa, ops_node, dest_nodes,
2118                                            candidates);
2119               if (BE (err != REG_NOERROR, 0))
2120                 return err;
2121             }
2122
2123           /* Check the limitation of the close subexpression.  */
2124           if (REG_VALID_INDEX (cls_node))
2125             for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2126               {
2127                 Idx node = dest_nodes->elems[node_idx];
2128                 if (!re_node_set_contains (dfa->inveclosures + node,
2129                                            cls_node)
2130                     && !re_node_set_contains (dfa->eclosures + node,
2131                                               cls_node))
2132                   {
2133                     /* It is against this limitation.
2134                        Remove it form the current sifted state.  */
2135                     err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
2136                                                  candidates);
2137                     if (BE (err != REG_NOERROR, 0))
2138                       return err;
2139                     --node_idx;
2140                   }
2141               }
2142         }
2143       else /* (ent->subexp_to != str_idx)  */
2144         {
2145           for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2146             {
2147               Idx node = dest_nodes->elems[node_idx];
2148               re_token_type_t type = dfa->nodes[node].type;
2149               if (type == OP_CLOSE_SUBEXP || type == OP_OPEN_SUBEXP)
2150                 {
2151                   if (subexp_idx != dfa->nodes[node].opr.idx)
2152                     continue;
2153                   /* It is against this limitation.
2154                      Remove it form the current sifted state.  */
2155                   err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
2156                                                candidates);
2157                   if (BE (err != REG_NOERROR, 0))
2158                     return err;
2159                 }
2160             }
2161         }
2162     }
2163   return REG_NOERROR;
2164 }
2165
2166 static reg_errcode_t
2167 internal_function __attribute_warn_unused_result__
2168 sift_states_bkref (const re_match_context_t *mctx, re_sift_context_t *sctx,
2169                    Idx str_idx, const re_node_set *candidates)
2170 {
2171   const re_dfa_t *const dfa = mctx->dfa;
2172   reg_errcode_t err;
2173   Idx node_idx, node;
2174   re_sift_context_t local_sctx;
2175   Idx first_idx = search_cur_bkref_entry (mctx, str_idx);
2176
2177   if (first_idx == REG_MISSING)
2178     return REG_NOERROR;
2179
2180   local_sctx.sifted_states = NULL; /* Mark that it hasn't been initialized.  */
2181
2182   for (node_idx = 0; node_idx < candidates->nelem; ++node_idx)
2183     {
2184       Idx enabled_idx;
2185       re_token_type_t type;
2186       struct re_backref_cache_entry *entry;
2187       node = candidates->elems[node_idx];
2188       type = dfa->nodes[node].type;
2189       /* Avoid infinite loop for the REs like "()\1+".  */
2190       if (node == sctx->last_node && str_idx == sctx->last_str_idx)
2191         continue;
2192       if (type != OP_BACK_REF)
2193         continue;
2194
2195       entry = mctx->bkref_ents + first_idx;
2196       enabled_idx = first_idx;
2197       do
2198         {
2199           Idx subexp_len;
2200           Idx to_idx;
2201           Idx dst_node;
2202           bool ok;
2203           re_dfastate_t *cur_state;
2204
2205           if (entry->node != node)
2206             continue;
2207           subexp_len = entry->subexp_to - entry->subexp_from;
2208           to_idx = str_idx + subexp_len;
2209           dst_node = (subexp_len ? dfa->nexts[node]
2210                       : dfa->edests[node].elems[0]);
2211
2212           if (to_idx > sctx->last_str_idx
2213               || sctx->sifted_states[to_idx] == NULL
2214               || !STATE_NODE_CONTAINS (sctx->sifted_states[to_idx], dst_node)
2215               || check_dst_limits (mctx, &sctx->limits, node,
2216                                    str_idx, dst_node, to_idx))
2217             continue;
2218
2219           if (local_sctx.sifted_states == NULL)
2220             {
2221               local_sctx = *sctx;
2222               err = re_node_set_init_copy (&local_sctx.limits, &sctx->limits);
2223               if (BE (err != REG_NOERROR, 0))
2224                 goto free_return;
2225             }
2226           local_sctx.last_node = node;
2227           local_sctx.last_str_idx = str_idx;
2228           ok = re_node_set_insert (&local_sctx.limits, enabled_idx);
2229           if (BE (! ok, 0))
2230             {
2231               err = REG_ESPACE;
2232               goto free_return;
2233             }
2234           cur_state = local_sctx.sifted_states[str_idx];
2235           err = sift_states_backward (mctx, &local_sctx);
2236           if (BE (err != REG_NOERROR, 0))
2237             goto free_return;
2238           if (sctx->limited_states != NULL)
2239             {
2240               err = merge_state_array (dfa, sctx->limited_states,
2241                                        local_sctx.sifted_states,
2242                                        str_idx + 1);
2243               if (BE (err != REG_NOERROR, 0))
2244                 goto free_return;
2245             }
2246           local_sctx.sifted_states[str_idx] = cur_state;
2247           re_node_set_remove (&local_sctx.limits, enabled_idx);
2248
2249           /* mctx->bkref_ents may have changed, reload the pointer.  */
2250           entry = mctx->bkref_ents + enabled_idx;
2251         }
2252       while (enabled_idx++, entry++->more);
2253     }
2254   err = REG_NOERROR;
2255  free_return:
2256   if (local_sctx.sifted_states != NULL)
2257     {
2258       re_node_set_free (&local_sctx.limits);
2259     }
2260
2261   return err;
2262 }
2263
2264
2265 #ifdef RE_ENABLE_I18N
2266 static int
2267 internal_function
2268 sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx,
2269                      Idx node_idx, Idx str_idx, Idx max_str_idx)
2270 {
2271   const re_dfa_t *const dfa = mctx->dfa;
2272   int naccepted;
2273   /* Check the node can accept `multi byte'.  */
2274   naccepted = check_node_accept_bytes (dfa, node_idx, &mctx->input, str_idx);
2275   if (naccepted > 0 && str_idx + naccepted <= max_str_idx &&
2276       !STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted],
2277                             dfa->nexts[node_idx]))
2278     /* The node can't accept the `multi byte', or the
2279        destination was already thrown away, then the node
2280        could't accept the current input `multi byte'.   */
2281     naccepted = 0;
2282   /* Otherwise, it is sure that the node could accept
2283      `naccepted' bytes input.  */
2284   return naccepted;
2285 }
2286 #endif /* RE_ENABLE_I18N */
2287
2288 \f
2289 /* Functions for state transition.  */
2290
2291 /* Return the next state to which the current state STATE will transit by
2292    accepting the current input byte, and update STATE_LOG if necessary.
2293    If STATE can accept a multibyte char/collating element/back reference
2294    update the destination of STATE_LOG.  */
2295
2296 static re_dfastate_t *
2297 internal_function __attribute_warn_unused_result__
2298 transit_state (reg_errcode_t *err, re_match_context_t *mctx,
2299                re_dfastate_t *state)
2300 {
2301   re_dfastate_t **trtable;
2302   unsigned char ch;
2303
2304 #ifdef RE_ENABLE_I18N
2305   /* If the current state can accept multibyte.  */
2306   if (BE (state->accept_mb, 0))
2307     {
2308       *err = transit_state_mb (mctx, state);
2309       if (BE (*err != REG_NOERROR, 0))
2310         return NULL;
2311     }
2312 #endif /* RE_ENABLE_I18N */
2313
2314   /* Then decide the next state with the single byte.  */
2315 #if 0
2316   if (0)
2317     /* don't use transition table  */
2318     return transit_state_sb (err, mctx, state);
2319 #endif
2320
2321   /* Use transition table  */
2322   ch = re_string_fetch_byte (&mctx->input);
2323   for (;;)
2324     {
2325       trtable = state->trtable;
2326       if (BE (trtable != NULL, 1))
2327         return trtable[ch];
2328
2329       trtable = state->word_trtable;
2330       if (BE (trtable != NULL, 1))
2331         {
2332           unsigned int context;
2333           context
2334             = re_string_context_at (&mctx->input,
2335                                     re_string_cur_idx (&mctx->input) - 1,
2336                                     mctx->eflags);
2337           if (IS_WORD_CONTEXT (context))
2338             return trtable[ch + SBC_MAX];
2339           else
2340             return trtable[ch];
2341         }
2342
2343       if (!build_trtable (mctx->dfa, state))
2344         {
2345           *err = REG_ESPACE;
2346           return NULL;
2347         }
2348
2349       /* Retry, we now have a transition table.  */
2350     }
2351 }
2352
2353 /* Update the state_log if we need */
2354 static re_dfastate_t *
2355 internal_function
2356 merge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx,
2357                       re_dfastate_t *next_state)
2358 {
2359   const re_dfa_t *const dfa = mctx->dfa;
2360   Idx cur_idx = re_string_cur_idx (&mctx->input);
2361
2362   if (cur_idx > mctx->state_log_top)
2363     {
2364       mctx->state_log[cur_idx] = next_state;
2365       mctx->state_log_top = cur_idx;
2366     }
2367   else if (mctx->state_log[cur_idx] == 0)
2368     {
2369       mctx->state_log[cur_idx] = next_state;
2370     }
2371   else
2372     {
2373       re_dfastate_t *pstate;
2374       unsigned int context;
2375       re_node_set next_nodes, *log_nodes, *table_nodes = NULL;
2376       /* If (state_log[cur_idx] != 0), it implies that cur_idx is
2377          the destination of a multibyte char/collating element/
2378          back reference.  Then the next state is the union set of
2379          these destinations and the results of the transition table.  */
2380       pstate = mctx->state_log[cur_idx];
2381       log_nodes = pstate->entrance_nodes;
2382       if (next_state != NULL)
2383         {
2384           table_nodes = next_state->entrance_nodes;
2385           *err = re_node_set_init_union (&next_nodes, table_nodes,
2386                                              log_nodes);
2387           if (BE (*err != REG_NOERROR, 0))
2388             return NULL;
2389         }
2390       else
2391         next_nodes = *log_nodes;
2392       /* Note: We already add the nodes of the initial state,
2393          then we don't need to add them here.  */
2394
2395       context = re_string_context_at (&mctx->input,
2396                                       re_string_cur_idx (&mctx->input) - 1,
2397                                       mctx->eflags);
2398       next_state = mctx->state_log[cur_idx]
2399         = re_acquire_state_context (err, dfa, &next_nodes, context);
2400       /* We don't need to check errors here, since the return value of
2401          this function is next_state and ERR is already set.  */
2402
2403       if (table_nodes != NULL)
2404         re_node_set_free (&next_nodes);
2405     }
2406
2407   if (BE (dfa->nbackref, 0) && next_state != NULL)
2408     {
2409       /* Check OP_OPEN_SUBEXP in the current state in case that we use them
2410          later.  We must check them here, since the back references in the
2411          next state might use them.  */
2412       *err = check_subexp_matching_top (mctx, &next_state->nodes,
2413                                         cur_idx);
2414       if (BE (*err != REG_NOERROR, 0))
2415         return NULL;
2416
2417       /* If the next state has back references.  */
2418       if (next_state->has_backref)
2419         {
2420           *err = transit_state_bkref (mctx, &next_state->nodes);
2421           if (BE (*err != REG_NOERROR, 0))
2422             return NULL;
2423           next_state = mctx->state_log[cur_idx];
2424         }
2425     }
2426
2427   return next_state;
2428 }
2429
2430 /* Skip bytes in the input that correspond to part of a
2431    multi-byte match, then look in the log for a state
2432    from which to restart matching.  */
2433 static re_dfastate_t *
2434 internal_function
2435 find_recover_state (reg_errcode_t *err, re_match_context_t *mctx)
2436 {
2437   re_dfastate_t *cur_state;
2438   do
2439     {
2440       Idx max = mctx->state_log_top;
2441       Idx cur_str_idx = re_string_cur_idx (&mctx->input);
2442
2443       do
2444         {
2445           if (++cur_str_idx > max)
2446             return NULL;
2447           re_string_skip_bytes (&mctx->input, 1);
2448         }
2449       while (mctx->state_log[cur_str_idx] == NULL);
2450
2451       cur_state = merge_state_with_log (err, mctx, NULL);
2452     }
2453   while (*err == REG_NOERROR && cur_state == NULL);
2454   return cur_state;
2455 }
2456
2457 /* Helper functions for transit_state.  */
2458
2459 /* From the node set CUR_NODES, pick up the nodes whose types are
2460    OP_OPEN_SUBEXP and which have corresponding back references in the regular
2461    expression. And register them to use them later for evaluating the
2462    correspoding back references.  */
2463
2464 static reg_errcode_t
2465 internal_function
2466 check_subexp_matching_top (re_match_context_t *mctx, re_node_set *cur_nodes,
2467                            Idx str_idx)
2468 {
2469   const re_dfa_t *const dfa = mctx->dfa;
2470   Idx node_idx;
2471   reg_errcode_t err;
2472
2473   /* TODO: This isn't efficient.
2474            Because there might be more than one nodes whose types are
2475            OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
2476            nodes.
2477            E.g. RE: (a){2}  */
2478   for (node_idx = 0; node_idx < cur_nodes->nelem; ++node_idx)
2479     {
2480       Idx node = cur_nodes->elems[node_idx];
2481       if (dfa->nodes[node].type == OP_OPEN_SUBEXP
2482           && dfa->nodes[node].opr.idx < BITSET_WORD_BITS
2483           && (dfa->used_bkref_map
2484               & ((bitset_word_t) 1 << dfa->nodes[node].opr.idx)))
2485         {
2486           err = match_ctx_add_subtop (mctx, node, str_idx);
2487           if (BE (err != REG_NOERROR, 0))
2488             return err;
2489         }
2490     }
2491   return REG_NOERROR;
2492 }
2493
2494 #if 0
2495 /* Return the next state to which the current state STATE will transit by
2496    accepting the current input byte.  */
2497
2498 static re_dfastate_t *
2499 transit_state_sb (reg_errcode_t *err, re_match_context_t *mctx,
2500                   re_dfastate_t *state)
2501 {
2502   const re_dfa_t *const dfa = mctx->dfa;
2503   re_node_set next_nodes;
2504   re_dfastate_t *next_state;
2505   Idx node_cnt, cur_str_idx = re_string_cur_idx (&mctx->input);
2506   unsigned int context;
2507
2508   *err = re_node_set_alloc (&next_nodes, state->nodes.nelem + 1);
2509   if (BE (*err != REG_NOERROR, 0))
2510     return NULL;
2511   for (node_cnt = 0; node_cnt < state->nodes.nelem; ++node_cnt)
2512     {
2513       Idx cur_node = state->nodes.elems[node_cnt];
2514       if (check_node_accept (mctx, dfa->nodes + cur_node, cur_str_idx))
2515         {
2516           *err = re_node_set_merge (&next_nodes,
2517                                     dfa->eclosures + dfa->nexts[cur_node]);
2518           if (BE (*err != REG_NOERROR, 0))
2519             {
2520               re_node_set_free (&next_nodes);
2521               return NULL;
2522             }
2523         }
2524     }
2525   context = re_string_context_at (&mctx->input, cur_str_idx, mctx->eflags);
2526   next_state = re_acquire_state_context (err, dfa, &next_nodes, context);
2527   /* We don't need to check errors here, since the return value of
2528      this function is next_state and ERR is already set.  */
2529
2530   re_node_set_free (&next_nodes);
2531   re_string_skip_bytes (&mctx->input, 1);
2532   return next_state;
2533 }
2534 #endif
2535
2536 #ifdef RE_ENABLE_I18N
2537 static reg_errcode_t
2538 internal_function
2539 transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate)
2540 {
2541   const re_dfa_t *const dfa = mctx->dfa;
2542   reg_errcode_t err;
2543   Idx i;
2544
2545   for (i = 0; i < pstate->nodes.nelem; ++i)
2546     {
2547       re_node_set dest_nodes, *new_nodes;
2548       Idx cur_node_idx = pstate->nodes.elems[i];
2549       int naccepted;
2550       Idx dest_idx;
2551       unsigned int context;
2552       re_dfastate_t *dest_state;
2553
2554       if (!dfa->nodes[cur_node_idx].accept_mb)
2555         continue;
2556
2557       if (dfa->nodes[cur_node_idx].constraint)
2558         {
2559           context = re_string_context_at (&mctx->input,
2560                                           re_string_cur_idx (&mctx->input),
2561                                           mctx->eflags);
2562           if (NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[cur_node_idx].constraint,
2563                                            context))
2564             continue;
2565         }
2566
2567       /* How many bytes the node can accept?  */
2568       naccepted = check_node_accept_bytes (dfa, cur_node_idx, &mctx->input,
2569                                            re_string_cur_idx (&mctx->input));
2570       if (naccepted == 0)
2571         continue;
2572
2573       /* The node can accepts `naccepted' bytes.  */
2574       dest_idx = re_string_cur_idx (&mctx->input) + naccepted;
2575       mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? naccepted
2576                                : mctx->max_mb_elem_len);
2577       err = clean_state_log_if_needed (mctx, dest_idx);
2578       if (BE (err != REG_NOERROR, 0))
2579         return err;
2580 #ifdef DEBUG
2581       assert (dfa->nexts[cur_node_idx] != REG_MISSING);
2582 #endif
2583       new_nodes = dfa->eclosures + dfa->nexts[cur_node_idx];
2584
2585       dest_state = mctx->state_log[dest_idx];
2586       if (dest_state == NULL)
2587         dest_nodes = *new_nodes;
2588       else
2589         {
2590           err = re_node_set_init_union (&dest_nodes,
2591                                         dest_state->entrance_nodes, new_nodes);
2592           if (BE (err != REG_NOERROR, 0))
2593             return err;
2594         }
2595       context = re_string_context_at (&mctx->input, dest_idx - 1,
2596                                       mctx->eflags);
2597       mctx->state_log[dest_idx]
2598         = re_acquire_state_context (&err, dfa, &dest_nodes, context);
2599       if (dest_state != NULL)
2600         re_node_set_free (&dest_nodes);
2601       if (BE (mctx->state_log[dest_idx] == NULL && err != REG_NOERROR, 0))
2602         return err;
2603     }
2604   return REG_NOERROR;
2605 }
2606 #endif /* RE_ENABLE_I18N */
2607
2608 static reg_errcode_t
2609 internal_function
2610 transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes)
2611 {
2612   const re_dfa_t *const dfa = mctx->dfa;
2613   reg_errcode_t err;
2614   Idx i;
2615   Idx cur_str_idx = re_string_cur_idx (&mctx->input);
2616
2617   for (i = 0; i < nodes->nelem; ++i)
2618     {
2619       Idx dest_str_idx, prev_nelem, bkc_idx;
2620       Idx node_idx = nodes->elems[i];
2621       unsigned int context;
2622       const re_token_t *node = dfa->nodes + node_idx;
2623       re_node_set *new_dest_nodes;
2624
2625       /* Check whether `node' is a backreference or not.  */
2626       if (node->type != OP_BACK_REF)
2627         continue;
2628
2629       if (node->constraint)
2630         {
2631           context = re_string_context_at (&mctx->input, cur_str_idx,
2632                                           mctx->eflags);
2633           if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
2634             continue;
2635         }
2636
2637       /* `node' is a backreference.
2638          Check the substring which the substring matched.  */
2639       bkc_idx = mctx->nbkref_ents;
2640       err = get_subexp (mctx, node_idx, cur_str_idx);
2641       if (BE (err != REG_NOERROR, 0))
2642         goto free_return;
2643
2644       /* And add the epsilon closures (which is `new_dest_nodes') of
2645          the backreference to appropriate state_log.  */
2646 #ifdef DEBUG
2647       assert (dfa->nexts[node_idx] != REG_MISSING);
2648 #endif
2649       for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx)
2650         {
2651           Idx subexp_len;
2652           re_dfastate_t *dest_state;
2653           struct re_backref_cache_entry *bkref_ent;
2654           bkref_ent = mctx->bkref_ents + bkc_idx;
2655           if (bkref_ent->node != node_idx || bkref_ent->str_idx != cur_str_idx)
2656             continue;
2657           subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from;
2658           new_dest_nodes = (subexp_len == 0
2659                             ? dfa->eclosures + dfa->edests[node_idx].elems[0]
2660                             : dfa->eclosures + dfa->nexts[node_idx]);
2661           dest_str_idx = (cur_str_idx + bkref_ent->subexp_to
2662                           - bkref_ent->subexp_from);
2663           context = re_string_context_at (&mctx->input, dest_str_idx - 1,
2664                                           mctx->eflags);
2665           dest_state = mctx->state_log[dest_str_idx];
2666           prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0
2667                         : mctx->state_log[cur_str_idx]->nodes.nelem);
2668           /* Add `new_dest_node' to state_log.  */
2669           if (dest_state == NULL)
2670             {
2671               mctx->state_log[dest_str_idx]
2672                 = re_acquire_state_context (&err, dfa, new_dest_nodes,
2673                                             context);
2674               if (BE (mctx->state_log[dest_str_idx] == NULL
2675                       && err != REG_NOERROR, 0))
2676                 goto free_return;
2677             }
2678           else
2679             {
2680               re_node_set dest_nodes;
2681               err = re_node_set_init_union (&dest_nodes,
2682                                             dest_state->entrance_nodes,
2683                                             new_dest_nodes);
2684               if (BE (err != REG_NOERROR, 0))
2685                 {
2686                   re_node_set_free (&dest_nodes);
2687                   goto free_return;
2688                 }
2689               mctx->state_log[dest_str_idx]
2690                 = re_acquire_state_context (&err, dfa, &dest_nodes, context);
2691               re_node_set_free (&dest_nodes);
2692               if (BE (mctx->state_log[dest_str_idx] == NULL
2693                       && err != REG_NOERROR, 0))
2694                 goto free_return;
2695             }
2696           /* We need to check recursively if the backreference can epsilon
2697              transit.  */
2698           if (subexp_len == 0
2699               && mctx->state_log[cur_str_idx]->nodes.nelem > prev_nelem)
2700             {
2701               err = check_subexp_matching_top (mctx, new_dest_nodes,
2702                                                cur_str_idx);
2703               if (BE (err != REG_NOERROR, 0))
2704                 goto free_return;
2705               err = transit_state_bkref (mctx, new_dest_nodes);
2706               if (BE (err != REG_NOERROR, 0))
2707                 goto free_return;
2708             }
2709         }
2710     }
2711   err = REG_NOERROR;
2712  free_return:
2713   return err;
2714 }
2715
2716 /* Enumerate all the candidates which the backreference BKREF_NODE can match
2717    at BKREF_STR_IDX, and register them by match_ctx_add_entry().
2718    Note that we might collect inappropriate candidates here.
2719    However, the cost of checking them strictly here is too high, then we
2720    delay these checking for prune_impossible_nodes().  */
2721
2722 static reg_errcode_t
2723 internal_function __attribute_warn_unused_result__
2724 get_subexp (re_match_context_t *mctx, Idx bkref_node, Idx bkref_str_idx)
2725 {
2726   const re_dfa_t *const dfa = mctx->dfa;
2727   Idx subexp_num, sub_top_idx;
2728   const char *buf = (const char *) re_string_get_buffer (&mctx->input);
2729   /* Return if we have already checked BKREF_NODE at BKREF_STR_IDX.  */
2730   Idx cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx);
2731   if (cache_idx != REG_MISSING)
2732     {
2733       const struct re_backref_cache_entry *entry
2734         = mctx->bkref_ents + cache_idx;
2735       do
2736         if (entry->node == bkref_node)
2737           return REG_NOERROR; /* We already checked it.  */
2738       while (entry++->more);
2739     }
2740
2741   subexp_num = dfa->nodes[bkref_node].opr.idx;
2742
2743   /* For each sub expression  */
2744   for (sub_top_idx = 0; sub_top_idx < mctx->nsub_tops; ++sub_top_idx)
2745     {
2746       reg_errcode_t err;
2747       re_sub_match_top_t *sub_top = mctx->sub_tops[sub_top_idx];
2748       re_sub_match_last_t *sub_last;
2749       Idx sub_last_idx, sl_str, bkref_str_off;
2750
2751       if (dfa->nodes[sub_top->node].opr.idx != subexp_num)
2752         continue; /* It isn't related.  */
2753
2754       sl_str = sub_top->str_idx;
2755       bkref_str_off = bkref_str_idx;
2756       /* At first, check the last node of sub expressions we already
2757          evaluated.  */
2758       for (sub_last_idx = 0; sub_last_idx < sub_top->nlasts; ++sub_last_idx)
2759         {
2760           regoff_t sl_str_diff;
2761           sub_last = sub_top->lasts[sub_last_idx];
2762           sl_str_diff = sub_last->str_idx - sl_str;
2763           /* The matched string by the sub expression match with the substring
2764              at the back reference?  */
2765           if (sl_str_diff > 0)
2766             {
2767               if (BE (bkref_str_off + sl_str_diff > mctx->input.valid_len, 0))
2768                 {
2769                   /* Not enough chars for a successful match.  */
2770                   if (bkref_str_off + sl_str_diff > mctx->input.len)
2771                     break;
2772
2773                   err = clean_state_log_if_needed (mctx,
2774                                                    bkref_str_off
2775                                                    + sl_str_diff);
2776                   if (BE (err != REG_NOERROR, 0))
2777                     return err;
2778                   buf = (const char *) re_string_get_buffer (&mctx->input);
2779                 }
2780               if (memcmp (buf + bkref_str_off, buf + sl_str, sl_str_diff) != 0)
2781                 /* We don't need to search this sub expression any more.  */
2782                 break;
2783             }
2784           bkref_str_off += sl_str_diff;
2785           sl_str += sl_str_diff;
2786           err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
2787                                 bkref_str_idx);
2788
2789           /* Reload buf, since the preceding call might have reallocated
2790              the buffer.  */
2791           buf = (const char *) re_string_get_buffer (&mctx->input);
2792
2793           if (err == REG_NOMATCH)
2794             continue;
2795           if (BE (err != REG_NOERROR, 0))
2796             return err;
2797         }
2798
2799       if (sub_last_idx < sub_top->nlasts)
2800         continue;
2801       if (sub_last_idx > 0)
2802         ++sl_str;
2803       /* Then, search for the other last nodes of the sub expression.  */
2804       for (; sl_str <= bkref_str_idx; ++sl_str)
2805         {
2806           Idx cls_node;
2807           regoff_t sl_str_off;
2808           const re_node_set *nodes;
2809           sl_str_off = sl_str - sub_top->str_idx;
2810           /* The matched string by the sub expression match with the substring
2811              at the back reference?  */
2812           if (sl_str_off > 0)
2813             {
2814               if (BE (bkref_str_off >= mctx->input.valid_len, 0))
2815                 {
2816                   /* If we are at the end of the input, we cannot match.  */
2817                   if (bkref_str_off >= mctx->input.len)
2818                     break;
2819
2820                   err = extend_buffers (mctx);
2821                   if (BE (err != REG_NOERROR, 0))
2822                     return err;
2823
2824                   buf = (const char *) re_string_get_buffer (&mctx->input);
2825                 }
2826               if (buf [bkref_str_off++] != buf[sl_str - 1])
2827                 break; /* We don't need to search this sub expression
2828                           any more.  */
2829             }
2830           if (mctx->state_log[sl_str] == NULL)
2831             continue;
2832           /* Does this state have a ')' of the sub expression?  */
2833           nodes = &mctx->state_log[sl_str]->nodes;
2834           cls_node = find_subexp_node (dfa, nodes, subexp_num,
2835                                        OP_CLOSE_SUBEXP);
2836           if (cls_node == REG_MISSING)
2837             continue; /* No.  */
2838           if (sub_top->path == NULL)
2839             {
2840               sub_top->path = calloc (sizeof (state_array_t),
2841                                       sl_str - sub_top->str_idx + 1);
2842               if (sub_top->path == NULL)
2843                 return REG_ESPACE;
2844             }
2845           /* Can the OP_OPEN_SUBEXP node arrive the OP_CLOSE_SUBEXP node
2846              in the current context?  */
2847           err = check_arrival (mctx, sub_top->path, sub_top->node,
2848                                sub_top->str_idx, cls_node, sl_str,
2849                                OP_CLOSE_SUBEXP);
2850           if (err == REG_NOMATCH)
2851               continue;
2852           if (BE (err != REG_NOERROR, 0))
2853               return err;
2854           sub_last = match_ctx_add_sublast (sub_top, cls_node, sl_str);
2855           if (BE (sub_last == NULL, 0))
2856             return REG_ESPACE;
2857           err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
2858                                 bkref_str_idx);
2859           if (err == REG_NOMATCH)
2860             continue;
2861         }
2862     }
2863   return REG_NOERROR;
2864 }
2865
2866 /* Helper functions for get_subexp().  */
2867
2868 /* Check SUB_LAST can arrive to the back reference BKREF_NODE at BKREF_STR.
2869    If it can arrive, register the sub expression expressed with SUB_TOP
2870    and SUB_LAST.  */
2871
2872 static reg_errcode_t
2873 internal_function
2874 get_subexp_sub (re_match_context_t *mctx, const re_sub_match_top_t *sub_top,
2875                 re_sub_match_last_t *sub_last, Idx bkref_node, Idx bkref_str)
2876 {
2877   reg_errcode_t err;
2878   Idx to_idx;
2879   /* Can the subexpression arrive the back reference?  */
2880   err = check_arrival (mctx, &sub_last->path, sub_last->node,
2881                        sub_last->str_idx, bkref_node, bkref_str,
2882                        OP_OPEN_SUBEXP);
2883   if (err != REG_NOERROR)
2884     return err;
2885   err = match_ctx_add_entry (mctx, bkref_node, bkref_str, sub_top->str_idx,
2886                              sub_last->str_idx);
2887   if (BE (err != REG_NOERROR, 0))
2888     return err;
2889   to_idx = bkref_str + sub_last->str_idx - sub_top->str_idx;
2890   return clean_state_log_if_needed (mctx, to_idx);
2891 }
2892
2893 /* Find the first node which is '(' or ')' and whose index is SUBEXP_IDX.
2894    Search '(' if FL_OPEN, or search ')' otherwise.
2895    TODO: This function isn't efficient...
2896          Because there might be more than one nodes whose types are
2897          OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
2898          nodes.
2899          E.g. RE: (a){2}  */
2900
2901 static Idx
2902 internal_function
2903 find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
2904                   Idx subexp_idx, int type)
2905 {
2906   Idx cls_idx;
2907   for (cls_idx = 0; cls_idx < nodes->nelem; ++cls_idx)
2908     {
2909       Idx cls_node = nodes->elems[cls_idx];
2910       const re_token_t *node = dfa->nodes + cls_node;
2911       if (node->type == type
2912           && node->opr.idx == subexp_idx)
2913         return cls_node;
2914     }
2915   return REG_MISSING;
2916 }
2917
2918 /* Check whether the node TOP_NODE at TOP_STR can arrive to the node
2919    LAST_NODE at LAST_STR.  We record the path onto PATH since it will be
2920    heavily reused.
2921    Return REG_NOERROR if it can arrive, or REG_NOMATCH otherwise.  */
2922
2923 static reg_errcode_t
2924 internal_function __attribute_warn_unused_result__
2925 check_arrival (re_match_context_t *mctx, state_array_t *path, Idx top_node,
2926                Idx top_str, Idx last_node, Idx last_str, int type)
2927 {
2928   const re_dfa_t *const dfa = mctx->dfa;
2929   reg_errcode_t err = REG_NOERROR;
2930   Idx subexp_num, backup_cur_idx, str_idx, null_cnt;
2931   re_dfastate_t *cur_state = NULL;
2932   re_node_set *cur_nodes, next_nodes;
2933   re_dfastate_t **backup_state_log;
2934   unsigned int context;
2935
2936   subexp_num = dfa->nodes[top_node].opr.idx;
2937   /* Extend the buffer if we need.  */
2938   if (BE (path->alloc < last_str + mctx->max_mb_elem_len + 1, 0))
2939     {
2940       re_dfastate_t **new_array;
2941       Idx old_alloc = path->alloc;
2942       Idx new_alloc = old_alloc + last_str + mctx->max_mb_elem_len + 1;
2943       if (BE (new_alloc < old_alloc, 0)
2944           || BE (SIZE_MAX / sizeof (re_dfastate_t *) < new_alloc, 0))
2945         return REG_ESPACE;
2946       new_array = re_realloc (path->array, re_dfastate_t *, new_alloc);
2947       if (BE (new_array == NULL, 0))
2948         return REG_ESPACE;
2949       path->array = new_array;
2950       path->alloc = new_alloc;
2951       memset (new_array + old_alloc, '\0',
2952               sizeof (re_dfastate_t *) * (path->alloc - old_alloc));
2953     }
2954
2955   str_idx = path->next_idx ? path->next_idx : top_str;
2956
2957   /* Temporary modify MCTX.  */
2958   backup_state_log = mctx->state_log;
2959   backup_cur_idx = mctx->input.cur_idx;
2960   mctx->state_log = path->array;
2961   mctx->input.cur_idx = str_idx;
2962
2963   /* Setup initial node set.  */
2964   context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
2965   if (str_idx == top_str)
2966     {
2967       err = re_node_set_init_1 (&next_nodes, top_node);
2968       if (BE (err != REG_NOERROR, 0))
2969         return err;
2970       err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
2971       if (BE (err != REG_NOERROR, 0))
2972         {
2973           re_node_set_free (&next_nodes);
2974           return err;
2975         }
2976     }
2977   else
2978     {
2979       cur_state = mctx->state_log[str_idx];
2980       if (cur_state && cur_state->has_backref)
2981         {
2982           err = re_node_set_init_copy (&next_nodes, &cur_state->nodes);
2983           if (BE (err != REG_NOERROR, 0))
2984             return err;
2985         }
2986       else
2987         re_node_set_init_empty (&next_nodes);
2988     }
2989   if (str_idx == top_str || (cur_state && cur_state->has_backref))
2990     {
2991       if (next_nodes.nelem)
2992         {
2993           err = expand_bkref_cache (mctx, &next_nodes, str_idx,
2994                                     subexp_num, type);
2995           if (BE (err != REG_NOERROR, 0))
2996             {
2997               re_node_set_free (&next_nodes);
2998               return err;
2999             }
3000         }
3001       cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
3002       if (BE (cur_state == NULL && err != REG_NOERROR, 0))
3003         {
3004           re_node_set_free (&next_nodes);
3005           return err;
3006         }
3007       mctx->state_log[str_idx] = cur_state;
3008     }
3009
3010   for (null_cnt = 0; str_idx < last_str && null_cnt <= mctx->max_mb_elem_len;)
3011     {
3012       re_node_set_empty (&next_nodes);
3013       if (mctx->state_log[str_idx + 1])
3014         {
3015           err = re_node_set_merge (&next_nodes,
3016                                    &mctx->state_log[str_idx + 1]->nodes);
3017           if (BE (err != REG_NOERROR, 0))
3018             {
3019               re_node_set_free (&next_nodes);
3020               return err;
3021             }
3022         }
3023       if (cur_state)
3024         {
3025           err = check_arrival_add_next_nodes (mctx, str_idx,
3026                                               &cur_state->non_eps_nodes,
3027                                               &next_nodes);
3028           if (BE (err != REG_NOERROR, 0))
3029             {
3030               re_node_set_free (&next_nodes);
3031               return err;
3032             }
3033         }
3034       ++str_idx;
3035       if (next_nodes.nelem)
3036         {
3037           err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
3038           if (BE (err != REG_NOERROR, 0))
3039             {
3040               re_node_set_free (&next_nodes);
3041               return err;
3042             }
3043           err = expand_bkref_cache (mctx, &next_nodes, str_idx,
3044                                     subexp_num, type);
3045           if (BE (err != REG_NOERROR, 0))
3046             {
3047               re_node_set_free (&next_nodes);
3048               return err;
3049             }
3050         }
3051       context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
3052       cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
3053       if (BE (cur_state == NULL && err != REG_NOERROR, 0))
3054         {
3055           re_node_set_free (&next_nodes);
3056           return err;
3057         }
3058       mctx->state_log[str_idx] = cur_state;
3059       null_cnt = cur_state == NULL ? null_cnt + 1 : 0;
3060     }
3061   re_node_set_free (&next_nodes);
3062   cur_nodes = (mctx->state_log[last_str] == NULL ? NULL
3063                : &mctx->state_log[last_str]->nodes);
3064   path->next_idx = str_idx;
3065
3066   /* Fix MCTX.  */
3067   mctx->state_log = backup_state_log;
3068   mctx->input.cur_idx = backup_cur_idx;
3069
3070   /* Then check the current node set has the node LAST_NODE.  */
3071   if (cur_nodes != NULL && re_node_set_contains (cur_nodes, last_node))
3072     return REG_NOERROR;
3073
3074   return REG_NOMATCH;
3075 }
3076
3077 /* Helper functions for check_arrival.  */
3078
3079 /* Calculate the destination nodes of CUR_NODES at STR_IDX, and append them
3080    to NEXT_NODES.
3081    TODO: This function is similar to the functions transit_state*(),
3082          however this function has many additional works.
3083          Can't we unify them?  */
3084
3085 static reg_errcode_t
3086 internal_function __attribute_warn_unused_result__
3087 check_arrival_add_next_nodes (re_match_context_t *mctx, Idx str_idx,
3088                               re_node_set *cur_nodes, re_node_set *next_nodes)
3089 {
3090   const re_dfa_t *const dfa = mctx->dfa;
3091   bool ok;
3092   Idx cur_idx;
3093 #ifdef RE_ENABLE_I18N
3094   reg_errcode_t err = REG_NOERROR;
3095 #endif
3096   re_node_set union_set;
3097   re_node_set_init_empty (&union_set);
3098   for (cur_idx = 0; cur_idx < cur_nodes->nelem; ++cur_idx)
3099     {
3100       int naccepted = 0;
3101       Idx cur_node = cur_nodes->elems[cur_idx];
3102 #ifdef DEBUG
3103       re_token_type_t type = dfa->nodes[cur_node].type;
3104       assert (!IS_EPSILON_NODE (type));
3105 #endif
3106 #ifdef RE_ENABLE_I18N
3107       /* If the node may accept `multi byte'.  */
3108       if (dfa->nodes[cur_node].accept_mb)
3109         {
3110           naccepted = check_node_accept_bytes (dfa, cur_node, &mctx->input,
3111                                                str_idx);
3112           if (naccepted > 1)
3113             {
3114               re_dfastate_t *dest_state;
3115               Idx next_node = dfa->nexts[cur_node];
3116               Idx next_idx = str_idx + naccepted;
3117               dest_state = mctx->state_log[next_idx];
3118               re_node_set_empty (&union_set);
3119               if (dest_state)
3120                 {
3121                   err = re_node_set_merge (&union_set, &dest_state->nodes);
3122                   if (BE (err != REG_NOERROR, 0))
3123                     {
3124                       re_node_set_free (&union_set);
3125                       return err;
3126                     }
3127                 }
3128               ok = re_node_set_insert (&union_set, next_node);
3129               if (BE (! ok, 0))
3130                 {
3131                   re_node_set_free (&union_set);
3132                   return REG_ESPACE;
3133                 }
3134               mctx->state_log[next_idx] = re_acquire_state (&err, dfa,
3135                                                             &union_set);
3136               if (BE (mctx->state_log[next_idx] == NULL
3137                       && err != REG_NOERROR, 0))
3138                 {
3139                   re_node_set_free (&union_set);
3140                   return err;
3141                 }
3142             }
3143         }
3144 #endif /* RE_ENABLE_I18N */
3145       if (naccepted
3146           || check_node_accept (mctx, dfa->nodes + cur_node, str_idx))
3147         {
3148           ok = re_node_set_insert (next_nodes, dfa->nexts[cur_node]);
3149           if (BE (! ok, 0))
3150             {
3151               re_node_set_free (&union_set);
3152               return REG_ESPACE;
3153             }
3154         }
3155     }
3156   re_node_set_free (&union_set);
3157   return REG_NOERROR;
3158 }
3159
3160 /* For all the nodes in CUR_NODES, add the epsilon closures of them to
3161    CUR_NODES, however exclude the nodes which are:
3162     - inside the sub expression whose number is EX_SUBEXP, if FL_OPEN.
3163     - out of the sub expression whose number is EX_SUBEXP, if !FL_OPEN.
3164 */
3165
3166 static reg_errcode_t
3167 internal_function
3168 check_arrival_expand_ecl (const re_dfa_t *dfa, re_node_set *cur_nodes,
3169                           Idx ex_subexp, int type)
3170 {
3171   reg_errcode_t err;
3172   Idx idx, outside_node;
3173   re_node_set new_nodes;
3174 #ifdef DEBUG
3175   assert (cur_nodes->nelem);
3176 #endif
3177   err = re_node_set_alloc (&new_nodes, cur_nodes->nelem);
3178   if (BE (err != REG_NOERROR, 0))
3179     return err;
3180   /* Create a new node set NEW_NODES with the nodes which are epsilon
3181      closures of the node in CUR_NODES.  */
3182
3183   for (idx = 0; idx < cur_nodes->nelem; ++idx)
3184     {
3185       Idx cur_node = cur_nodes->elems[idx];
3186       const re_node_set *eclosure = dfa->eclosures + cur_node;
3187       outside_node = find_subexp_node (dfa, eclosure, ex_subexp, type);
3188       if (outside_node == REG_MISSING)
3189         {
3190           /* There are no problematic nodes, just merge them.  */
3191           err = re_node_set_merge (&new_nodes, eclosure);
3192           if (BE (err != REG_NOERROR, 0))
3193             {
3194               re_node_set_free (&new_nodes);
3195               return err;
3196             }
3197         }
3198       else
3199         {
3200           /* There are problematic nodes, re-calculate incrementally.  */
3201           err = check_arrival_expand_ecl_sub (dfa, &new_nodes, cur_node,
3202                                               ex_subexp, type);
3203           if (BE (err != REG_NOERROR, 0))
3204             {
3205               re_node_set_free (&new_nodes);
3206               return err;
3207             }
3208         }
3209     }
3210   re_node_set_free (cur_nodes);
3211   *cur_nodes = new_nodes;
3212   return REG_NOERROR;
3213 }
3214
3215 /* Helper function for check_arrival_expand_ecl.
3216    Check incrementally the epsilon closure of TARGET, and if it isn't
3217    problematic append it to DST_NODES.  */
3218
3219 static reg_errcode_t
3220 internal_function __attribute_warn_unused_result__
3221 check_arrival_expand_ecl_sub (const re_dfa_t *dfa, re_node_set *dst_nodes,
3222                               Idx target, Idx ex_subexp, int type)
3223 {
3224   Idx cur_node;
3225   for (cur_node = target; !re_node_set_contains (dst_nodes, cur_node);)
3226     {
3227       bool ok;
3228
3229       if (dfa->nodes[cur_node].type == type
3230           && dfa->nodes[cur_node].opr.idx == ex_subexp)
3231         {
3232           if (type == OP_CLOSE_SUBEXP)
3233             {
3234               ok = re_node_set_insert (dst_nodes, cur_node);
3235               if (BE (! ok, 0))
3236                 return REG_ESPACE;
3237             }
3238           break;
3239         }
3240       ok = re_node_set_insert (dst_nodes, cur_node);
3241       if (BE (! ok, 0))
3242         return REG_ESPACE;
3243       if (dfa->edests[cur_node].nelem == 0)
3244         break;
3245       if (dfa->edests[cur_node].nelem == 2)
3246         {
3247           reg_errcode_t err;
3248           err = check_arrival_expand_ecl_sub (dfa, dst_nodes,
3249                                               dfa->edests[cur_node].elems[1],
3250                                               ex_subexp, type);
3251           if (BE (err != REG_NOERROR, 0))
3252             return err;
3253         }
3254       cur_node = dfa->edests[cur_node].elems[0];
3255     }
3256   return REG_NOERROR;
3257 }
3258
3259
3260 /* For all the back references in the current state, calculate the
3261    destination of the back references by the appropriate entry
3262    in MCTX->BKREF_ENTS.  */
3263
3264 static reg_errcode_t
3265 internal_function __attribute_warn_unused_result__
3266 expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes,
3267                     Idx cur_str, Idx subexp_num, int type)
3268 {
3269   const re_dfa_t *const dfa = mctx->dfa;
3270   reg_errcode_t err;
3271   Idx cache_idx_start = search_cur_bkref_entry (mctx, cur_str);
3272   struct re_backref_cache_entry *ent;
3273
3274   if (cache_idx_start == REG_MISSING)
3275     return REG_NOERROR;
3276
3277  restart:
3278   ent = mctx->bkref_ents + cache_idx_start;
3279   do
3280     {
3281       Idx to_idx, next_node;
3282
3283       /* Is this entry ENT is appropriate?  */
3284       if (!re_node_set_contains (cur_nodes, ent->node))
3285         continue; /* No.  */
3286
3287       to_idx = cur_str + ent->subexp_to - ent->subexp_from;
3288       /* Calculate the destination of the back reference, and append it
3289          to MCTX->STATE_LOG.  */
3290       if (to_idx == cur_str)
3291         {
3292           /* The backreference did epsilon transit, we must re-check all the
3293              node in the current state.  */
3294           re_node_set new_dests;
3295           reg_errcode_t err2, err3;
3296           next_node = dfa->edests[ent->node].elems[0];
3297           if (re_node_set_contains (cur_nodes, next_node))
3298             continue;
3299           err = re_node_set_init_1 (&new_dests, next_node);
3300           err2 = check_arrival_expand_ecl (dfa, &new_dests, subexp_num, type);
3301           err3 = re_node_set_merge (cur_nodes, &new_dests);
3302           re_node_set_free (&new_dests);
3303           if (BE (err != REG_NOERROR || err2 != REG_NOERROR
3304                   || err3 != REG_NOERROR, 0))
3305             {
3306               err = (err != REG_NOERROR ? err
3307                      : (err2 != REG_NOERROR ? err2 : err3));
3308               return err;
3309             }
3310           /* TODO: It is still inefficient...  */
3311           goto restart;
3312         }
3313       else
3314         {
3315           re_node_set union_set;
3316           next_node = dfa->nexts[ent->node];
3317           if (mctx->state_log[to_idx])
3318             {
3319               bool ok;
3320               if (re_node_set_contains (&mctx->state_log[to_idx]->nodes,
3321                                         next_node))
3322                 continue;
3323               err = re_node_set_init_copy (&union_set,
3324                                            &mctx->state_log[to_idx]->nodes);
3325               ok = re_node_set_insert (&union_set, next_node);
3326               if (BE (err != REG_NOERROR || ! ok, 0))
3327                 {
3328                   re_node_set_free (&union_set);
3329                   err = err != REG_NOERROR ? err : REG_ESPACE;
3330                   return err;
3331                 }
3332             }
3333           else
3334             {
3335               err = re_node_set_init_1 (&union_set, next_node);
3336               if (BE (err != REG_NOERROR, 0))
3337                 return err;
3338             }
3339           mctx->state_log[to_idx] = re_acquire_state (&err, dfa, &union_set);
3340           re_node_set_free (&union_set);
3341           if (BE (mctx->state_log[to_idx] == NULL
3342                   && err != REG_NOERROR, 0))
3343             return err;
3344         }
3345     }
3346   while (ent++->more);
3347   return REG_NOERROR;
3348 }
3349
3350 /* Build transition table for the state.
3351    Return true if successful.  */
3352
3353 static bool
3354 internal_function
3355 build_trtable (const re_dfa_t *dfa, re_dfastate_t *state)
3356 {
3357   reg_errcode_t err;
3358   Idx i, j;
3359   int ch;
3360   bool need_word_trtable = false;
3361   bitset_word_t elem, mask;
3362   bool dests_node_malloced = false;
3363   bool dest_states_malloced = false;
3364   Idx ndests; /* Number of the destination states from `state'.  */
3365   re_dfastate_t **trtable;
3366   re_dfastate_t **dest_states = NULL, **dest_states_word, **dest_states_nl;
3367   re_node_set follows, *dests_node;
3368   bitset_t *dests_ch;
3369   bitset_t acceptable;
3370
3371   struct dests_alloc
3372   {
3373     re_node_set dests_node[SBC_MAX];
3374     bitset_t dests_ch[SBC_MAX];
3375   } *dests_alloc;
3376
3377   /* We build DFA states which corresponds to the destination nodes
3378      from `state'.  `dests_node[i]' represents the nodes which i-th
3379      destination state contains, and `dests_ch[i]' represents the
3380      characters which i-th destination state accepts.  */
3381   if (__libc_use_alloca (sizeof (struct dests_alloc)))
3382     dests_alloc = (struct dests_alloc *) alloca (sizeof (struct dests_alloc));
3383   else
3384     {
3385       dests_alloc = re_malloc (struct dests_alloc, 1);
3386       if (BE (dests_alloc == NULL, 0))
3387         return false;
3388       dests_node_malloced = true;
3389     }
3390   dests_node = dests_alloc->dests_node;
3391   dests_ch = dests_alloc->dests_ch;
3392
3393   /* Initialize transiton table.  */
3394   state->word_trtable = state->trtable = NULL;
3395
3396   /* At first, group all nodes belonging to `state' into several
3397      destinations.  */
3398   ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch);
3399   if (BE (! REG_VALID_NONZERO_INDEX (ndests), 0))
3400     {
3401       if (dests_node_malloced)
3402         free (dests_alloc);
3403       if (ndests == 0)
3404         {
3405           state->trtable = (re_dfastate_t **)
3406             calloc (sizeof (re_dfastate_t *), SBC_MAX);
3407           return true;
3408         }
3409       return false;
3410     }
3411
3412   err = re_node_set_alloc (&follows, ndests + 1);
3413   if (BE (err != REG_NOERROR, 0))
3414     goto out_free;
3415
3416   /* Avoid arithmetic overflow in size calculation.  */
3417   if (BE ((((SIZE_MAX - (sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX)
3418             / (3 * sizeof (re_dfastate_t *)))
3419            < ndests),
3420           0))
3421     goto out_free;
3422
3423   if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX
3424                          + ndests * 3 * sizeof (re_dfastate_t *)))
3425     dest_states = (re_dfastate_t **)
3426       alloca (ndests * 3 * sizeof (re_dfastate_t *));
3427   else
3428     {
3429       dest_states = (re_dfastate_t **)
3430         malloc (ndests * 3 * sizeof (re_dfastate_t *));
3431       if (BE (dest_states == NULL, 0))
3432         {
3433 out_free:
3434           if (dest_states_malloced)
3435             free (dest_states);
3436           re_node_set_free (&follows);
3437           for (i = 0; i < ndests; ++i)
3438             re_node_set_free (dests_node + i);
3439           if (dests_node_malloced)
3440             free (dests_alloc);
3441           return false;
3442         }
3443       dest_states_malloced = true;
3444     }
3445   dest_states_word = dest_states + ndests;
3446   dest_states_nl = dest_states_word + ndests;
3447   bitset_empty (acceptable);
3448
3449   /* Then build the states for all destinations.  */
3450   for (i = 0; i < ndests; ++i)
3451     {
3452       Idx next_node;
3453       re_node_set_empty (&follows);
3454       /* Merge the follows of this destination states.  */
3455       for (j = 0; j < dests_node[i].nelem; ++j)
3456         {
3457           next_node = dfa->nexts[dests_node[i].elems[j]];
3458           if (next_node != REG_MISSING)
3459             {
3460               err = re_node_set_merge (&follows, dfa->eclosures + next_node);
3461               if (BE (err != REG_NOERROR, 0))
3462                 goto out_free;
3463             }
3464         }
3465       dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0);
3466       if (BE (dest_states[i] == NULL && err != REG_NOERROR, 0))
3467         goto out_free;
3468       /* If the new state has context constraint,
3469          build appropriate states for these contexts.  */
3470       if (dest_states[i]->has_constraint)
3471         {
3472           dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows,
3473                                                           CONTEXT_WORD);
3474           if (BE (dest_states_word[i] == NULL && err != REG_NOERROR, 0))
3475             goto out_free;
3476
3477           if (dest_states[i] != dest_states_word[i] && dfa->mb_cur_max > 1)
3478             need_word_trtable = true;
3479
3480           dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows,
3481                                                         CONTEXT_NEWLINE);
3482           if (BE (dest_states_nl[i] == NULL && err != REG_NOERROR, 0))
3483             goto out_free;
3484         }
3485       else
3486         {
3487           dest_states_word[i] = dest_states[i];
3488           dest_states_nl[i] = dest_states[i];
3489         }
3490       bitset_merge (acceptable, dests_ch[i]);
3491     }
3492
3493   if (!BE (need_word_trtable, 0))
3494     {
3495       /* We don't care about whether the following character is a word
3496          character, or we are in a single-byte character set so we can
3497          discern by looking at the character code: allocate a
3498          256-entry transition table.  */
3499       trtable = state->trtable =
3500         (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX);
3501       if (BE (trtable == NULL, 0))
3502         goto out_free;
3503
3504       /* For all characters ch...:  */
3505       for (i = 0; i < BITSET_WORDS; ++i)
3506         for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
3507              elem;
3508              mask <<= 1, elem >>= 1, ++ch)
3509           if (BE (elem & 1, 0))
3510             {
3511               /* There must be exactly one destination which accepts
3512                  character ch.  See group_nodes_into_DFAstates.  */
3513               for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
3514                 ;
3515
3516               /* j-th destination accepts the word character ch.  */
3517               if (dfa->word_char[i] & mask)
3518                 trtable[ch] = dest_states_word[j];
3519               else
3520                 trtable[ch] = dest_states[j];
3521             }
3522     }
3523   else
3524     {
3525       /* We care about whether the following character is a word
3526          character, and we are in a multi-byte character set: discern
3527          by looking at the character code: build two 256-entry
3528          transition tables, one starting at trtable[0] and one
3529          starting at trtable[SBC_MAX].  */
3530       trtable = state->word_trtable =
3531         (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), 2 * SBC_MAX);
3532       if (BE (trtable == NULL, 0))
3533         goto out_free;
3534
3535       /* For all characters ch...:  */
3536       for (i = 0; i < BITSET_WORDS; ++i)
3537         for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
3538              elem;
3539              mask <<= 1, elem >>= 1, ++ch)
3540           if (BE (elem & 1, 0))
3541             {
3542               /* There must be exactly one destination which accepts
3543                  character ch.  See group_nodes_into_DFAstates.  */
3544               for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
3545                 ;
3546
3547               /* j-th destination accepts the word character ch.  */
3548               trtable[ch] = dest_states[j];
3549               trtable[ch + SBC_MAX] = dest_states_word[j];
3550             }
3551     }
3552
3553   /* new line */
3554   if (bitset_contain (acceptable, NEWLINE_CHAR))
3555     {
3556       /* The current state accepts newline character.  */
3557       for (j = 0; j < ndests; ++j)
3558         if (bitset_contain (dests_ch[j], NEWLINE_CHAR))
3559           {
3560             /* k-th destination accepts newline character.  */
3561             trtable[NEWLINE_CHAR] = dest_states_nl[j];
3562             if (need_word_trtable)
3563               trtable[NEWLINE_CHAR + SBC_MAX] = dest_states_nl[j];
3564             /* There must be only one destination which accepts
3565                newline.  See group_nodes_into_DFAstates.  */
3566             break;
3567           }
3568     }
3569
3570   if (dest_states_malloced)
3571     free (dest_states);
3572
3573   re_node_set_free (&follows);
3574   for (i = 0; i < ndests; ++i)
3575     re_node_set_free (dests_node + i);
3576
3577   if (dests_node_malloced)
3578     free (dests_alloc);
3579
3580   return true;
3581 }
3582
3583 /* Group all nodes belonging to STATE into several destinations.
3584    Then for all destinations, set the nodes belonging to the destination
3585    to DESTS_NODE[i] and set the characters accepted by the destination
3586    to DEST_CH[i].  This function return the number of destinations.  */
3587
3588 static Idx
3589 internal_function
3590 group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state,
3591                             re_node_set *dests_node, bitset_t *dests_ch)
3592 {
3593   reg_errcode_t err;
3594   bool ok;
3595   Idx i, j, k;
3596   Idx ndests; /* Number of the destinations from `state'.  */
3597   bitset_t accepts; /* Characters a node can accept.  */
3598   const re_node_set *cur_nodes = &state->nodes;
3599   bitset_empty (accepts);
3600   ndests = 0;
3601
3602   /* For all the nodes belonging to `state',  */
3603   for (i = 0; i < cur_nodes->nelem; ++i)
3604     {
3605       re_token_t *node = &dfa->nodes[cur_nodes->elems[i]];
3606       re_token_type_t type = node->type;
3607       unsigned int constraint = node->constraint;
3608
3609       /* Enumerate all single byte character this node can accept.  */
3610       if (type == CHARACTER)
3611         bitset_set (accepts, node->opr.c);
3612       else if (type == SIMPLE_BRACKET)
3613         {
3614           bitset_merge (accepts, node->opr.sbcset);
3615         }
3616       else if (type == OP_PERIOD)
3617         {
3618 #ifdef RE_ENABLE_I18N
3619           if (dfa->mb_cur_max > 1)
3620             bitset_merge (accepts, dfa->sb_char);
3621           else
3622 #endif
3623             bitset_set_all (accepts);
3624           if (!(dfa->syntax & RE_DOT_NEWLINE))
3625             bitset_clear (accepts, '\n');
3626           if (dfa->syntax & RE_DOT_NOT_NULL)
3627             bitset_clear (accepts, '\0');
3628         }
3629 #ifdef RE_ENABLE_I18N
3630       else if (type == OP_UTF8_PERIOD)
3631         {
3632           if (ASCII_CHARS % BITSET_WORD_BITS == 0)
3633             memset (accepts, -1, ASCII_CHARS / CHAR_BIT);
3634           else
3635             bitset_merge (accepts, utf8_sb_map);
3636           if (!(dfa->syntax & RE_DOT_NEWLINE))
3637             bitset_clear (accepts, '\n');
3638           if (dfa->syntax & RE_DOT_NOT_NULL)
3639             bitset_clear (accepts, '\0');
3640         }
3641 #endif
3642       else
3643         continue;
3644
3645       /* Check the `accepts' and sift the characters which are not
3646          match it the context.  */
3647       if (constraint)
3648         {
3649           if (constraint & NEXT_NEWLINE_CONSTRAINT)
3650             {
3651               bool accepts_newline = bitset_contain (accepts, NEWLINE_CHAR);
3652               bitset_empty (accepts);
3653               if (accepts_newline)
3654                 bitset_set (accepts, NEWLINE_CHAR);
3655               else
3656                 continue;
3657             }
3658           if (constraint & NEXT_ENDBUF_CONSTRAINT)
3659             {
3660               bitset_empty (accepts);
3661               continue;
3662             }
3663
3664           if (constraint & NEXT_WORD_CONSTRAINT)
3665             {
3666               bitset_word_t any_set = 0;
3667               if (type == CHARACTER && !node->word_char)
3668                 {
3669                   bitset_empty (accepts);
3670                   continue;
3671                 }
3672 #ifdef RE_ENABLE_I18N
3673               if (dfa->mb_cur_max > 1)
3674                 for (j = 0; j < BITSET_WORDS; ++j)
3675                   any_set |= (accepts[j] &= (dfa->word_char[j] | ~dfa->sb_char[j]));
3676               else
3677 #endif
3678                 for (j = 0; j < BITSET_WORDS; ++j)
3679                   any_set |= (accepts[j] &= dfa->word_char[j]);
3680               if (!any_set)
3681                 continue;
3682             }
3683           if (constraint & NEXT_NOTWORD_CONSTRAINT)
3684             {
3685               bitset_word_t any_set = 0;
3686               if (type == CHARACTER && node->word_char)
3687                 {
3688                   bitset_empty (accepts);
3689                   continue;
3690                 }
3691 #ifdef RE_ENABLE_I18N
3692               if (dfa->mb_cur_max > 1)
3693                 for (j = 0; j < BITSET_WORDS; ++j)
3694                   any_set |= (accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j]));
3695               else
3696 #endif
3697                 for (j = 0; j < BITSET_WORDS; ++j)
3698                   any_set |= (accepts[j] &= ~dfa->word_char[j]);
3699               if (!any_set)
3700                 continue;
3701             }
3702         }
3703
3704       /* Then divide `accepts' into DFA states, or create a new
3705          state.  Above, we make sure that accepts is not empty.  */
3706       for (j = 0; j < ndests; ++j)
3707         {
3708           bitset_t intersec; /* Intersection sets, see below.  */
3709           bitset_t remains;
3710           /* Flags, see below.  */
3711           bitset_word_t has_intersec, not_subset, not_consumed;
3712
3713           /* Optimization, skip if this state doesn't accept the character.  */
3714           if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c))
3715             continue;
3716
3717           /* Enumerate the intersection set of this state and `accepts'.  */
3718           has_intersec = 0;
3719           for (k = 0; k < BITSET_WORDS; ++k)
3720             has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k];
3721           /* And skip if the intersection set is empty.  */
3722           if (!has_intersec)
3723             continue;
3724
3725           /* Then check if this state is a subset of `accepts'.  */
3726           not_subset = not_consumed = 0;
3727           for (k = 0; k < BITSET_WORDS; ++k)
3728             {
3729               not_subset |= remains[k] = ~accepts[k] & dests_ch[j][k];
3730               not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k];
3731             }
3732
3733           /* If this state isn't a subset of `accepts', create a
3734              new group state, which has the `remains'. */
3735           if (not_subset)
3736             {
3737               bitset_copy (dests_ch[ndests], remains);
3738               bitset_copy (dests_ch[j], intersec);
3739               err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]);
3740               if (BE (err != REG_NOERROR, 0))
3741                 goto error_return;
3742               ++ndests;
3743             }
3744
3745           /* Put the position in the current group. */
3746           ok = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]);
3747           if (BE (! ok, 0))
3748             goto error_return;
3749
3750           /* If all characters are consumed, go to next node. */
3751           if (!not_consumed)
3752             break;
3753         }
3754       /* Some characters remain, create a new group. */
3755       if (j == ndests)
3756         {
3757           bitset_copy (dests_ch[ndests], accepts);
3758           err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]);
3759           if (BE (err != REG_NOERROR, 0))
3760             goto error_return;
3761           ++ndests;
3762           bitset_empty (accepts);
3763         }
3764     }
3765   return ndests;
3766  error_return:
3767   for (j = 0; j < ndests; ++j)
3768     re_node_set_free (dests_node + j);
3769   return REG_MISSING;
3770 }
3771
3772 #ifdef RE_ENABLE_I18N
3773 /* Check how many bytes the node `dfa->nodes[node_idx]' accepts.
3774    Return the number of the bytes the node accepts.
3775    STR_IDX is the current index of the input string.
3776
3777    This function handles the nodes which can accept one character, or
3778    one collating element like '.', '[a-z]', opposite to the other nodes
3779    can only accept one byte.  */
3780
3781 static int
3782 internal_function
3783 check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,
3784                          const re_string_t *input, Idx str_idx)
3785 {
3786   const re_token_t *node = dfa->nodes + node_idx;
3787   int char_len, elem_len;
3788   Idx i;
3789
3790   if (BE (node->type == OP_UTF8_PERIOD, 0))
3791     {
3792       unsigned char c = re_string_byte_at (input, str_idx), d;
3793       if (BE (c < 0xc2, 1))
3794         return 0;
3795
3796       if (str_idx + 2 > input->len)
3797         return 0;
3798
3799       d = re_string_byte_at (input, str_idx + 1);
3800       if (c < 0xe0)
3801         return (d < 0x80 || d > 0xbf) ? 0 : 2;
3802       else if (c < 0xf0)
3803         {
3804           char_len = 3;
3805           if (c == 0xe0 && d < 0xa0)
3806             return 0;
3807         }
3808       else if (c < 0xf8)
3809         {
3810           char_len = 4;
3811           if (c == 0xf0 && d < 0x90)
3812             return 0;
3813         }
3814       else if (c < 0xfc)
3815         {
3816           char_len = 5;
3817           if (c == 0xf8 && d < 0x88)
3818             return 0;
3819         }
3820       else if (c < 0xfe)
3821         {
3822           char_len = 6;
3823           if (c == 0xfc && d < 0x84)
3824             return 0;
3825         }
3826       else
3827         return 0;
3828
3829       if (str_idx + char_len > input->len)
3830         return 0;
3831
3832       for (i = 1; i < char_len; ++i)
3833         {
3834           d = re_string_byte_at (input, str_idx + i);
3835           if (d < 0x80 || d > 0xbf)
3836             return 0;
3837         }
3838       return char_len;
3839     }
3840
3841   char_len = re_string_char_size_at (input, str_idx);
3842   if (node->type == OP_PERIOD)
3843     {
3844       if (char_len <= 1)
3845         return 0;
3846       /* FIXME: I don't think this if is needed, as both '\n'
3847          and '\0' are char_len == 1.  */
3848       /* '.' accepts any one character except the following two cases.  */
3849       if ((!(dfa->syntax & RE_DOT_NEWLINE) &&
3850            re_string_byte_at (input, str_idx) == '\n') ||
3851           ((dfa->syntax & RE_DOT_NOT_NULL) &&
3852            re_string_byte_at (input, str_idx) == '\0'))
3853         return 0;
3854       return char_len;
3855     }
3856
3857   elem_len = re_string_elem_size_at (input, str_idx);
3858   if ((elem_len <= 1 && char_len <= 1) || char_len == 0)
3859     return 0;
3860
3861   if (node->type == COMPLEX_BRACKET)
3862     {
3863       const re_charset_t *cset = node->opr.mbcset;
3864 # ifdef _LIBC
3865       const unsigned char *pin
3866         = ((const unsigned char *) re_string_get_buffer (input) + str_idx);
3867       Idx j;
3868       uint32_t nrules;
3869 # endif /* _LIBC */
3870       int match_len = 0;
3871       wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars)
3872                     ? re_string_wchar_at (input, str_idx) : 0);
3873
3874       /* match with multibyte character?  */
3875       for (i = 0; i < cset->nmbchars; ++i)
3876         if (wc == cset->mbchars[i])
3877           {
3878             match_len = char_len;
3879             goto check_node_accept_bytes_match;
3880           }
3881       /* match with character_class?  */
3882       for (i = 0; i < cset->nchar_classes; ++i)
3883         {
3884           wctype_t wt = cset->char_classes[i];
3885           if (__iswctype (wc, wt))
3886             {
3887               match_len = char_len;
3888               goto check_node_accept_bytes_match;
3889             }
3890         }
3891
3892 # ifdef _LIBC
3893       nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3894       if (nrules != 0)
3895         {
3896           unsigned int in_collseq = 0;
3897           const int32_t *table, *indirect;
3898           const unsigned char *weights, *extra;
3899           const char *collseqwc;
3900           int32_t idx;
3901           /* This #include defines a local function!  */
3902 #  include <locale/weight.h>
3903
3904           /* match with collating_symbol?  */
3905           if (cset->ncoll_syms)
3906             extra = (const unsigned char *)
3907               _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
3908           for (i = 0; i < cset->ncoll_syms; ++i)
3909             {
3910               const unsigned char *coll_sym = extra + cset->coll_syms[i];
3911               /* Compare the length of input collating element and
3912                  the length of current collating element.  */
3913               if (*coll_sym != elem_len)
3914                 continue;
3915               /* Compare each bytes.  */
3916               for (j = 0; j < *coll_sym; j++)
3917                 if (pin[j] != coll_sym[1 + j])
3918                   break;
3919               if (j == *coll_sym)
3920                 {
3921                   /* Match if every bytes is equal.  */
3922                   match_len = j;
3923                   goto check_node_accept_bytes_match;
3924                 }
3925             }
3926
3927           if (cset->nranges)
3928             {
3929               if (elem_len <= char_len)
3930                 {
3931                   collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
3932                   in_collseq = __collseq_table_lookup (collseqwc, wc);
3933                 }
3934               else
3935                 in_collseq = find_collation_sequence_value (pin, elem_len);
3936             }
3937           /* match with range expression?  */
3938           for (i = 0; i < cset->nranges; ++i)
3939             if (cset->range_starts[i] <= in_collseq
3940                 && in_collseq <= cset->range_ends[i])
3941               {
3942                 match_len = elem_len;
3943                 goto check_node_accept_bytes_match;
3944               }
3945
3946           /* match with equivalence_class?  */
3947           if (cset->nequiv_classes)
3948             {
3949               const unsigned char *cp = pin;
3950               table = (const int32_t *)
3951                 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
3952               weights = (const unsigned char *)
3953                 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
3954               extra = (const unsigned char *)
3955                 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
3956               indirect = (const int32_t *)
3957                 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
3958               int32_t idx = findidx (&cp);
3959               if (idx > 0)
3960                 for (i = 0; i < cset->nequiv_classes; ++i)
3961                   {
3962                     int32_t equiv_class_idx = cset->equiv_classes[i];
3963                     size_t weight_len = weights[idx & 0xffffff];
3964                     if (weight_len == weights[equiv_class_idx & 0xffffff]
3965                         && (idx >> 24) == (equiv_class_idx >> 24))
3966                       {
3967                         Idx cnt = 0;
3968
3969                         idx &= 0xffffff;
3970                         equiv_class_idx &= 0xffffff;
3971
3972                         while (cnt <= weight_len
3973                                && (weights[equiv_class_idx + 1 + cnt]
3974                                    == weights[idx + 1 + cnt]))
3975                           ++cnt;
3976                         if (cnt > weight_len)
3977                           {
3978                             match_len = elem_len;
3979                             goto check_node_accept_bytes_match;
3980                           }
3981                       }
3982                   }
3983             }
3984         }
3985       else
3986 # endif /* _LIBC */
3987         {
3988           /* match with range expression?  */
3989 #if __GNUC__ >= 2 && ! (__STDC_VERSION__ < 199901L && __STRICT_ANSI__)
3990           wchar_t cmp_buf[] = {L'\0', L'\0', wc, L'\0', L'\0', L'\0'};
3991 #else
3992           wchar_t cmp_buf[] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'};
3993           cmp_buf[2] = wc;
3994 #endif
3995           for (i = 0; i < cset->nranges; ++i)
3996             {
3997               cmp_buf[0] = cset->range_starts[i];
3998               cmp_buf[4] = cset->range_ends[i];
3999               if (wcscoll (cmp_buf, cmp_buf + 2) <= 0
4000                   && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0)
4001                 {
4002                   match_len = char_len;
4003                   goto check_node_accept_bytes_match;
4004                 }
4005             }
4006         }
4007     check_node_accept_bytes_match:
4008       if (!cset->non_match)
4009         return match_len;
4010       else
4011         {
4012           if (match_len > 0)
4013             return 0;
4014           else
4015             return (elem_len > char_len) ? elem_len : char_len;
4016         }
4017     }
4018   return 0;
4019 }
4020
4021 # ifdef _LIBC
4022 static unsigned int
4023 internal_function
4024 find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len)
4025 {
4026   uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
4027   if (nrules == 0)
4028     {
4029       if (mbs_len == 1)
4030         {
4031           /* No valid character.  Match it as a single byte character.  */
4032           const unsigned char *collseq = (const unsigned char *)
4033             _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
4034           return collseq[mbs[0]];
4035         }
4036       return UINT_MAX;
4037     }
4038   else
4039     {
4040       int32_t idx;
4041       const unsigned char *extra = (const unsigned char *)
4042         _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
4043       int32_t extrasize = (const unsigned char *)
4044         _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB + 1) - extra;
4045
4046       for (idx = 0; idx < extrasize;)
4047         {
4048           int mbs_cnt;
4049           bool found = false;
4050           int32_t elem_mbs_len;
4051           /* Skip the name of collating element name.  */
4052           idx = idx + extra[idx] + 1;
4053           elem_mbs_len = extra[idx++];
4054           if (mbs_len == elem_mbs_len)
4055             {
4056               for (mbs_cnt = 0; mbs_cnt < elem_mbs_len; ++mbs_cnt)
4057                 if (extra[idx + mbs_cnt] != mbs[mbs_cnt])
4058                   break;
4059               if (mbs_cnt == elem_mbs_len)
4060                 /* Found the entry.  */
4061                 found = true;
4062             }
4063           /* Skip the byte sequence of the collating element.  */
4064           idx += elem_mbs_len;
4065           /* Adjust for the alignment.  */
4066           idx = (idx + 3) & ~3;
4067           /* Skip the collation sequence value.  */
4068           idx += sizeof (uint32_t);
4069           /* Skip the wide char sequence of the collating element.  */
4070           idx = idx + sizeof (uint32_t) * (extra[idx] + 1);
4071           /* If we found the entry, return the sequence value.  */
4072           if (found)
4073             return *(uint32_t *) (extra + idx);
4074           /* Skip the collation sequence value.  */
4075           idx += sizeof (uint32_t);
4076         }
4077       return UINT_MAX;
4078     }
4079 }
4080 # endif /* _LIBC */
4081 #endif /* RE_ENABLE_I18N */
4082
4083 /* Check whether the node accepts the byte which is IDX-th
4084    byte of the INPUT.  */
4085
4086 static bool
4087 internal_function
4088 check_node_accept (const re_match_context_t *mctx, const re_token_t *node,
4089                    Idx idx)
4090 {
4091   unsigned char ch;
4092   ch = re_string_byte_at (&mctx->input, idx);
4093   switch (node->type)
4094     {
4095     case CHARACTER:
4096       if (node->opr.c != ch)
4097         return false;
4098       break;
4099
4100     case SIMPLE_BRACKET:
4101       if (!bitset_contain (node->opr.sbcset, ch))
4102         return false;
4103       break;
4104
4105 #ifdef RE_ENABLE_I18N
4106     case OP_UTF8_PERIOD:
4107       if (ch >= ASCII_CHARS)
4108         return false;
4109       /* FALLTHROUGH */
4110 #endif
4111     case OP_PERIOD:
4112       if ((ch == '\n' && !(mctx->dfa->syntax & RE_DOT_NEWLINE))
4113           || (ch == '\0' && (mctx->dfa->syntax & RE_DOT_NOT_NULL)))
4114         return false;
4115       break;
4116
4117     default:
4118       return false;
4119     }
4120
4121   if (node->constraint)
4122     {
4123       /* The node has constraints.  Check whether the current context
4124          satisfies the constraints.  */
4125       unsigned int context = re_string_context_at (&mctx->input, idx,
4126                                                    mctx->eflags);
4127       if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
4128         return false;
4129     }
4130
4131   return true;
4132 }
4133
4134 /* Extend the buffers, if the buffers have run out.  */
4135
4136 static reg_errcode_t
4137 internal_function __attribute_warn_unused_result__
4138 extend_buffers (re_match_context_t *mctx)
4139 {
4140   reg_errcode_t ret;
4141   re_string_t *pstr = &mctx->input;
4142
4143   /* Avoid overflow.  */
4144   if (BE (SIZE_MAX / 2 / sizeof (re_dfastate_t *) <= pstr->bufs_len, 0))
4145     return REG_ESPACE;
4146
4147   /* Double the lengthes of the buffers.  */
4148   ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
4149   if (BE (ret != REG_NOERROR, 0))
4150     return ret;
4151
4152   if (mctx->state_log != NULL)
4153     {
4154       /* And double the length of state_log.  */
4155       /* XXX We have no indication of the size of this buffer.  If this
4156          allocation fail we have no indication that the state_log array
4157          does not have the right size.  */
4158       re_dfastate_t **new_array = re_realloc (mctx->state_log, re_dfastate_t *,
4159                                               pstr->bufs_len + 1);
4160       if (BE (new_array == NULL, 0))
4161         return REG_ESPACE;
4162       mctx->state_log = new_array;
4163     }
4164
4165   /* Then reconstruct the buffers.  */
4166   if (pstr->icase)
4167     {
4168 #ifdef RE_ENABLE_I18N
4169       if (pstr->mb_cur_max > 1)
4170         {
4171           ret = build_wcs_upper_buffer (pstr);
4172           if (BE (ret != REG_NOERROR, 0))
4173             return ret;
4174         }
4175       else
4176 #endif /* RE_ENABLE_I18N  */
4177         build_upper_buffer (pstr);
4178     }
4179   else
4180     {
4181 #ifdef RE_ENABLE_I18N
4182       if (pstr->mb_cur_max > 1)
4183         build_wcs_buffer (pstr);
4184       else
4185 #endif /* RE_ENABLE_I18N  */
4186         {
4187           if (pstr->trans != NULL)
4188             re_string_translate_buffer (pstr);
4189         }
4190     }
4191   return REG_NOERROR;
4192 }
4193
4194 \f
4195 /* Functions for matching context.  */
4196
4197 /* Initialize MCTX.  */
4198
4199 static reg_errcode_t
4200 internal_function __attribute_warn_unused_result__
4201 match_ctx_init (re_match_context_t *mctx, int eflags, Idx n)
4202 {
4203   mctx->eflags = eflags;
4204   mctx->match_last = REG_MISSING;
4205   if (n > 0)
4206     {
4207       /* Avoid overflow.  */
4208       size_t max_object_size =
4209         MAX (sizeof (struct re_backref_cache_entry),
4210              sizeof (re_sub_match_top_t *));
4211       if (BE (SIZE_MAX / max_object_size < n, 0))
4212         return REG_ESPACE;
4213
4214       mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n);
4215       mctx->sub_tops = re_malloc (re_sub_match_top_t *, n);
4216       if (BE (mctx->bkref_ents == NULL || mctx->sub_tops == NULL, 0))
4217         return REG_ESPACE;
4218     }
4219   /* Already zero-ed by the caller.
4220      else
4221        mctx->bkref_ents = NULL;
4222      mctx->nbkref_ents = 0;
4223      mctx->nsub_tops = 0;  */
4224   mctx->abkref_ents = n;
4225   mctx->max_mb_elem_len = 1;
4226   mctx->asub_tops = n;
4227   return REG_NOERROR;
4228 }
4229
4230 /* Clean the entries which depend on the current input in MCTX.
4231    This function must be invoked when the matcher changes the start index
4232    of the input, or changes the input string.  */
4233
4234 static void
4235 internal_function
4236 match_ctx_clean (re_match_context_t *mctx)
4237 {
4238   Idx st_idx;
4239   for (st_idx = 0; st_idx < mctx->nsub_tops; ++st_idx)
4240     {
4241       Idx sl_idx;
4242       re_sub_match_top_t *top = mctx->sub_tops[st_idx];
4243       for (sl_idx = 0; sl_idx < top->nlasts; ++sl_idx)
4244         {
4245           re_sub_match_last_t *last = top->lasts[sl_idx];
4246           re_free (last->path.array);
4247           re_free (last);
4248         }
4249       re_free (top->lasts);
4250       if (top->path)
4251         {
4252           re_free (top->path->array);
4253           re_free (top->path);
4254         }
4255       free (top);
4256     }
4257
4258   mctx->nsub_tops = 0;
4259   mctx->nbkref_ents = 0;
4260 }
4261
4262 /* Free all the memory associated with MCTX.  */
4263
4264 static void
4265 internal_function
4266 match_ctx_free (re_match_context_t *mctx)
4267 {
4268   /* First, free all the memory associated with MCTX->SUB_TOPS.  */
4269   match_ctx_clean (mctx);
4270   re_free (mctx->sub_tops);
4271   re_free (mctx->bkref_ents);
4272 }
4273
4274 /* Add a new backreference entry to MCTX.
4275    Note that we assume that caller never call this function with duplicate
4276    entry, and call with STR_IDX which isn't smaller than any existing entry.
4277 */
4278
4279 static reg_errcode_t
4280 internal_function __attribute_warn_unused_result__
4281 match_ctx_add_entry (re_match_context_t *mctx, Idx node, Idx str_idx, Idx from,
4282                      Idx to)
4283 {
4284   if (mctx->nbkref_ents >= mctx->abkref_ents)
4285     {
4286       struct re_backref_cache_entry* new_entry;
4287       new_entry = re_realloc (mctx->bkref_ents, struct re_backref_cache_entry,
4288                               mctx->abkref_ents * 2);
4289       if (BE (new_entry == NULL, 0))
4290         {
4291           re_free (mctx->bkref_ents);
4292           return REG_ESPACE;
4293         }
4294       mctx->bkref_ents = new_entry;
4295       memset (mctx->bkref_ents + mctx->nbkref_ents, '\0',
4296               sizeof (struct re_backref_cache_entry) * mctx->abkref_ents);
4297       mctx->abkref_ents *= 2;
4298     }
4299   if (mctx->nbkref_ents > 0
4300       && mctx->bkref_ents[mctx->nbkref_ents - 1].str_idx == str_idx)
4301     mctx->bkref_ents[mctx->nbkref_ents - 1].more = 1;
4302
4303   mctx->bkref_ents[mctx->nbkref_ents].node = node;
4304   mctx->bkref_ents[mctx->nbkref_ents].str_idx = str_idx;
4305   mctx->bkref_ents[mctx->nbkref_ents].subexp_from = from;
4306   mctx->bkref_ents[mctx->nbkref_ents].subexp_to = to;
4307
4308   /* This is a cache that saves negative results of check_dst_limits_calc_pos.
4309      If bit N is clear, means that this entry won't epsilon-transition to
4310      an OP_OPEN_SUBEXP or OP_CLOSE_SUBEXP for the N+1-th subexpression.  If
4311      it is set, check_dst_limits_calc_pos_1 will recurse and try to find one
4312      such node.
4313
4314      A backreference does not epsilon-transition unless it is empty, so set
4315      to all zeros if FROM != TO.  */
4316   mctx->bkref_ents[mctx->nbkref_ents].eps_reachable_subexps_map
4317     = (from == to ? -1 : 0);
4318
4319   mctx->bkref_ents[mctx->nbkref_ents++].more = 0;
4320   if (mctx->max_mb_elem_len < to - from)
4321     mctx->max_mb_elem_len = to - from;
4322   return REG_NOERROR;
4323 }
4324
4325 /* Return the first entry with the same str_idx, or REG_MISSING if none is
4326    found.  Note that MCTX->BKREF_ENTS is already sorted by MCTX->STR_IDX.  */
4327
4328 static Idx
4329 internal_function
4330 search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx)
4331 {
4332   Idx left, right, mid, last;
4333   last = right = mctx->nbkref_ents;
4334   for (left = 0; left < right;)
4335     {
4336       mid = (left + right) / 2;
4337       if (mctx->bkref_ents[mid].str_idx < str_idx)
4338         left = mid + 1;
4339       else
4340         right = mid;
4341     }
4342   if (left < last && mctx->bkref_ents[left].str_idx == str_idx)
4343     return left;
4344   else
4345     return REG_MISSING;
4346 }
4347
4348 /* Register the node NODE, whose type is OP_OPEN_SUBEXP, and which matches
4349    at STR_IDX.  */
4350
4351 static reg_errcode_t
4352 internal_function __attribute_warn_unused_result__
4353 match_ctx_add_subtop (re_match_context_t *mctx, Idx node, Idx str_idx)
4354 {
4355 #ifdef DEBUG
4356   assert (mctx->sub_tops != NULL);
4357   assert (mctx->asub_tops > 0);
4358 #endif
4359   if (BE (mctx->nsub_tops == mctx->asub_tops, 0))
4360     {
4361       Idx new_asub_tops = mctx->asub_tops * 2;
4362       re_sub_match_top_t **new_array = re_realloc (mctx->sub_tops,
4363                                                    re_sub_match_top_t *,
4364                                                    new_asub_tops);
4365       if (BE (new_array == NULL, 0))
4366         return REG_ESPACE;
4367       mctx->sub_tops = new_array;
4368       mctx->asub_tops = new_asub_tops;
4369     }
4370   mctx->sub_tops[mctx->nsub_tops] = calloc (1, sizeof (re_sub_match_top_t));
4371   if (BE (mctx->sub_tops[mctx->nsub_tops] == NULL, 0))
4372     return REG_ESPACE;
4373   mctx->sub_tops[mctx->nsub_tops]->node = node;
4374   mctx->sub_tops[mctx->nsub_tops++]->str_idx = str_idx;
4375   return REG_NOERROR;
4376 }
4377
4378 /* Register the node NODE, whose type is OP_CLOSE_SUBEXP, and which matches
4379    at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUB_TOP.  */
4380
4381 static re_sub_match_last_t *
4382 internal_function
4383 match_ctx_add_sublast (re_sub_match_top_t *subtop, Idx node, Idx str_idx)
4384 {
4385   re_sub_match_last_t *new_entry;
4386   if (BE (subtop->nlasts == subtop->alasts, 0))
4387     {
4388       Idx new_alasts = 2 * subtop->alasts + 1;
4389       re_sub_match_last_t **new_array = re_realloc (subtop->lasts,
4390                                                     re_sub_match_last_t *,
4391                                                     new_alasts);
4392       if (BE (new_array == NULL, 0))
4393         return NULL;
4394       subtop->lasts = new_array;
4395       subtop->alasts = new_alasts;
4396     }
4397   new_entry = calloc (1, sizeof (re_sub_match_last_t));
4398   if (BE (new_entry != NULL, 1))
4399     {
4400       subtop->lasts[subtop->nlasts] = new_entry;
4401       new_entry->node = node;
4402       new_entry->str_idx = str_idx;
4403       ++subtop->nlasts;
4404     }
4405   return new_entry;
4406 }
4407
4408 static void
4409 internal_function
4410 sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
4411                re_dfastate_t **limited_sts, Idx last_node, Idx last_str_idx)
4412 {
4413   sctx->sifted_states = sifted_sts;
4414   sctx->limited_states = limited_sts;
4415   sctx->last_node = last_node;
4416   sctx->last_str_idx = last_str_idx;
4417   re_node_set_init_empty (&sctx->limits);
4418 }