libraries/spcre/libpcre/pcre/pcre_exec.c
changeset 0 7f656887cf89
equal deleted inserted replaced
-1:000000000000 0:7f656887cf89
       
     1 /*************************************************
       
     2 *      Perl-Compatible Regular Expressions       *
       
     3 *************************************************/
       
     4 
       
     5 /* PCRE is a library of functions to support regular expressions whose syntax
       
     6 and semantics are as close as possible to those of the Perl 5 language.
       
     7 
       
     8                        Written by Philip Hazel
       
     9            Copyright (c) 1997-2008 University of Cambridge
       
    10 
       
    11 -----------------------------------------------------------------------------
       
    12 Redistribution and use in source and binary forms, with or without
       
    13 modification, are permitted provided that the following conditions are met:
       
    14 
       
    15     * Redistributions of source code must retain the above copyright notice,
       
    16       this list of conditions and the following disclaimer.
       
    17 
       
    18     * Redistributions in binary form must reproduce the above copyright
       
    19       notice, this list of conditions and the following disclaimer in the
       
    20       documentation and/or other materials provided with the distribution.
       
    21 
       
    22     * Neither the name of the University of Cambridge nor the names of its
       
    23       contributors may be used to endorse or promote products derived from
       
    24       this software without specific prior written permission.
       
    25 
       
    26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
       
    27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
       
    28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
       
    29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
       
    30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
       
    31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
       
    32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
       
    33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
       
    34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
       
    35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
       
    36 POSSIBILITY OF SUCH DAMAGE.
       
    37 -----------------------------------------------------------------------------
       
    38 */
       
    39 
       
    40 
       
    41 /* This module contains pcre_exec(), the externally visible function that does
       
    42 pattern matching using an NFA algorithm, trying to mimic Perl as closely as
       
    43 possible. There are also some static supporting functions. */
       
    44 
       
    45 #ifdef HAVE_CONFIG_H
       
    46 #include "config.h"
       
    47 #endif
       
    48 
       
    49 #define NLBLOCK md             /* Block containing newline information */
       
    50 #define PSSTART start_subject  /* Field containing processed string start */
       
    51 #define PSEND   end_subject    /* Field containing processed string end */
       
    52 
       
    53 #include "pcre_internal.h"
       
    54 
       
    55 /* Undefine some potentially clashing cpp symbols */
       
    56 
       
    57 #undef min
       
    58 #undef max
       
    59 
       
    60 /* Flag bits for the match() function */
       
    61 
       
    62 #define match_condassert     0x01  /* Called to check a condition assertion */
       
    63 #define match_cbegroup       0x02  /* Could-be-empty unlimited repeat group */
       
    64 
       
    65 /* Non-error returns from the match() function. Error returns are externally
       
    66 defined PCRE_ERROR_xxx codes, which are all negative. */
       
    67 
       
    68 #define MATCH_MATCH        1
       
    69 #define MATCH_NOMATCH      0
       
    70 
       
    71 /* Special internal returns from the match() function. Make them sufficiently
       
    72 negative to avoid the external error codes. */
       
    73 
       
    74 #define MATCH_COMMIT       (-999)
       
    75 #define MATCH_PRUNE        (-998)
       
    76 #define MATCH_SKIP         (-997)
       
    77 #define MATCH_THEN         (-996)
       
    78 
       
    79 /* Maximum number of ints of offset to save on the stack for recursive calls.
       
    80 If the offset vector is bigger, malloc is used. This should be a multiple of 3,
       
    81 because the offset vector is always a multiple of 3 long. */
       
    82 
       
    83 #define REC_STACK_SAVE_MAX 30
       
    84 
       
    85 /* Min and max values for the common repeats; for the maxima, 0 => infinity */
       
    86 
       
    87 static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };
       
    88 static const char rep_max[] = { 0, 0, 0, 0, 1, 1 };
       
    89 
       
    90 
       
    91 
       
    92 #ifdef DEBUG
       
    93 /*************************************************
       
    94 *        Debugging function to print chars       *
       
    95 *************************************************/
       
    96 
       
    97 /* Print a sequence of chars in printable format, stopping at the end of the
       
     98 subject if requested.
       
    99 
       
   100 Arguments:
       
   101   p           points to characters
       
   102   length      number to print
       
   103   is_subject  TRUE if printing from within md->start_subject
       
   104   md          pointer to matching data block, if is_subject is TRUE
       
   105 
       
   106 Returns:     nothing
       
   107 */
       
   108 
       
   109 static void
       
   110 pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
       
   111 {
       
   112 unsigned int c;
       
   113 if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
       
   114 while (length-- > 0)
       
   115   if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);
       
   116 }
       
   117 #endif
       
   118 
       
   119 
       
   120 
       
   121 /*************************************************
       
   122 *          Match a back-reference                *
       
   123 *************************************************/
       
   124 
       
   125 /* If a back reference hasn't been set, the length that is passed is greater
       
   126 than the number of characters left in the string, so the match fails.
       
   127 
       
   128 Arguments:
       
   129   offset      index into the offset vector
       
   130   eptr        points into the subject
       
   131   length      length to be matched
       
   132   md          points to match data block
       
   133   ims         the ims flags
       
   134 
       
   135 Returns:      TRUE if matched
       
   136 */
       
   137 
       
   138 static BOOL
       
   139 match_ref(int offset, register USPTR eptr, int length, match_data *md,
       
   140   unsigned long int ims)
       
   141 {
       
   142 USPTR p = md->start_subject + md->offset_vector[offset];
       
   143 
       
   144 #ifdef DEBUG
       
   145 if (eptr >= md->end_subject)
       
   146   printf("matching subject <null>");
       
   147 else
       
   148   {
       
   149   printf("matching subject ");
       
   150   pchars(eptr, length, TRUE, md);
       
   151   }
       
   152 printf(" against backref ");
       
   153 pchars(p, length, FALSE, md);
       
   154 printf("\n");
       
   155 #endif
       
   156 
       
   157 /* Always fail if not enough characters left */
       
   158 
       
   159 if (length > md->end_subject - eptr) return FALSE;
       
   160 
       
   161 /* Separate the caseless case for speed. In UTF-8 mode we can only do this
       
   162 properly if Unicode properties are supported. Otherwise, we can check only
       
   163 ASCII characters. */
       
   164 
       
   165 if ((ims & PCRE_CASELESS) != 0)
       
   166   {
       
   167 #ifdef SUPPORT_UTF8
       
   168 #ifdef SUPPORT_UCP
       
   169   if (md->utf8)
       
   170     {
       
   171     USPTR endptr = eptr + length;
       
   172     while (eptr < endptr)
       
   173       {
       
   174       int c, d;
       
   175       GETCHARINC(c, eptr);
       
   176       GETCHARINC(d, p);
       
   177       if (c != d && c != UCD_OTHERCASE(d)) return FALSE;
       
   178       }
       
   179     }
       
   180   else
       
   181 #endif
       
   182 #endif
       
   183 
       
   184   /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
       
   185   is no UCP support. */
       
   186 
       
   187   while (length-- > 0)
       
   188     { if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE; }
       
   189   }
       
   190 
       
   191 /* In the caseful case, we can just compare the bytes, whether or not we
       
   192 are in UTF-8 mode. */
       
   193 
       
   194 else
       
   195   { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
       
   196 
       
   197 return TRUE;
       
   198 }
       
   199 
       
   200 
       
   201 
       
   202 /***************************************************************************
       
   203 ****************************************************************************
       
   204                    RECURSION IN THE match() FUNCTION
       
   205 
       
   206 The match() function is highly recursive, though not every recursive call
       
   207 increases the recursive depth. Nevertheless, some regular expressions can cause
       
   208 it to recurse to a great depth. I was writing for Unix, so I just let it call
       
   209 itself recursively. This uses the stack for saving everything that has to be
       
   210 saved for a recursive call. On Unix, the stack can be large, and this works
       
   211 fine.
       
   212 
       
   213 It turns out that on some non-Unix-like systems there are problems with
       
   214 programs that use a lot of stack. (This despite the fact that every last chip
       
   215 has oodles of memory these days, and techniques for extending the stack have
       
   216 been known for decades.) So....
       
   217 
       
   218 There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
       
   219 calls by keeping local variables that need to be preserved in blocks of memory
       
    220 obtained from malloc() instead of on the stack. Macros are used to
       
   221 achieve this so that the actual code doesn't look very different to what it
       
   222 always used to.
       
   223 
       
   224 The original heap-recursive code used longjmp(). However, it seems that this
       
   225 can be very slow on some operating systems. Following a suggestion from Stan
       
   226 Switzer, the use of longjmp() has been abolished, at the cost of having to
       
   227 provide a unique number for each call to RMATCH. There is no way of generating
       
   228 a sequence of numbers at compile time in C. I have given them names, to make
       
   229 them stand out more clearly.
       
   230 
       
   231 Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
       
   232 FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
       
   233 tests. Furthermore, not using longjmp() means that local dynamic variables
       
   234 don't have indeterminate values; this has meant that the frame size can be
       
   235 reduced because the result can be "passed back" by straight setting of the
       
   236 variable instead of being passed in the frame.
       
   237 ****************************************************************************
       
   238 ***************************************************************************/
       
   239 
       
   240 /* Numbers for RMATCH calls. When this list is changed, the code at HEAP_RETURN
       
   241 below must be updated in sync.  */
       
   242 
       
   243 enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,
       
   244        RM11,  RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
       
   245        RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
       
   246        RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
       
   247        RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
       
   248        RM51,  RM52, RM53, RM54 };
       
   249 
       
   250 /* These versions of the macros use the stack, as normal. There are debugging
       
   251 versions and production versions. Note that the "rw" argument of RMATCH isn't
       
    252 actually used in this definition. */
       
   253 
       
   254 #ifndef NO_RECURSE
       
   255 #define REGISTER register
       
   256 
       
   257 #ifdef DEBUG
       
   258 #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
       
   259   { \
       
   260   printf("match() called in line %d\n", __LINE__); \
       
   261   rrc = match(ra,rb,mstart,rc,rd,re,rf,rg,rdepth+1); \
       
   262   printf("to line %d\n", __LINE__); \
       
   263   }
       
   264 #define RRETURN(ra) \
       
   265   { \
       
   266   printf("match() returned %d from line %d ", ra, __LINE__); \
       
   267   return ra; \
       
   268   }
       
   269 #else
       
   270 #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
       
   271   rrc = match(ra,rb,mstart,rc,rd,re,rf,rg,rdepth+1)
       
   272 #define RRETURN(ra) return ra
       
   273 #endif
       
   274 
       
   275 #else
       
   276 
       
   277 
       
   278 /* These versions of the macros manage a private stack on the heap. Note that
       
   279 the "rd" argument of RMATCH isn't actually used in this definition. It's the md
       
   280 argument of match(), which never changes. */
       
   281 
       
   282 #define REGISTER
       
   283 
       
   284 #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\
       
   285   {\
       
   286   heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\
       
   287   frame->Xwhere = rw; \
       
   288   newframe->Xeptr = ra;\
       
   289   newframe->Xecode = rb;\
       
   290   newframe->Xmstart = mstart;\
       
   291   newframe->Xoffset_top = rc;\
       
   292   newframe->Xims = re;\
       
   293   newframe->Xeptrb = rf;\
       
   294   newframe->Xflags = rg;\
       
   295   newframe->Xrdepth = frame->Xrdepth + 1;\
       
   296   newframe->Xprevframe = frame;\
       
   297   frame = newframe;\
       
   298   DPRINTF(("restarting from line %d\n", __LINE__));\
       
   299   goto HEAP_RECURSE;\
       
   300   L_##rw:\
       
   301   DPRINTF(("jumped back to line %d\n", __LINE__));\
       
   302   }
       
   303 
       
   304 #define RRETURN(ra)\
       
   305   {\
       
   306   heapframe *newframe = frame;\
       
   307   frame = newframe->Xprevframe;\
       
   308   (pcre_stack_free)(newframe);\
       
   309   if (frame != NULL)\
       
   310     {\
       
   311     rrc = ra;\
       
   312     goto HEAP_RETURN;\
       
   313     }\
       
   314   return ra;\
       
   315   }
       
   316 
       
   317 
       
   318 /* Structure for remembering the local variables in a private frame */
       
   319 
       
   320 typedef struct heapframe {
       
   321   struct heapframe *Xprevframe;
       
   322 
       
   323   /* Function arguments that may change */
       
   324 
       
   325   const uschar *Xeptr;
       
   326   const uschar *Xecode;
       
   327   const uschar *Xmstart;
       
   328   int Xoffset_top;
       
   329   long int Xims;
       
   330   eptrblock *Xeptrb;
       
   331   int Xflags;
       
   332   unsigned int Xrdepth;
       
   333 
       
   334   /* Function local variables */
       
   335 
       
   336   const uschar *Xcallpat;
       
   337   const uschar *Xcharptr;
       
   338   const uschar *Xdata;
       
   339   const uschar *Xnext;
       
   340   const uschar *Xpp;
       
   341   const uschar *Xprev;
       
   342   const uschar *Xsaved_eptr;
       
   343 
       
   344   recursion_info Xnew_recursive;
       
   345 
       
   346   BOOL Xcur_is_word;
       
   347   BOOL Xcondition;
       
   348   BOOL Xprev_is_word;
       
   349 
       
   350   unsigned long int Xoriginal_ims;
       
   351 
       
   352 #ifdef SUPPORT_UCP
       
   353   int Xprop_type;
       
   354   int Xprop_value;
       
   355   int Xprop_fail_result;
       
   356   int Xprop_category;
       
   357   int Xprop_chartype;
       
   358   int Xprop_script;
       
   359   int Xoclength;
       
   360   uschar Xocchars[8];
       
   361 #endif
       
   362 
       
   363   int Xctype;
       
   364   unsigned int Xfc;
       
   365   int Xfi;
       
   366   int Xlength;
       
   367   int Xmax;
       
   368   int Xmin;
       
   369   int Xnumber;
       
   370   int Xoffset;
       
   371   int Xop;
       
   372   int Xsave_capture_last;
       
   373   int Xsave_offset1, Xsave_offset2, Xsave_offset3;
       
   374   int Xstacksave[REC_STACK_SAVE_MAX];
       
   375 
       
   376   eptrblock Xnewptrb;
       
   377 
       
   378   /* Where to jump back to */
       
   379 
       
   380   int Xwhere;
       
   381 
       
   382 } heapframe;
       
   383 
       
   384 #endif
       
   385 
       
   386 
       
   387 /***************************************************************************
       
   388 ***************************************************************************/
       
   389 
       
   390 
       
   391 
       
   392 /*************************************************
       
   393 *         Match from current position            *
       
   394 *************************************************/
       
   395 
       
   396 /* This function is called recursively in many circumstances. Whenever it
       
   397 returns a negative (error) response, the outer incarnation must also return the
       
   398 same response.
       
   399 
       
   400 Performance note: It might be tempting to extract commonly used fields from the
       
   401 md structure (e.g. utf8, end_subject) into individual variables to improve
       
   402 performance. Tests using gcc on a SPARC disproved this; in the first case, it
       
   403 made performance worse.
       
   404 
       
   405 Arguments:
       
   406    eptr        pointer to current character in subject
       
   407    ecode       pointer to current position in compiled code
       
   408    mstart      pointer to the current match start position (can be modified
       
   409                  by encountering \K)
       
   410    offset_top  current top pointer
       
   411    md          pointer to "static" info for the match
       
   412    ims         current /i, /m, and /s options
       
   413    eptrb       pointer to chain of blocks containing eptr at start of
       
   414                  brackets - for testing for empty matches
       
   415    flags       can contain
       
   416                  match_condassert - this is an assertion condition
       
   417                  match_cbegroup - this is the start of an unlimited repeat
       
   418                    group that can match an empty string
       
   419    rdepth      the recursion depth
       
   420 
       
   421 Returns:       MATCH_MATCH if matched            )  these values are >= 0
       
   422                MATCH_NOMATCH if failed to match  )
       
   423                a negative PCRE_ERROR_xxx value if aborted by an error condition
       
   424                  (e.g. stopped by repeated call or recursion limit)
       
   425 */
       
   426 
       
   427 static int
       
   428 match(REGISTER USPTR eptr, REGISTER const uschar *ecode, const uschar *mstart,
       
   429   int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,
       
   430   int flags, unsigned int rdepth)
       
   431 {
       
   432 /* These variables do not need to be preserved over recursion in this function,
       
   433 so they can be ordinary variables in all cases. Mark some of them with
       
   434 "register" because they are used a lot in loops. */
       
   435 
       
   436 register int  rrc;         /* Returns from recursive calls */
       
   437 register int  i;           /* Used for loops not involving calls to RMATCH() */
       
   438 register unsigned int c;   /* Character values not kept over RMATCH() calls */
       
   439 register BOOL utf8;        /* Local copy of UTF-8 flag for speed */
       
   440 
       
   441 BOOL minimize, possessive; /* Quantifier options */
       
   442 
       
   443 /* When recursion is not being used, all "local" variables that have to be
       
   444 preserved over calls to RMATCH() are part of a "frame" which is obtained from
       
   445 heap storage. Set up the top-level frame here; others are obtained from the
       
   446 heap whenever RMATCH() does a "recursion". See the macro definitions above. */
       
   447 
       
   448 #ifdef NO_RECURSE
       
   449 heapframe *frame = (pcre_stack_malloc)(sizeof(heapframe));
       
   450 frame->Xprevframe = NULL;            /* Marks the top level */
       
   451 
       
   452 /* Copy in the original argument variables */
       
   453 
       
   454 frame->Xeptr = eptr;
       
   455 frame->Xecode = ecode;
       
   456 frame->Xmstart = mstart;
       
   457 frame->Xoffset_top = offset_top;
       
   458 frame->Xims = ims;
       
   459 frame->Xeptrb = eptrb;
       
   460 frame->Xflags = flags;
       
   461 frame->Xrdepth = rdepth;
       
   462 
       
   463 /* This is where control jumps back to to effect "recursion" */
       
   464 
       
   465 HEAP_RECURSE:
       
   466 
       
   467 /* Macros make the argument variables come from the current frame */
       
   468 
       
   469 #define eptr               frame->Xeptr
       
   470 #define ecode              frame->Xecode
       
   471 #define mstart             frame->Xmstart
       
   472 #define offset_top         frame->Xoffset_top
       
   473 #define ims                frame->Xims
       
   474 #define eptrb              frame->Xeptrb
       
   475 #define flags              frame->Xflags
       
   476 #define rdepth             frame->Xrdepth
       
   477 
       
   478 /* Ditto for the local variables */
       
   479 
       
   480 #ifdef SUPPORT_UTF8
       
   481 #define charptr            frame->Xcharptr
       
   482 #endif
       
   483 #define callpat            frame->Xcallpat
       
   484 #define data               frame->Xdata
       
   485 #define next               frame->Xnext
       
   486 #define pp                 frame->Xpp
       
   487 #define prev               frame->Xprev
       
   488 #define saved_eptr         frame->Xsaved_eptr
       
   489 
       
   490 #define new_recursive      frame->Xnew_recursive
       
   491 
       
   492 #define cur_is_word        frame->Xcur_is_word
       
   493 #define condition          frame->Xcondition
       
   494 #define prev_is_word       frame->Xprev_is_word
       
   495 
       
   496 #define original_ims       frame->Xoriginal_ims
       
   497 
       
   498 #ifdef SUPPORT_UCP
       
   499 #define prop_type          frame->Xprop_type
       
   500 #define prop_value         frame->Xprop_value
       
   501 #define prop_fail_result   frame->Xprop_fail_result
       
   502 #define prop_category      frame->Xprop_category
       
   503 #define prop_chartype      frame->Xprop_chartype
       
   504 #define prop_script        frame->Xprop_script
       
   505 #define oclength           frame->Xoclength
       
   506 #define occhars            frame->Xocchars
       
   507 #endif
       
   508 
       
   509 #define ctype              frame->Xctype
       
   510 #define fc                 frame->Xfc
       
   511 #define fi                 frame->Xfi
       
   512 #define length             frame->Xlength
       
   513 #define max                frame->Xmax
       
   514 #define min                frame->Xmin
       
   515 #define number             frame->Xnumber
       
   516 #define offset             frame->Xoffset
       
   517 #define op                 frame->Xop
       
   518 #define save_capture_last  frame->Xsave_capture_last
       
   519 #define save_offset1       frame->Xsave_offset1
       
   520 #define save_offset2       frame->Xsave_offset2
       
   521 #define save_offset3       frame->Xsave_offset3
       
   522 #define stacksave          frame->Xstacksave
       
   523 
       
   524 #define newptrb            frame->Xnewptrb
       
   525 
       
   526 /* When recursion is being used, local variables are allocated on the stack and
       
   527 get preserved during recursion in the normal way. In this environment, fi and
       
   528 i, and fc and c, can be the same variables. */
       
   529 
       
   530 #else         /* NO_RECURSE not defined */
       
   531 #define fi i
       
   532 #define fc c
       
   533 
       
   534 
       
   535 #ifdef SUPPORT_UTF8                /* Many of these variables are used only  */
       
   536 const uschar *charptr;             /* in small blocks of the code. My normal */
       
   537 #endif                             /* style of coding would have declared    */
       
   538 const uschar *callpat;             /* them within each of those blocks.      */
       
   539 const uschar *data;                /* However, in order to accommodate the   */
       
   540 const uschar *next;                /* version of this code that uses an      */
       
   541 USPTR         pp;                  /* external "stack" implemented on the    */
       
   542 const uschar *prev;                /* heap, it is easier to declare them all */
       
   543 USPTR         saved_eptr;          /* here, so the declarations can be cut   */
       
   544                                    /* out in a block. The only declarations  */
       
   545 recursion_info new_recursive;      /* within blocks below are for variables  */
       
   546                                    /* that do not have to be preserved over  */
       
   547 BOOL cur_is_word;                  /* a recursive call to RMATCH().          */
       
   548 BOOL condition;
       
   549 BOOL prev_is_word;
       
   550 
       
   551 unsigned long int original_ims;
       
   552 
       
   553 #ifdef SUPPORT_UCP
       
   554 int prop_type;
       
   555 int prop_value;
       
   556 int prop_fail_result;
       
   557 int prop_category;
       
   558 int prop_chartype;
       
   559 int prop_script;
       
   560 int oclength;
       
   561 uschar occhars[8];
       
   562 #endif
       
   563 
       
   564 int ctype;
       
   565 int length;
       
   566 int max;
       
   567 int min;
       
   568 int number;
       
   569 int offset;
       
   570 int op;
       
   571 int save_capture_last;
       
   572 int save_offset1, save_offset2, save_offset3;
       
   573 int stacksave[REC_STACK_SAVE_MAX];
       
   574 
       
   575 eptrblock newptrb;
       
   576 #endif     /* NO_RECURSE */
       
   577 
       
    578 /* These statements are here to stop the compiler complaining about uninitialized
       
   579 variables. */
       
   580 
       
   581 #ifdef SUPPORT_UCP
       
   582 prop_value = 0;
       
   583 prop_fail_result = 0;
       
   584 #endif
       
   585 
       
   586 
       
   587 /* This label is used for tail recursion, which is used in a few cases even
       
   588 when NO_RECURSE is not defined, in order to reduce the amount of stack that is
       
   589 used. Thanks to Ian Taylor for noticing this possibility and sending the
       
   590 original patch. */
       
   591 
       
   592 TAIL_RECURSE:
       
   593 
       
   594 /* OK, now we can get on with the real code of the function. Recursive calls
       
   595 are specified by the macro RMATCH and RRETURN is used to return. When
       
   596 NO_RECURSE is *not* defined, these just turn into a recursive call to match()
       
   597 and a "return", respectively (possibly with some debugging if DEBUG is
       
   598 defined). However, RMATCH isn't like a function call because it's quite a
       
   599 complicated macro. It has to be used in one particular way. This shouldn't,
       
   600 however, impact performance when true recursion is being used. */
       
   601 
       
   602 #ifdef SUPPORT_UTF8
       
   603 utf8 = md->utf8;       /* Local copy of the flag */
       
   604 #else
       
   605 utf8 = FALSE;
       
   606 #endif
       
   607 
       
   608 /* First check that we haven't called match() too many times, or that we
       
   609 haven't exceeded the recursive call limit. */
       
   610 
       
   611 if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
       
   612 if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
       
   613 
       
   614 original_ims = ims;    /* Save for resetting on ')' */
       
   615 
       
   616 /* At the start of a group with an unlimited repeat that may match an empty
       
   617 string, the match_cbegroup flag is set. When this is the case, add the current
       
   618 subject pointer to the chain of such remembered pointers, to be checked when we
       
   619 hit the closing ket, in order to break infinite loops that match no characters.
       
   620 When match() is called in other circumstances, don't add to the chain. The
       
   621 match_cbegroup flag must NOT be used with tail recursion, because the memory
       
   622 block that is used is on the stack, so a new one may be required for each
       
   623 match(). */
       
   624 
       
   625 if ((flags & match_cbegroup) != 0)
       
   626   {
       
   627   newptrb.epb_saved_eptr = eptr;
       
   628   newptrb.epb_prev = eptrb;
       
   629   eptrb = &newptrb;
       
   630   }
       
   631 
       
   632 /* Now start processing the opcodes. */
       
   633 
       
   634 for (;;)
       
   635   {
       
   636   minimize = possessive = FALSE;
       
   637   op = *ecode;
       
   638 
       
   639   /* For partial matching, remember if we ever hit the end of the subject after
       
   640   matching at least one subject character. */
       
   641 
       
   642   if (md->partial &&
       
   643       eptr >= md->end_subject &&
       
   644       eptr > mstart)
       
   645     md->hitend = TRUE;
       
   646 
       
   647   switch(op)
       
   648     {
       
   649     case OP_FAIL:
       
   650     RRETURN(MATCH_NOMATCH);
       
   651 
       
   652     case OP_PRUNE:
       
   653     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
       
   654       ims, eptrb, flags, RM51);
       
   655     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       
   656     RRETURN(MATCH_PRUNE);
       
   657 
       
   658     case OP_COMMIT:
       
   659     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
       
   660       ims, eptrb, flags, RM52);
       
   661     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       
   662     RRETURN(MATCH_COMMIT);
       
   663 
       
   664     case OP_SKIP:
       
   665     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
       
   666       ims, eptrb, flags, RM53);
       
   667     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       
   668     md->start_match_ptr = eptr;   /* Pass back current position */
       
   669     RRETURN(MATCH_SKIP);
       
   670 
       
   671     case OP_THEN:
       
   672     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
       
   673       ims, eptrb, flags, RM54);
       
   674     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       
   675     RRETURN(MATCH_THEN);
       
   676 
       
   677     /* Handle a capturing bracket. If there is space in the offset vector, save
       
   678     the current subject position in the working slot at the top of the vector.
       
   679     We mustn't change the current values of the data slot, because they may be
       
   680     set from a previous iteration of this group, and be referred to by a
       
   681     reference inside the group.
       
   682 
       
   683     If the bracket fails to match, we need to restore this value and also the
       
   684     values of the final offsets, in case they were set by a previous iteration
       
   685     of the same bracket.
       
   686 
       
   687     If there isn't enough space in the offset vector, treat this as if it were
       
   688     a non-capturing bracket. Don't worry about setting the flag for the error
       
   689     case here; that is handled in the code for KET. */
       
   690 
       
   691     case OP_CBRA:
       
   692     case OP_SCBRA:
       
   693     number = GET2(ecode, 1+LINK_SIZE);
       
   694     offset = number << 1;
       
   695 
       
   696 #ifdef DEBUG
       
   697     printf("start bracket %d\n", number);
       
   698     printf("subject=");
       
   699     pchars(eptr, 16, TRUE, md);
       
   700     printf("\n");
       
   701 #endif
       
   702 
       
   703     if (offset < md->offset_max)
       
   704       {
       
   705       save_offset1 = md->offset_vector[offset];
       
   706       save_offset2 = md->offset_vector[offset+1];
       
   707       save_offset3 = md->offset_vector[md->offset_end - number];
       
   708       save_capture_last = md->capture_last;
       
   709 
       
   710       DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
       
   711       md->offset_vector[md->offset_end - number] = eptr - md->start_subject;
       
   712 
       
   713       flags = (op == OP_SCBRA)? match_cbegroup : 0;
       
   714       do
       
   715         {
       
   716         RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
       
   717           ims, eptrb, flags, RM1);
       
   718         if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
       
   719         md->capture_last = save_capture_last;
       
   720         ecode += GET(ecode, 1);
       
   721         }
       
   722       while (*ecode == OP_ALT);
       
   723 
       
   724       DPRINTF(("bracket %d failed\n", number));
       
   725 
       
   726       md->offset_vector[offset] = save_offset1;
       
   727       md->offset_vector[offset+1] = save_offset2;
       
   728       md->offset_vector[md->offset_end - number] = save_offset3;
       
   729 
       
   730       RRETURN(MATCH_NOMATCH);
       
   731       }
       
   732 
       
   733     /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
       
   734     as a non-capturing bracket. */
       
   735 
       
   736     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
       
   737     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
       
   738 
       
   739     DPRINTF(("insufficient capture room: treat as non-capturing\n"));
       
   740 
       
   741     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
       
   742     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
       
   743 
       
   744     /* Non-capturing bracket. Loop for all the alternatives. When we get to the
       
   745     final alternative within the brackets, we would return the result of a
       
   746     recursive call to match() whatever happened. We can reduce stack usage by
       
   747     turning this into a tail recursion, except in the case when match_cbegroup
       
   748     is set. */
       
   749 
       
   750     case OP_BRA:
       
   751     case OP_SBRA:
       
   752     DPRINTF(("start non-capturing bracket\n"));
       
   753     flags = (op >= OP_SBRA)? match_cbegroup : 0;
       
   754     for (;;)
       
   755       {
       
   756       if (ecode[GET(ecode, 1)] != OP_ALT)   /* Final alternative */
       
   757         {
       
   758         if (flags == 0)    /* Not a possibly empty group */
       
   759           {
       
   760           ecode += _pcre_OP_lengths[*ecode];
       
   761           DPRINTF(("bracket 0 tail recursion\n"));
       
   762           goto TAIL_RECURSE;
       
   763           }
       
   764 
       
   765         /* Possibly empty group; can't use tail recursion. */
       
   766 
       
   767         RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
       
   768           eptrb, flags, RM48);
       
   769         RRETURN(rrc);
       
   770         }
       
   771 
       
   772       /* For non-final alternatives, continue the loop for a NOMATCH result;
       
   773       otherwise return. */
       
   774 
       
   775       RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
       
   776         eptrb, flags, RM2);
       
   777       if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
       
   778       ecode += GET(ecode, 1);
       
   779       }
       
   780     /* Control never reaches here. */
       
   781 
       
   782     /* Conditional group: compilation checked that there are no more than
       
   783     two branches. If the condition is false, skipping the first branch takes us
       
   784     past the end if there is only one branch, but that's OK because that is
       
   785     exactly what going to the ket would do. As there is only one branch to be
       
   786     obeyed, we can use tail recursion to avoid using another stack frame. */
       
   787 
       
   788     case OP_COND:
       
   789     case OP_SCOND:
       
   790     if (ecode[LINK_SIZE+1] == OP_RREF)         /* Recursion test */
       
   791       {
       
   792       offset = GET2(ecode, LINK_SIZE + 2);     /* Recursion group number*/
       
   793       condition = md->recursive != NULL &&
       
   794         (offset == RREF_ANY || offset == md->recursive->group_num);
       
   795       ecode += condition? 3 : GET(ecode, 1);
       
   796       }
       
   797 
       
   798     else if (ecode[LINK_SIZE+1] == OP_CREF)    /* Group used test */
       
   799       {
       
   800       offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */
       
   801       condition = offset < offset_top && md->offset_vector[offset] >= 0;
       
   802       ecode += condition? 3 : GET(ecode, 1);
       
   803       }
       
   804 
       
   805     else if (ecode[LINK_SIZE+1] == OP_DEF)     /* DEFINE - always false */
       
   806       {
       
   807       condition = FALSE;
       
   808       ecode += GET(ecode, 1);
       
   809       }
       
   810 
       
   811     /* The condition is an assertion. Call match() to evaluate it - setting
       
   812     the final argument match_condassert causes it to stop at the end of an
       
   813     assertion. */
       
   814 
       
   815     else
       
   816       {
       
   817       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,
       
   818           match_condassert, RM3);
       
   819       if (rrc == MATCH_MATCH)
       
   820         {
       
   821         condition = TRUE;
       
   822         ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
       
   823         while (*ecode == OP_ALT) ecode += GET(ecode, 1);
       
   824         }
       
   825       else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
       
   826         {
       
   827         RRETURN(rrc);         /* Need braces because of following else */
       
   828         }
       
   829       else
       
   830         {
       
   831         condition = FALSE;
       
   832         ecode += GET(ecode, 1);
       
   833         }
       
   834       }
       
   835 
       
   836     /* We are now at the branch that is to be obeyed. As there is only one,
       
   837     we can use tail recursion to avoid using another stack frame, except when
       
   838     match_cbegroup is required for an unlimited repeat of a possibly empty
       
   839     group. If the second alternative doesn't exist, we can just plough on. */
       
   840 
       
   841     if (condition || *ecode == OP_ALT)
       
   842       {
       
   843       ecode += 1 + LINK_SIZE;
       
   844       if (op == OP_SCOND)        /* Possibly empty group */
       
   845         {
       
   846         RMATCH(eptr, ecode, offset_top, md, ims, eptrb, match_cbegroup, RM49);
       
   847         RRETURN(rrc);
       
   848         }
       
   849       else                       /* Group must match something */
       
   850         {
       
   851         flags = 0;
       
   852         goto TAIL_RECURSE;
       
   853         }
       
   854       }
       
   855     else                         /* Condition false & no 2nd alternative */
       
   856       {
       
   857       ecode += 1 + LINK_SIZE;
       
   858       }
       
   859     break;
       
   860 
       
   861 
       
   862     /* End of the pattern, either real or forced. If we are in a top-level
       
   863     recursion, we should restore the offsets appropriately and continue from
       
   864     after the call. */
       
   865 
       
   866     case OP_ACCEPT:
       
   867     case OP_END:
       
   868     if (md->recursive != NULL && md->recursive->group_num == 0)
       
   869       {
       
   870       recursion_info *rec = md->recursive;
       
   871       DPRINTF(("End of pattern in a (?0) recursion\n"));
       
   872       md->recursive = rec->prevrec;
       
   873       memmove(md->offset_vector, rec->offset_save,
       
   874         rec->saved_max * sizeof(int));
       
   875       mstart = rec->save_start;
       
   876       ims = original_ims;
       
   877       ecode = rec->after_call;
       
   878       break;
       
   879       }
       
   880 
       
   881     /* Otherwise, if PCRE_NOTEMPTY is set, fail if we have matched an empty
       
   882     string - backtracking will then try other alternatives, if any. */
       
   883 
       
   884     if (md->notempty && eptr == mstart) RRETURN(MATCH_NOMATCH);
       
   885     md->end_match_ptr = eptr;           /* Record where we ended */
       
   886     md->end_offset_top = offset_top;    /* and how many extracts were taken */
       
   887     md->start_match_ptr = mstart;       /* and the start (\K can modify) */
       
   888     RRETURN(MATCH_MATCH);
       
   889 
       
   890     /* Change option settings */
       
   891 
       
   892     case OP_OPT:
       
   893     ims = ecode[1];
       
   894     ecode += 2;
       
   895     DPRINTF(("ims set to %02lx\n", ims));
       
   896     break;
       
   897 
       
   898     /* Assertion brackets. Check the alternative branches in turn - the
       
   899     matching won't pass the KET for an assertion. If any one branch matches,
       
   900     the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
       
   901     start of each branch to move the current point backwards, so the code at
       
   902     this level is identical to the lookahead case. */
       
   903 
       
   904     case OP_ASSERT:
       
   905     case OP_ASSERTBACK:
       
   906     do
       
   907       {
       
   908       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
       
   909         RM4);
       
   910       if (rrc == MATCH_MATCH) break;
       
   911       if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
       
   912       ecode += GET(ecode, 1);
       
   913       }
       
   914     while (*ecode == OP_ALT);
       
   915     if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);
       
   916 
       
   917     /* If checking an assertion for a condition, return MATCH_MATCH. */
       
   918 
       
   919     if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
       
   920 
       
   921     /* Continue from after the assertion, updating the offsets high water
       
   922     mark, since extracts may have been taken during the assertion. */
       
   923 
       
   924     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
       
   925     ecode += 1 + LINK_SIZE;
       
   926     offset_top = md->end_offset_top;
       
   927     continue;
       
   928 
       
   929     /* Negative assertion: all branches must fail to match */
       
   930 
       
   931     case OP_ASSERT_NOT:
       
   932     case OP_ASSERTBACK_NOT:
       
   933     do
       
   934       {
       
   935       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
       
   936         RM5);
       
   937       if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);
       
   938       if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
       
   939       ecode += GET(ecode,1);
       
   940       }
       
   941     while (*ecode == OP_ALT);
       
   942 
       
   943     if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
       
   944 
       
   945     ecode += 1 + LINK_SIZE;
       
   946     continue;
       
   947 
       
   948     /* Move the subject pointer back. This occurs only at the start of
       
   949     each branch of a lookbehind assertion. If we are too close to the start to
       
   950     move back, this match function fails. When working with UTF-8 we move
       
   951     back a number of characters, not bytes. */
       
   952 
       
   953     case OP_REVERSE:
       
   954 #ifdef SUPPORT_UTF8
       
   955     if (utf8)
       
   956       {
       
   957       i = GET(ecode, 1);
       
   958       while (i-- > 0)
       
   959         {
       
   960         eptr--;
       
   961         if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
       
   962         BACKCHAR(eptr);
       
   963         }
       
   964       }
       
   965     else
       
   966 #endif
       
   967 
       
   968     /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
       
   969 
       
   970       {
       
   971       eptr -= GET(ecode, 1);
       
   972       if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
       
   973       }
       
   974 
       
   975     /* Skip to next op code */
       
   976 
       
   977     ecode += 1 + LINK_SIZE;
       
   978     break;
       
   979 
       
   980     /* The callout item calls an external function, if one is provided, passing
       
   981     details of the match so far. This is mainly for debugging, though the
       
   982     function is able to force a failure. */
       
   983 
       
   984     case OP_CALLOUT:
       
   985     if (pcre_callout != NULL)
       
   986       {
       
   987       pcre_callout_block cb;
       
   988       cb.version          = 1;   /* Version 1 of the callout block */
       
   989       cb.callout_number   = ecode[1];
       
   990       cb.offset_vector    = md->offset_vector;
       
   991       cb.subject          = (PCRE_SPTR)md->start_subject;
       
   992       cb.subject_length   = md->end_subject - md->start_subject;
       
   993       cb.start_match      = mstart - md->start_subject;
       
   994       cb.current_position = eptr - md->start_subject;
       
   995       cb.pattern_position = GET(ecode, 2);
       
   996       cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
       
   997       cb.capture_top      = offset_top/2;
       
   998       cb.capture_last     = md->capture_last;
       
   999       cb.callout_data     = md->callout_data;
       
  1000       if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);
       
  1001       if (rrc < 0) RRETURN(rrc);
       
  1002       }
       
  1003     ecode += 2 + 2*LINK_SIZE;
       
  1004     break;
       
  1005 
       
  1006     /* Recursion either matches the current regex, or some subexpression. The
       
  1007     offset data is the offset to the starting bracket from the start of the
       
  1008     whole pattern. (This is so that it works from duplicated subpatterns.)
       
  1009 
       
  1010     If there are any capturing brackets started but not finished, we have to
       
  1011     save their starting points and reinstate them after the recursion. However,
       
  1012     we don't know how many such there are (offset_top records the completed
       
  1013     total) so we just have to save all the potential data. There may be up to
       
  1014     65535 such values, which is too large to put on the stack, but using malloc
       
  1015     for small numbers seems expensive. As a compromise, the stack is used when
       
  1016     there are no more than REC_STACK_SAVE_MAX values to store; otherwise malloc
       
  1017     is used. If the malloc fails, this call of match() gives up immediately
       
  1018     and returns PCRE_ERROR_NOMEMORY without attempting the recursion; there is
       
  1019     no fallback to saving only some of the values on the stack.
       
  1020 
       
  1021     There are also other values that have to be saved. We use a chained
       
  1022     sequence of blocks that actually live on the stack. Thanks to Robin Houston
       
  1023     for the original version of this logic. */
       
  1024 
       
  1025     case OP_RECURSE:
       
  1026       {
       
  1027       callpat = md->start_code + GET(ecode, 1);
       
  1028       new_recursive.group_num = (callpat == md->start_code)? 0 :
       
  1029         GET2(callpat, 1 + LINK_SIZE);
       
  1030 
       
  1031       /* Add to "recursing stack" */
       
  1032 
       
  1033       new_recursive.prevrec = md->recursive;
       
  1034       md->recursive = &new_recursive;
       
  1035 
       
  1036       /* Find where to continue from afterwards */
       
  1037 
       
  1038       ecode += 1 + LINK_SIZE;
       
  1039       new_recursive.after_call = ecode;
       
  1040 
       
  1041       /* Now save the offset data. */
       
  1042 
       
  1043       new_recursive.saved_max = md->offset_end;
       
  1044       if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)
       
  1045         new_recursive.offset_save = stacksave;
       
  1046       else
       
  1047         {
       
  1048         new_recursive.offset_save =
       
  1049           (int *)(pcre_malloc)(new_recursive.saved_max * sizeof(int));
       
  1050         if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
       
  1051         }
       
  1052 
       
  1053       memcpy(new_recursive.offset_save, md->offset_vector,
       
  1054             new_recursive.saved_max * sizeof(int));
       
  1055       new_recursive.save_start = mstart;
       
  1056       mstart = eptr;
       
  1057 
       
  1058       /* OK, now we can do the recursion. For each top-level alternative we
       
  1059       restore the offset and recursion data. */
       
  1060 
       
  1061       DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
       
  1062       flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;
       
  1063       do
       
  1064         {
       
  1065         RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
       
  1066           md, ims, eptrb, flags, RM6);
       
  1067         if (rrc == MATCH_MATCH)
       
  1068           {
       
  1069           DPRINTF(("Recursion matched\n"));
       
  1070           md->recursive = new_recursive.prevrec;
       
  1071           if (new_recursive.offset_save != stacksave)
       
  1072             (pcre_free)(new_recursive.offset_save);
       
  1073           RRETURN(MATCH_MATCH);
       
  1074           }
       
  1075         else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
       
  1076           {
       
  1077           DPRINTF(("Recursion gave error %d\n", rrc));
       
  1078           RRETURN(rrc);
       
  1079           }
       
  1080 
       
  1081         md->recursive = &new_recursive;
       
  1082         memcpy(md->offset_vector, new_recursive.offset_save,
       
  1083             new_recursive.saved_max * sizeof(int));
       
  1084         callpat += GET(callpat, 1);
       
  1085         }
       
  1086       while (*callpat == OP_ALT);
       
  1087 
       
  1088       DPRINTF(("Recursion didn't match\n"));
       
  1089       md->recursive = new_recursive.prevrec;
       
  1090       if (new_recursive.offset_save != stacksave)
       
  1091         (pcre_free)(new_recursive.offset_save);
       
  1092       RRETURN(MATCH_NOMATCH);
       
  1093       }
       
  1094     /* Control never reaches here */
       
  1095 
       
  1096     /* "Once" brackets are like assertion brackets except that after a match,
       
  1097     the point in the subject string is not moved back. Thus there can never be
       
  1098     a move back into the brackets. Friedl calls these "atomic" subpatterns.
       
  1099     Check the alternative branches in turn - the matching won't pass the KET
       
  1100     for this kind of subpattern. If any one branch matches, we carry on as at
       
  1101     the end of a normal bracket, leaving the subject pointer. */
       
  1102 
       
  1103     case OP_ONCE:
       
  1104     prev = ecode;
       
  1105     saved_eptr = eptr;
       
  1106 
       
  1107     do
       
  1108       {
       
  1109       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM7);
       
  1110       if (rrc == MATCH_MATCH) break;
       
  1111       if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
       
  1112       ecode += GET(ecode,1);
       
  1113       }
       
  1114     while (*ecode == OP_ALT);
       
  1115 
       
  1116     /* If hit the end of the group (which could be repeated), fail */
       
  1117 
       
  1118     if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
       
  1119 
       
  1120     /* Continue as from after the assertion, updating the offsets high water
       
  1121     mark, since extracts may have been taken. */
       
  1122 
       
  1123     do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
       
  1124 
       
  1125     offset_top = md->end_offset_top;
       
  1126     eptr = md->end_match_ptr;
       
  1127 
       
  1128     /* For a non-repeating ket, just continue at this level. This also
       
  1129     happens for a repeating ket if no characters were matched in the group.
       
  1130     This is the forcible breaking of infinite loops as implemented in Perl
       
  1131     5.005. If there is an options reset, it will get obeyed in the normal
       
  1132     course of events. */
       
  1133 
       
  1134     if (*ecode == OP_KET || eptr == saved_eptr)
       
  1135       {
       
  1136       ecode += 1+LINK_SIZE;
       
  1137       break;
       
  1138       }
       
  1139 
       
  1140     /* The repeating kets try the rest of the pattern or restart from the
       
  1141     preceding bracket, in the appropriate order. The second "call" of match()
       
  1142     uses tail recursion, to avoid using another stack frame. We need to reset
       
  1143     any options that changed within the bracket before re-running it, so
       
  1144     check the next opcode. */
       
  1145 
       
  1146     if (ecode[1+LINK_SIZE] == OP_OPT)
       
  1147       {
       
  1148       ims = (ims & ~PCRE_IMS) | ecode[4];
       
  1149       DPRINTF(("ims set to %02lx at group repeat\n", ims));
       
  1150       }
       
  1151 
       
  1152     if (*ecode == OP_KETRMIN)
       
  1153       {
       
  1154       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM8);
       
  1155       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       
  1156       ecode = prev;
       
  1157       flags = 0;
       
  1158       goto TAIL_RECURSE;
       
  1159       }
       
  1160     else  /* OP_KETRMAX */
       
  1161       {
       
  1162       RMATCH(eptr, prev, offset_top, md, ims, eptrb, match_cbegroup, RM9);
       
  1163       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       
  1164       ecode += 1 + LINK_SIZE;
       
  1165       flags = 0;
       
  1166       goto TAIL_RECURSE;
       
  1167       }
       
  1168     /* Control never gets here */
       
  1169 
       
  1170     /* An alternation is the end of a branch; scan along to find the end of the
       
  1171     bracketed group and go to there. */
       
  1172 
       
  1173     case OP_ALT:
       
  1174     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
       
  1175     break;
       
  1176 
       
  1177     /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
       
  1178     indicating that it may occur zero times. It may repeat infinitely, or not
       
  1179     at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
       
  1180     with fixed upper repeat limits are compiled as a number of copies, with the
       
  1181     optional ones preceded by BRAZERO or BRAMINZERO. */
       
  1182 
       
  1183     case OP_BRAZERO:
       
  1184       {
       
  1185       next = ecode+1;
       
  1186       RMATCH(eptr, next, offset_top, md, ims, eptrb, 0, RM10);
       
  1187       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       
  1188       do next += GET(next,1); while (*next == OP_ALT);
       
  1189       ecode = next + 1 + LINK_SIZE;
       
  1190       }
       
  1191     break;
       
  1192 
       
  1193     case OP_BRAMINZERO:
       
  1194       {
       
  1195       next = ecode+1;
       
  1196       do next += GET(next, 1); while (*next == OP_ALT);
       
  1197       RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0, RM11);
       
  1198       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       
  1199       ecode++;
       
  1200       }
       
  1201     break;
       
  1202 
       
  1203     case OP_SKIPZERO:
       
  1204       {
       
  1205       next = ecode+1;
       
  1206       do next += GET(next,1); while (*next == OP_ALT);
       
  1207       ecode = next + 1 + LINK_SIZE;
       
  1208       }
       
  1209     break;
       
  1210 
       
  1211     /* End of a group, repeated or non-repeating. */
       
  1212 
       
  1213     case OP_KET:
       
  1214     case OP_KETRMIN:
       
  1215     case OP_KETRMAX:
       
  1216     prev = ecode - GET(ecode, 1);
       
  1217 
       
  1218     /* If this was a group that remembered the subject start, in order to break
       
  1219     infinite repeats of empty string matches, retrieve the subject start from
       
  1220     the chain. Otherwise, set it NULL. */
       
  1221 
       
  1222     if (*prev >= OP_SBRA)
       
  1223       {
       
  1224       saved_eptr = eptrb->epb_saved_eptr;   /* Value at start of group */
       
  1225       eptrb = eptrb->epb_prev;              /* Backup to previous group */
       
  1226       }
       
  1227     else saved_eptr = NULL;
       
  1228 
       
  1229     /* If we are at the end of an assertion group, stop matching and return
       
  1230     MATCH_MATCH, but record the current high water mark for use by positive
       
  1231     assertions. Do this also for the "once" (atomic) groups. */
       
  1232 
       
  1233     if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
       
  1234         *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
       
  1235         *prev == OP_ONCE)
       
  1236       {
       
  1237       md->end_match_ptr = eptr;      /* For ONCE */
       
  1238       md->end_offset_top = offset_top;
       
  1239       RRETURN(MATCH_MATCH);
       
  1240       }
       
  1241 
       
  1242     /* For capturing groups we have to check the group number back at the start
       
  1243     and if necessary complete handling an extraction by setting the offsets and
       
  1244     bumping the high water mark. Note that whole-pattern recursion is coded as
       
  1245     a recurse into group 0, so it won't be picked up here. Instead, we catch it
       
  1246     when the OP_END is reached. Other recursion is handled here. */
       
  1247 
       
  1248     if (*prev == OP_CBRA || *prev == OP_SCBRA)
       
  1249       {
       
  1250       number = GET2(prev, 1+LINK_SIZE);
       
  1251       offset = number << 1;
       
  1252 
       
  1253 #ifdef DEBUG
       
  1254       printf("end bracket %d", number);
       
  1255       printf("\n");
       
  1256 #endif
       
  1257 
       
  1258       md->capture_last = number;
       
  1259       if (offset >= md->offset_max) md->offset_overflow = TRUE; else
       
  1260         {
       
  1261         md->offset_vector[offset] =
       
  1262           md->offset_vector[md->offset_end - number];
       
  1263         md->offset_vector[offset+1] = eptr - md->start_subject;
       
  1264         if (offset_top <= offset) offset_top = offset + 2;
       
  1265         }
       
  1266 
       
  1267       /* Handle a recursively called group. Restore the offsets
       
  1268       appropriately and continue from after the call. */
       
  1269 
       
  1270       if (md->recursive != NULL && md->recursive->group_num == number)
       
  1271         {
       
  1272         recursion_info *rec = md->recursive;
       
  1273         DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
       
  1274         md->recursive = rec->prevrec;
       
  1275         mstart = rec->save_start;
       
  1276         memcpy(md->offset_vector, rec->offset_save,
       
  1277           rec->saved_max * sizeof(int));
       
  1278         ecode = rec->after_call;
       
  1279         ims = original_ims;
       
  1280         break;
       
  1281         }
       
  1282       }
       
  1283 
       
  1284     /* For both capturing and non-capturing groups, reset the value of the ims
       
  1285     flags, in case they got changed during the group. */
       
  1286 
       
  1287     ims = original_ims;
       
  1288     DPRINTF(("ims reset to %02lx\n", ims));
       
  1289 
       
  1290     /* For a non-repeating ket, just continue at this level. This also
       
  1291     happens for a repeating ket if no characters were matched in the group.
       
  1292     This is the forcible breaking of infinite loops as implemented in Perl
       
  1293     5.005. If there is an options reset, it will get obeyed in the normal
       
  1294     course of events. */
       
  1295 
       
  1296     if (*ecode == OP_KET || eptr == saved_eptr)
       
  1297       {
       
  1298       ecode += 1 + LINK_SIZE;
       
  1299       break;
       
  1300       }
       
  1301 
       
  1302     /* The repeating kets try the rest of the pattern or restart from the
       
  1303     preceding bracket, in the appropriate order. In the second case, we can use
       
  1304     tail recursion to avoid using another stack frame, unless we have an
       
  1305     unlimited repeat of a group that can match an empty string. */
       
  1306 
       
  1307     flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
       
  1308 
       
  1309     if (*ecode == OP_KETRMIN)
       
  1310       {
       
  1311       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM12);
       
  1312       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       
  1313       if (flags != 0)    /* Could match an empty string */
       
  1314         {
       
  1315         RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM50);
       
  1316         RRETURN(rrc);
       
  1317         }
       
  1318       ecode = prev;
       
  1319       goto TAIL_RECURSE;
       
  1320       }
       
  1321     else  /* OP_KETRMAX */
       
  1322       {
       
  1323       RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM13);
       
  1324       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       
  1325       ecode += 1 + LINK_SIZE;
       
  1326       flags = 0;
       
  1327       goto TAIL_RECURSE;
       
  1328       }
       
  1329     /* Control never gets here */
       
  1330 
       
  1331     /* Start of subject unless notbol, or after internal newline if multiline */
       
  1332 
       
  1333     case OP_CIRC:
       
  1334     if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
       
  1335     if ((ims & PCRE_MULTILINE) != 0)
       
  1336       {
       
  1337       if (eptr != md->start_subject &&
       
  1338           (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
       
  1339         RRETURN(MATCH_NOMATCH);
       
  1340       ecode++;
       
  1341       break;
       
  1342       }
       
  1343     /* ... else fall through */
       
  1344 
       
  1345     /* Start of subject assertion */
       
  1346 
       
  1347     case OP_SOD:
       
  1348     if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH);
       
  1349     ecode++;
       
  1350     break;
       
  1351 
       
  1352     /* Start of match assertion */
       
  1353 
       
  1354     case OP_SOM:
       
  1355     if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH);
       
  1356     ecode++;
       
  1357     break;
       
  1358 
       
  1359     /* Reset the start of match point */
       
  1360 
       
  1361     case OP_SET_SOM:
       
  1362     mstart = eptr;
       
  1363     ecode++;
       
  1364     break;
       
  1365 
       
  1366     /* Assert before internal newline if multiline, or before a terminating
       
  1367     newline unless endonly is set, else end of subject unless noteol is set. */
       
  1368 
       
  1369     case OP_DOLL:
       
  1370     if ((ims & PCRE_MULTILINE) != 0)
       
  1371       {
       
  1372       if (eptr < md->end_subject)
       
  1373         { if (!IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); }
       
  1374       else
       
  1375         { if (md->noteol) RRETURN(MATCH_NOMATCH); }
       
  1376       ecode++;
       
  1377       break;
       
  1378       }
       
  1379     else
       
  1380       {
       
  1381       if (md->noteol) RRETURN(MATCH_NOMATCH);
       
  1382       if (!md->endonly)
       
  1383         {
       
  1384         if (eptr != md->end_subject &&
       
  1385             (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
       
  1386           RRETURN(MATCH_NOMATCH);
       
  1387         ecode++;
       
  1388         break;
       
  1389         }
       
  1390       }
       
  1391     /* ... else fall through for endonly */
       
  1392 
       
  1393     /* End of subject assertion (\z) */
       
  1394 
       
  1395     case OP_EOD:
       
  1396     if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH);
       
  1397     ecode++;
       
  1398     break;
       
  1399 
       
  1400     /* End of subject or ending \n assertion (\Z) */
       
  1401 
       
  1402     case OP_EODN:
       
  1403     if (eptr != md->end_subject &&
       
  1404         (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
       
  1405       RRETURN(MATCH_NOMATCH);
       
  1406     ecode++;
       
  1407     break;
       
  1408 
       
  1409     /* Word boundary assertions */
       
  1410 
       
  1411     case OP_NOT_WORD_BOUNDARY:
       
  1412     case OP_WORD_BOUNDARY:
       
  1413       {
       
  1414 
       
  1415       /* Find out if the previous and current characters are "word" characters.
       
  1416       It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
       
  1417       be "non-word" characters. */
       
  1418 
       
  1419 #ifdef SUPPORT_UTF8
       
  1420       if (utf8)
       
  1421         {
       
  1422         if (eptr == md->start_subject) prev_is_word = FALSE; else
       
  1423           {
       
  1424           const uschar *lastptr = eptr - 1;
       
  1425           while((*lastptr & 0xc0) == 0x80) lastptr--;
       
  1426           GETCHAR(c, lastptr);
       
  1427           prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
       
  1428           }
       
  1429         if (eptr >= md->end_subject) cur_is_word = FALSE; else
       
  1430           {
       
  1431           GETCHAR(c, eptr);
       
  1432           cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
       
  1433           }
       
  1434         }
       
  1435       else
       
  1436 #endif
       
  1437 
       
  1438       /* More streamlined when not in UTF-8 mode */
       
  1439 
       
  1440         {
       
  1441         prev_is_word = (eptr != md->start_subject) &&
       
  1442           ((md->ctypes[eptr[-1]] & ctype_word) != 0);
       
  1443         cur_is_word = (eptr < md->end_subject) &&
       
  1444           ((md->ctypes[*eptr] & ctype_word) != 0);
       
  1445         }
       
  1446 
       
  1447       /* Now see if the situation is what we want */
       
  1448 
       
  1449       if ((*ecode++ == OP_WORD_BOUNDARY)?
       
  1450            cur_is_word == prev_is_word : cur_is_word != prev_is_word)
       
  1451         RRETURN(MATCH_NOMATCH);
       
  1452       }
       
  1453     break;
       
  1454 
       
  1455     /* Match a single character type; inline for speed */
       
  1456 
       
  1457     case OP_ANY:
       
  1458     if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
       
  1459     /* Fall through */
       
  1460 
       
  1461     case OP_ALLANY:
       
  1462     if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);
       
  1463     if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
       
  1464     ecode++;
       
  1465     break;
       
  1466 
       
  1467     /* Match a single byte, even in UTF-8 mode. This opcode really does match
       
  1468     any byte, even newline, independent of the setting of PCRE_DOTALL. */
       
  1469 
       
  1470     case OP_ANYBYTE:
       
  1471     if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);
       
  1472     ecode++;
       
  1473     break;
       
  1474 
       
  1475     case OP_NOT_DIGIT:
       
  1476     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
       
  1477     GETCHARINCTEST(c, eptr);
       
  1478     if (
       
  1479 #ifdef SUPPORT_UTF8
       
  1480        c < 256 &&
       
  1481 #endif
       
  1482        (md->ctypes[c] & ctype_digit) != 0
       
  1483        )
       
  1484       RRETURN(MATCH_NOMATCH);
       
  1485     ecode++;
       
  1486     break;
       
  1487 
       
  1488     case OP_DIGIT:
       
  1489     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
       
  1490     GETCHARINCTEST(c, eptr);
       
  1491     if (
       
  1492 #ifdef SUPPORT_UTF8
       
  1493        c >= 256 ||
       
  1494 #endif
       
  1495        (md->ctypes[c] & ctype_digit) == 0
       
  1496        )
       
  1497       RRETURN(MATCH_NOMATCH);
       
  1498     ecode++;
       
  1499     break;
       
  1500 
       
  1501     case OP_NOT_WHITESPACE:
       
  1502     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
       
  1503     GETCHARINCTEST(c, eptr);
       
  1504     if (
       
  1505 #ifdef SUPPORT_UTF8
       
  1506        c < 256 &&
       
  1507 #endif
       
  1508        (md->ctypes[c] & ctype_space) != 0
       
  1509        )
       
  1510       RRETURN(MATCH_NOMATCH);
       
  1511     ecode++;
       
  1512     break;
       
  1513 
       
  1514     case OP_WHITESPACE:
       
  1515     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
       
  1516     GETCHARINCTEST(c, eptr);
       
  1517     if (
       
  1518 #ifdef SUPPORT_UTF8
       
  1519        c >= 256 ||
       
  1520 #endif
       
  1521        (md->ctypes[c] & ctype_space) == 0
       
  1522        )
       
  1523       RRETURN(MATCH_NOMATCH);
       
  1524     ecode++;
       
  1525     break;
       
  1526 
       
  1527     case OP_NOT_WORDCHAR:
       
  1528     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
       
  1529     GETCHARINCTEST(c, eptr);
       
  1530     if (
       
  1531 #ifdef SUPPORT_UTF8
       
  1532        c < 256 &&
       
  1533 #endif
       
  1534        (md->ctypes[c] & ctype_word) != 0
       
  1535        )
       
  1536       RRETURN(MATCH_NOMATCH);
       
  1537     ecode++;
       
  1538     break;
       
  1539 
       
  1540     case OP_WORDCHAR:
       
  1541     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
       
  1542     GETCHARINCTEST(c, eptr);
       
  1543     if (
       
  1544 #ifdef SUPPORT_UTF8
       
  1545        c >= 256 ||
       
  1546 #endif
       
  1547        (md->ctypes[c] & ctype_word) == 0
       
  1548        )
       
  1549       RRETURN(MATCH_NOMATCH);
       
  1550     ecode++;
       
  1551     break;
       
  1552 
       
  1553     case OP_ANYNL:
       
  1554     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
       
  1555     GETCHARINCTEST(c, eptr);
       
  1556     switch(c)
       
  1557       {
       
  1558       default: RRETURN(MATCH_NOMATCH);
       
  1559       case 0x000d:
       
  1560       if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
       
  1561       break;
       
  1562 
       
  1563       case 0x000a:
       
  1564       break;
       
  1565 
       
  1566       case 0x000b:
       
  1567       case 0x000c:
       
  1568       case 0x0085:
       
  1569       case 0x2028:
       
  1570       case 0x2029:
       
  1571       if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
       
  1572       break;
       
  1573       }
       
  1574     ecode++;
       
  1575     break;
       
  1576 
       
  1577     case OP_NOT_HSPACE:
       
  1578     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
       
  1579     GETCHARINCTEST(c, eptr);
       
  1580     switch(c)
       
  1581       {
       
  1582       default: break;
       
  1583       case 0x09:      /* HT */
       
  1584       case 0x20:      /* SPACE */
       
  1585       case 0xa0:      /* NBSP */
       
  1586       case 0x1680:    /* OGHAM SPACE MARK */
       
  1587       case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
       
  1588       case 0x2000:    /* EN QUAD */
       
  1589       case 0x2001:    /* EM QUAD */
       
  1590       case 0x2002:    /* EN SPACE */
       
  1591       case 0x2003:    /* EM SPACE */
       
  1592       case 0x2004:    /* THREE-PER-EM SPACE */
       
  1593       case 0x2005:    /* FOUR-PER-EM SPACE */
       
  1594       case 0x2006:    /* SIX-PER-EM SPACE */
       
  1595       case 0x2007:    /* FIGURE SPACE */
       
  1596       case 0x2008:    /* PUNCTUATION SPACE */
       
  1597       case 0x2009:    /* THIN SPACE */
       
  1598       case 0x200A:    /* HAIR SPACE */
       
  1599       case 0x202f:    /* NARROW NO-BREAK SPACE */
       
  1600       case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
       
  1601       case 0x3000:    /* IDEOGRAPHIC SPACE */
       
  1602       RRETURN(MATCH_NOMATCH);
       
  1603       }
       
  1604     ecode++;
       
  1605     break;
       
  1606 
       
  1607     case OP_HSPACE:
       
  1608     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
       
  1609     GETCHARINCTEST(c, eptr);
       
  1610     switch(c)
       
  1611       {
       
  1612       default: RRETURN(MATCH_NOMATCH);
       
  1613       case 0x09:      /* HT */
       
  1614       case 0x20:      /* SPACE */
       
  1615       case 0xa0:      /* NBSP */
       
  1616       case 0x1680:    /* OGHAM SPACE MARK */
       
  1617       case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
       
  1618       case 0x2000:    /* EN QUAD */
       
  1619       case 0x2001:    /* EM QUAD */
       
  1620       case 0x2002:    /* EN SPACE */
       
  1621       case 0x2003:    /* EM SPACE */
       
  1622       case 0x2004:    /* THREE-PER-EM SPACE */
       
  1623       case 0x2005:    /* FOUR-PER-EM SPACE */
       
  1624       case 0x2006:    /* SIX-PER-EM SPACE */
       
  1625       case 0x2007:    /* FIGURE SPACE */
       
  1626       case 0x2008:    /* PUNCTUATION SPACE */
       
  1627       case 0x2009:    /* THIN SPACE */
       
  1628       case 0x200A:    /* HAIR SPACE */
       
  1629       case 0x202f:    /* NARROW NO-BREAK SPACE */
       
  1630       case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
       
  1631       case 0x3000:    /* IDEOGRAPHIC SPACE */
       
  1632       break;
       
  1633       }
       
  1634     ecode++;
       
  1635     break;
       
  1636 
       
  1637     case OP_NOT_VSPACE:
       
  1638     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
       
  1639     GETCHARINCTEST(c, eptr);
       
  1640     switch(c)
       
  1641       {
       
  1642       default: break;
       
  1643       case 0x0a:      /* LF */
       
  1644       case 0x0b:      /* VT */
       
  1645       case 0x0c:      /* FF */
       
  1646       case 0x0d:      /* CR */
       
  1647       case 0x85:      /* NEL */
       
  1648       case 0x2028:    /* LINE SEPARATOR */
       
  1649       case 0x2029:    /* PARAGRAPH SEPARATOR */
       
  1650       RRETURN(MATCH_NOMATCH);
       
  1651       }
       
  1652     ecode++;
       
  1653     break;
       
  1654 
       
  1655     case OP_VSPACE:
       
  1656     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
       
  1657     GETCHARINCTEST(c, eptr);
       
  1658     switch(c)
       
  1659       {
       
  1660       default: RRETURN(MATCH_NOMATCH);
       
  1661       case 0x0a:      /* LF */
       
  1662       case 0x0b:      /* VT */
       
  1663       case 0x0c:      /* FF */
       
  1664       case 0x0d:      /* CR */
       
  1665       case 0x85:      /* NEL */
       
  1666       case 0x2028:    /* LINE SEPARATOR */
       
  1667       case 0x2029:    /* PARAGRAPH SEPARATOR */
       
  1668       break;
       
  1669       }
       
  1670     ecode++;
       
  1671     break;
       
  1672 
       
  1673 #ifdef SUPPORT_UCP
       
  1674     /* Check the next character by Unicode property. We will get here only
       
  1675     if the support is in the binary; otherwise a compile-time error occurs. */
       
  1676 
       
  1677     case OP_PROP:
       
  1678     case OP_NOTPROP:
       
  1679     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
       
  1680     GETCHARINCTEST(c, eptr);
       
  1681       {
       
  1682       const ucd_record * prop = GET_UCD(c);
       
  1683 
       
  1684       switch(ecode[1])
       
  1685         {
       
  1686         case PT_ANY:
       
  1687         if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
       
  1688         break;
       
  1689 
       
  1690         case PT_LAMP:
       
  1691         if ((prop->chartype == ucp_Lu ||
       
  1692              prop->chartype == ucp_Ll ||
       
  1693              prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
       
  1694           RRETURN(MATCH_NOMATCH);
       
  1695          break;
       
  1696 
       
  1697         case PT_GC:
       
  1698         if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP))
       
  1699           RRETURN(MATCH_NOMATCH);
       
  1700         break;
       
  1701 
       
  1702         case PT_PC:
       
  1703         if ((ecode[2] != prop->chartype) == (op == OP_PROP))
       
  1704           RRETURN(MATCH_NOMATCH);
       
  1705         break;
       
  1706 
       
  1707         case PT_SC:
       
  1708         if ((ecode[2] != prop->script) == (op == OP_PROP))
       
  1709           RRETURN(MATCH_NOMATCH);
       
  1710         break;
       
  1711 
       
  1712         default:
       
  1713         RRETURN(PCRE_ERROR_INTERNAL);
       
  1714         }
       
  1715 
       
  1716       ecode += 3;
       
  1717       }
       
  1718     break;
       
  1719 
       
  1720     /* Match an extended Unicode sequence. We will get here only if the support
       
  1721     is in the binary; otherwise a compile-time error occurs. */
       
  1722 
       
  1723     case OP_EXTUNI:
       
  1724     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
       
  1725     GETCHARINCTEST(c, eptr);
       
  1726       {
       
  1727       int category = UCD_CATEGORY(c);
       
  1728       if (category == ucp_M) RRETURN(MATCH_NOMATCH);
       
  1729       while (eptr < md->end_subject)
       
  1730         {
       
  1731         int len = 1;
       
  1732         if (!utf8) c = *eptr; else
       
  1733           {
       
  1734           GETCHARLEN(c, eptr, len);
       
  1735           }
       
  1736         category = UCD_CATEGORY(c);
       
  1737         if (category != ucp_M) break;
       
  1738         eptr += len;
       
  1739         }
       
  1740       }
       
  1741     ecode++;
       
  1742     break;
       
  1743 #endif
       
  1744 
       
  1745 
       
  1746     /* Match a back reference, possibly repeatedly. Look past the end of the
       
  1747     item to see if there is repeat information following. The code is similar
       
  1748     to that for character classes, but repeated for efficiency. Then obey
       
  1749     similar code to character type repeats - written out again for speed.
       
  1750     However, if the referenced string is the empty string, always treat
       
  1751     it as matched, any number of times (otherwise there could be infinite
       
  1752     loops). */
       
  1753 
       
  1754     case OP_REF:
       
  1755       {
       
  1756       offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
       
  1757       ecode += 3;
       
  1758 
       
  1759       /* If the reference is unset, there are two possibilities:
       
  1760 
       
  1761       (a) In the default, Perl-compatible state, set the length to be longer
       
  1762       than the amount of subject left; this ensures that every attempt at a
       
  1763       match fails. We can't just fail here, because of the possibility of
       
  1764       quantifiers with zero minima.
       
  1765 
       
  1766       (b) If the JavaScript compatibility flag is set, set the length to zero
       
  1767       so that the back reference matches an empty string.
       
  1768 
       
  1769       Otherwise, set the length to the length of what was matched by the
       
  1770       referenced subpattern. */
       
  1771 
       
  1772       if (offset >= offset_top || md->offset_vector[offset] < 0)
       
  1773         length = (md->jscript_compat)? 0 : md->end_subject - eptr + 1;
       
  1774       else
       
  1775         length = md->offset_vector[offset+1] - md->offset_vector[offset];
       
  1776 
       
  1777       /* Set up for repetition, or handle the non-repeated case */
       
  1778 
       
  1779       switch (*ecode)
       
  1780         {
       
  1781         case OP_CRSTAR:
       
  1782         case OP_CRMINSTAR:
       
  1783         case OP_CRPLUS:
       
  1784         case OP_CRMINPLUS:
       
  1785         case OP_CRQUERY:
       
  1786         case OP_CRMINQUERY:
       
  1787         c = *ecode++ - OP_CRSTAR;
       
  1788         minimize = (c & 1) != 0;
       
  1789         min = rep_min[c];                 /* Pick up values from tables; */
       
  1790         max = rep_max[c];                 /* zero for max => infinity */
       
  1791         if (max == 0) max = INT_MAX;
       
  1792         break;
       
  1793 
       
  1794         case OP_CRRANGE:
       
  1795         case OP_CRMINRANGE:
       
  1796         minimize = (*ecode == OP_CRMINRANGE);
       
  1797         min = GET2(ecode, 1);
       
  1798         max = GET2(ecode, 3);
       
  1799         if (max == 0) max = INT_MAX;
       
  1800         ecode += 5;
       
  1801         break;
       
  1802 
       
  1803         default:               /* No repeat follows */
       
  1804         if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);
       
  1805         eptr += length;
       
  1806         continue;              /* With the main loop */
       
  1807         }
       
  1808 
       
  1809       /* If the length of the reference is zero, just continue with the
       
  1810       main loop. */
       
  1811 
       
  1812       if (length == 0) continue;
       
  1813 
       
  1814       /* First, ensure the minimum number of matches are present. We get back
       
  1815       the length of the reference string explicitly rather than passing the
       
  1816       address of eptr, so that eptr can be a register variable. */
       
  1817 
       
  1818       for (i = 1; i <= min; i++)
       
  1819         {
       
  1820         if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);
       
  1821         eptr += length;
       
  1822         }
       
  1823 
       
  1824       /* If min = max, continue at the same level without recursion.
       
  1825       They are not both allowed to be zero. */
       
  1826 
       
  1827       if (min == max) continue;
       
  1828 
       
  1829       /* If minimizing, keep trying and advancing the pointer */
       
  1830 
       
  1831       if (minimize)
       
  1832         {
       
  1833         for (fi = min;; fi++)
       
  1834           {
       
  1835           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);
       
  1836           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       
  1837           if (fi >= max || !match_ref(offset, eptr, length, md, ims))
       
  1838             RRETURN(MATCH_NOMATCH);
       
  1839           eptr += length;
       
  1840           }
       
  1841         /* Control never gets here */
       
  1842         }
       
  1843 
       
  1844       /* If maximizing, find the longest string and work backwards */
       
  1845 
       
  1846       else
       
  1847         {
       
  1848         pp = eptr;
       
  1849         for (i = min; i < max; i++)
       
  1850           {
       
  1851           if (!match_ref(offset, eptr, length, md, ims)) break;
       
  1852           eptr += length;
       
  1853           }
       
  1854         while (eptr >= pp)
       
  1855           {
       
  1856           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM15);
       
  1857           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       
  1858           eptr -= length;
       
  1859           }
       
  1860         RRETURN(MATCH_NOMATCH);
       
  1861         }
       
  1862       }
       
  1863     /* Control never gets here */
       
  1864 
       
  1865 
       
  1866 
       
  1867     /* Match a bit-mapped character class, possibly repeatedly. This op code is
       
  1868     used when all the characters in the class have values in the range 0-255,
       
  1869     and either the matching is caseful, or the characters are in the range
       
  1870     0-127 when UTF-8 processing is enabled. The only difference between
       
  1871     OP_CLASS and OP_NCLASS occurs when a data character outside the range is
       
  1872     encountered.
       
  1873 
       
  1874     First, look past the end of the item to see if there is repeat information
       
  1875     following. Then obey similar code to character type repeats - written out
       
  1876     again for speed. */
       
  1877 
       
  1878     case OP_NCLASS:
       
  1879     case OP_CLASS:
       
  1880       {
       
  1881       data = ecode + 1;                /* Save for matching */
       
  1882       ecode += 33;                     /* Advance past the item */
       
  1883 
       
  1884       switch (*ecode)
       
  1885         {
       
  1886         case OP_CRSTAR:
       
  1887         case OP_CRMINSTAR:
       
  1888         case OP_CRPLUS:
       
  1889         case OP_CRMINPLUS:
       
  1890         case OP_CRQUERY:
       
  1891         case OP_CRMINQUERY:
       
  1892         c = *ecode++ - OP_CRSTAR;
       
  1893         minimize = (c & 1) != 0;
       
  1894         min = rep_min[c];                 /* Pick up values from tables; */
       
  1895         max = rep_max[c];                 /* zero for max => infinity */
       
  1896         if (max == 0) max = INT_MAX;
       
  1897         break;
       
  1898 
       
  1899         case OP_CRRANGE:
       
  1900         case OP_CRMINRANGE:
       
  1901         minimize = (*ecode == OP_CRMINRANGE);
       
  1902         min = GET2(ecode, 1);
       
  1903         max = GET2(ecode, 3);
       
  1904         if (max == 0) max = INT_MAX;
       
  1905         ecode += 5;
       
  1906         break;
       
  1907 
       
  1908         default:               /* No repeat follows */
       
  1909         min = max = 1;
       
  1910         break;
       
  1911         }
       
  1912 
       
  1913       /* First, ensure the minimum number of matches are present. */
       
  1914 
       
  1915 #ifdef SUPPORT_UTF8
       
  1916       /* UTF-8 mode */
       
  1917       if (utf8)
       
  1918         {
       
  1919         for (i = 1; i <= min; i++)
       
  1920           {
       
  1921           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
       
  1922           GETCHARINC(c, eptr);
       
  1923           if (c > 255)
       
  1924             {
       
  1925             if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
       
  1926             }
       
  1927           else
       
  1928             {
       
  1929             if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
       
  1930             }
       
  1931           }
       
  1932         }
       
  1933       else
       
  1934 #endif
       
  1935       /* Not UTF-8 mode */
       
  1936         {
       
  1937         for (i = 1; i <= min; i++)
       
  1938           {
       
  1939           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
       
  1940           c = *eptr++;
       
  1941           if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
       
  1942           }
       
  1943         }
       
  1944 
       
  1945       /* If max == min we can continue with the main loop without the
       
  1946       need to recurse. */
       
  1947 
       
  1948       if (min == max) continue;
       
  1949 
       
  1950       /* If minimizing, keep testing the rest of the expression and advancing
       
  1951       the pointer while it matches the class. */
       
  1952 
       
  1953       if (minimize)
       
  1954         {
       
  1955 #ifdef SUPPORT_UTF8
       
  1956         /* UTF-8 mode */
       
  1957         if (utf8)
       
  1958           {
       
  1959           for (fi = min;; fi++)
       
  1960             {
       
  1961             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);
       
  1962             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       
  1963             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
       
  1964             GETCHARINC(c, eptr);
       
  1965             if (c > 255)
       
  1966               {
       
  1967               if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
       
  1968               }
       
  1969             else
       
  1970               {
       
  1971               if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
       
  1972               }
       
  1973             }
       
  1974           }
       
  1975         else
       
  1976 #endif
       
  1977         /* Not UTF-8 mode */
       
  1978           {
       
  1979           for (fi = min;; fi++)
       
  1980             {
       
  1981             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);
       
  1982             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       
  1983             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
       
  1984             c = *eptr++;
       
  1985             if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
       
  1986             }
       
  1987           }
       
  1988         /* Control never gets here */
       
  1989         }
       
  1990 
       
  1991       /* If maximizing, find the longest possible run, then work backwards. */
       
  1992 
       
  1993       else
       
  1994         {
       
  1995         pp = eptr;
       
  1996 
       
  1997 #ifdef SUPPORT_UTF8
       
  1998         /* UTF-8 mode */
       
  1999         if (utf8)
       
  2000           {
       
  2001           for (i = min; i < max; i++)
       
  2002             {
       
  2003             int len = 1;
       
  2004             if (eptr >= md->end_subject) break;
       
  2005             GETCHARLEN(c, eptr, len);
       
  2006             if (c > 255)
       
  2007               {
       
  2008               if (op == OP_CLASS) break;
       
  2009               }
       
  2010             else
       
  2011               {
       
  2012               if ((data[c/8] & (1 << (c&7))) == 0) break;
       
  2013               }
       
  2014             eptr += len;
       
  2015             }
       
  2016           for (;;)
       
  2017             {
       
  2018             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM18);
       
  2019             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       
  2020             if (eptr-- == pp) break;        /* Stop if tried at original pos */
       
  2021             BACKCHAR(eptr);
       
  2022             }
       
  2023           }
       
  2024         else
       
  2025 #endif
       
  2026           /* Not UTF-8 mode */
       
  2027           {
       
  2028           for (i = min; i < max; i++)
       
  2029             {
       
  2030             if (eptr >= md->end_subject) break;
       
  2031             c = *eptr;
       
  2032             if ((data[c/8] & (1 << (c&7))) == 0) break;
       
  2033             eptr++;
       
  2034             }
       
  2035           while (eptr >= pp)
       
  2036             {
       
  2037             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM19);
       
  2038             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       
  2039             eptr--;
       
  2040             }
       
  2041           }
       
  2042 
       
  2043         RRETURN(MATCH_NOMATCH);
       
  2044         }
       
  2045       }
       
  2046     /* Control never gets here */
       
  2047 
       
  2048 
       
  2049     /* Match an extended character class. This opcode is encountered only
       
  2050     in UTF-8 mode, because that's the only time it is compiled. */
       
  2051 
       
  2052 #ifdef SUPPORT_UTF8
       
  2053     case OP_XCLASS:
       
  2054       {
       
  2055       data = ecode + 1 + LINK_SIZE;                /* Save for matching */
       
  2056       ecode += GET(ecode, 1);                      /* Advance past the item */
       
  2057 
       
  2058       switch (*ecode)
       
  2059         {
       
  2060         case OP_CRSTAR:
       
  2061         case OP_CRMINSTAR:
       
  2062         case OP_CRPLUS:
       
  2063         case OP_CRMINPLUS:
       
  2064         case OP_CRQUERY:
       
  2065         case OP_CRMINQUERY:
       
  2066         c = *ecode++ - OP_CRSTAR;
       
  2067         minimize = (c & 1) != 0;
       
  2068         min = rep_min[c];                 /* Pick up values from tables; */
       
  2069         max = rep_max[c];                 /* zero for max => infinity */
       
  2070         if (max == 0) max = INT_MAX;
       
  2071         break;
       
  2072 
       
  2073         case OP_CRRANGE:
       
  2074         case OP_CRMINRANGE:
       
  2075         minimize = (*ecode == OP_CRMINRANGE);
       
  2076         min = GET2(ecode, 1);
       
  2077         max = GET2(ecode, 3);
       
  2078         if (max == 0) max = INT_MAX;
       
  2079         ecode += 5;
       
  2080         break;
       
  2081 
       
  2082         default:               /* No repeat follows */
       
  2083         min = max = 1;
       
  2084         break;
       
  2085         }
       
  2086 
       
  2087       /* First, ensure the minimum number of matches are present. */
       
  2088 
       
  2089       for (i = 1; i <= min; i++)
       
  2090         {
       
  2091         if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
       
  2092         GETCHARINC(c, eptr);
       
  2093         if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
       
  2094         }
       
  2095 
       
  2096       /* If max == min we can continue with the main loop without the
       
  2097       need to recurse. */
       
  2098 
       
  2099       if (min == max) continue;
       
  2100 
       
  2101       /* If minimizing, keep testing the rest of the expression and advancing
       
  2102       the pointer while it matches the class. */
       
  2103 
       
  2104       if (minimize)
       
  2105         {
       
  2106         for (fi = min;; fi++)
       
  2107           {
       
  2108           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);
       
  2109           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       
  2110           if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
       
  2111           GETCHARINC(c, eptr);
       
  2112           if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
       
  2113           }
       
  2114         /* Control never gets here */
       
  2115         }
       
  2116 
       
  2117       /* If maximizing, find the longest possible run, then work backwards. */
       
  2118 
       
  2119       else
       
  2120         {
       
  2121         pp = eptr;
       
  2122         for (i = min; i < max; i++)
       
  2123           {
       
  2124           int len = 1;
       
  2125           if (eptr >= md->end_subject) break;
       
  2126           GETCHARLEN(c, eptr, len);
       
  2127           if (!_pcre_xclass(c, data)) break;
       
  2128           eptr += len;
       
  2129           }
       
  2130         for(;;)
       
  2131           {
       
  2132           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);
       
  2133           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       
  2134           if (eptr-- == pp) break;        /* Stop if tried at original pos */
       
  2135           if (utf8) BACKCHAR(eptr);
       
  2136           }
       
  2137         RRETURN(MATCH_NOMATCH);
       
  2138         }
       
  2139 
       
  2140       /* Control never gets here */
       
  2141       }
       
  2142 #endif    /* End of XCLASS */
       
  2143 
       
  2144     /* Match a single character, casefully */
       
  2145 
       
  2146     case OP_CHAR:
       
  2147 #ifdef SUPPORT_UTF8
       
  2148     if (utf8)
       
  2149       {
       
  2150       length = 1;
       
  2151       ecode++;
       
  2152       GETCHARLEN(fc, ecode, length);
       
  2153       if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
       
  2154       while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH);
       
  2155       }
       
  2156     else
       
  2157 #endif
       
  2158 
       
  2159     /* Non-UTF-8 mode */
       
  2160       {
       
  2161       if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);
       
  2162       if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);
       
  2163       ecode += 2;
       
  2164       }
       
  2165     break;
       
  2166 
       
  2167     /* Match a single character, caselessly */
       
  2168 
       
  2169     case OP_CHARNC:
       
  2170 #ifdef SUPPORT_UTF8
       
  2171     if (utf8)
       
  2172       {
       
  2173       length = 1;
       
  2174       ecode++;
       
  2175       GETCHARLEN(fc, ecode, length);
       
  2176 
       
  2177       if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
       
  2178 
       
  2179       /* If the pattern character's value is < 128, we have only one byte, and
       
  2180       can use the fast lookup table. */
       
  2181 
       
  2182       if (fc < 128)
       
  2183         {
       
  2184         if (md->lcc[*ecode++] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
       
  2185         }
       
  2186 
       
  2187       /* Otherwise we must pick up the subject character */
       
  2188 
       
  2189       else
       
  2190         {
       
  2191         unsigned int dc;
       
  2192         GETCHARINC(dc, eptr);
       
  2193         ecode += length;
       
  2194 
       
  2195         /* If we have Unicode property support, we can use it to test the other
       
  2196         case of the character, if there is one. */
       
  2197 
       
  2198         if (fc != dc)
       
  2199           {
       
  2200 #ifdef SUPPORT_UCP
       
  2201           if (dc != UCD_OTHERCASE(fc))
       
  2202 #endif
       
  2203             RRETURN(MATCH_NOMATCH);
       
  2204           }
       
  2205         }
       
  2206       }
       
  2207     else
       
  2208 #endif   /* SUPPORT_UTF8 */
       
  2209 
       
  2210     /* Non-UTF-8 mode */
       
  2211       {
       
  2212       if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);
       
  2213       if (md->lcc[ecode[1]] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
       
  2214       ecode += 2;
       
  2215       }
       
  2216     break;
       
  2217 
       
  2218     /* Match a single character repeatedly. */
       
  2219 
       
  2220     case OP_EXACT:
       
  2221     min = max = GET2(ecode, 1);
       
  2222     ecode += 3;
       
  2223     goto REPEATCHAR;
       
  2224 
       
  2225     case OP_POSUPTO:
       
  2226     possessive = TRUE;
       
  2227     /* Fall through */
       
  2228 
       
  2229     case OP_UPTO:
       
  2230     case OP_MINUPTO:
       
  2231     min = 0;
       
  2232     max = GET2(ecode, 1);
       
  2233     minimize = *ecode == OP_MINUPTO;
       
  2234     ecode += 3;
       
  2235     goto REPEATCHAR;
       
  2236 
       
  2237     case OP_POSSTAR:
       
  2238     possessive = TRUE;
       
  2239     min = 0;
       
  2240     max = INT_MAX;
       
  2241     ecode++;
       
  2242     goto REPEATCHAR;
       
  2243 
       
  2244     case OP_POSPLUS:
       
  2245     possessive = TRUE;
       
  2246     min = 1;
       
  2247     max = INT_MAX;
       
  2248     ecode++;
       
  2249     goto REPEATCHAR;
       
  2250 
       
  2251     case OP_POSQUERY:
       
  2252     possessive = TRUE;
       
  2253     min = 0;
       
  2254     max = 1;
       
  2255     ecode++;
       
  2256     goto REPEATCHAR;
       
  2257 
       
  2258     case OP_STAR:
       
  2259     case OP_MINSTAR:
       
  2260     case OP_PLUS:
       
  2261     case OP_MINPLUS:
       
  2262     case OP_QUERY:
       
  2263     case OP_MINQUERY:
       
  2264     c = *ecode++ - OP_STAR;
       
  2265     minimize = (c & 1) != 0;
       
  2266     min = rep_min[c];                 /* Pick up values from tables; */
       
  2267     max = rep_max[c];                 /* zero for max => infinity */
       
  2268     if (max == 0) max = INT_MAX;
       
  2269 
       
  2270     /* Common code for all repeated single-character matches. We can give
       
  2271     up quickly if there are fewer than the minimum number of characters left in
       
  2272     the subject. */
       
  2273 
       
  2274     REPEATCHAR:
       
  2275 #ifdef SUPPORT_UTF8
       
  2276     if (utf8)
       
  2277       {
       
  2278       length = 1;
       
  2279       charptr = ecode;
       
  2280       GETCHARLEN(fc, ecode, length);
       
  2281       if (min * length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
       
  2282       ecode += length;
       
  2283 
       
  2284       /* Handle multibyte character matching specially here. There is
       
  2285       support for caseless matching if UCP support is present. */
       
  2286 
       
  2287       if (length > 1)
       
  2288         {
       
  2289 #ifdef SUPPORT_UCP
       
  2290         unsigned int othercase;
       
  2291         if ((ims & PCRE_CASELESS) != 0 &&
       
  2292             (othercase = UCD_OTHERCASE(fc)) != fc)
       
  2293           oclength = _pcre_ord2utf8(othercase, occhars);
       
  2294         else oclength = 0;
       
  2295 #endif  /* SUPPORT_UCP */
       
  2296 
       
  2297         for (i = 1; i <= min; i++)
       
  2298           {
       
  2299           if (memcmp(eptr, charptr, length) == 0) eptr += length;
       
  2300 #ifdef SUPPORT_UCP
       
  2301           /* Need braces because of following else */
       
  2302           else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }
       
  2303           else
       
  2304             {
       
  2305             if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);
       
  2306             eptr += oclength;
       
  2307             }
       
  2308 #else   /* without SUPPORT_UCP */
       
  2309           else { RRETURN(MATCH_NOMATCH); }
       
  2310 #endif  /* SUPPORT_UCP */
       
  2311           }
       
  2312 
       
  2313         if (min == max) continue;
       
  2314 
       
  2315         if (minimize)
       
  2316           {
       
  2317           for (fi = min;; fi++)
       
  2318             {
       
  2319             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);
       
  2320             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       
  2321             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
       
  2322             if (memcmp(eptr, charptr, length) == 0) eptr += length;
       
  2323 #ifdef SUPPORT_UCP
       
  2324             /* Need braces because of following else */
       
  2325             else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }
       
  2326             else
       
  2327               {
       
  2328               if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);
       
  2329               eptr += oclength;
       
  2330               }
       
  2331 #else   /* without SUPPORT_UCP */
       
  2332             else { RRETURN (MATCH_NOMATCH); }
       
  2333 #endif  /* SUPPORT_UCP */
       
  2334             }
       
  2335           /* Control never gets here */
       
  2336           }
       
  2337 
       
  2338         else  /* Maximize */
       
  2339           {
       
  2340           pp = eptr;
       
  2341           for (i = min; i < max; i++)
       
  2342             {
       
  2343             if (eptr > md->end_subject - length) break;
       
  2344             if (memcmp(eptr, charptr, length) == 0) eptr += length;
       
  2345 #ifdef SUPPORT_UCP
       
  2346             else if (oclength == 0) break;
       
  2347             else
       
  2348               {
       
  2349               if (memcmp(eptr, occhars, oclength) != 0) break;
       
  2350               eptr += oclength;
       
  2351               }
       
  2352 #else   /* without SUPPORT_UCP */
       
  2353             else break;
       
  2354 #endif  /* SUPPORT_UCP */
       
  2355             }
       
  2356 
       
  2357           if (possessive) continue;
       
  2358           for(;;)
       
  2359            {
       
  2360            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);
       
  2361            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       
  2362            if (eptr == pp) RRETURN(MATCH_NOMATCH);
       
  2363 #ifdef SUPPORT_UCP
       
  2364            eptr--;
       
  2365            BACKCHAR(eptr);
       
  2366 #else   /* without SUPPORT_UCP */
       
  2367            eptr -= length;
       
  2368 #endif  /* SUPPORT_UCP */
       
  2369            }
       
  2370           }
       
  2371         /* Control never gets here */
       
  2372         }
       
  2373 
       
  2374       /* If the length of a UTF-8 character is 1, we fall through here, and
       
  2375       obey the code as for non-UTF-8 characters below, though in this case the
       
  2376       value of fc will always be < 128. */
       
  2377       }
       
  2378     else
       
  2379 #endif  /* SUPPORT_UTF8 */
       
  2380 
       
  2381     /* When not in UTF-8 mode, load a single-byte character. */
       
  2382       {
       
  2383       if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
       
  2384       fc = *ecode++;
       
  2385       }
       
  2386 
       
  2387     /* The value of fc at this point is always less than 256, though we may or
       
  2388     may not be in UTF-8 mode. The code is duplicated for the caseless and
       
  2389     caseful cases, for speed, since matching characters is likely to be quite
       
  2390     common. First, ensure the minimum number of matches are present. If min =
       
  2391     max, continue at the same level without recursing. Otherwise, if
       
  2392     minimizing, keep trying the rest of the expression and advancing one
       
  2393     matching character if failing, up to the maximum. Alternatively, if
       
  2394     maximizing, find the maximum number of characters and work backwards. */
       
  2395 
       
  2396     DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,
       
  2397       max, eptr));
       
  2398 
       
  2399     if ((ims & PCRE_CASELESS) != 0)
       
  2400       {
       
  2401       fc = md->lcc[fc];
       
  2402       for (i = 1; i <= min; i++)
       
  2403         if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
       
  2404       if (min == max) continue;
       
  2405       if (minimize)
       
  2406         {
       
  2407         for (fi = min;; fi++)
       
  2408           {
       
  2409           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);
       
  2410           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       
  2411           if (fi >= max || eptr >= md->end_subject ||
       
  2412               fc != md->lcc[*eptr++])
       
  2413             RRETURN(MATCH_NOMATCH);
       
  2414           }
       
  2415         /* Control never gets here */
       
  2416         }
       
  2417       else  /* Maximize */
       
  2418         {
       
  2419         pp = eptr;
       
  2420         for (i = min; i < max; i++)
       
  2421           {
       
  2422           if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break;
       
  2423           eptr++;
       
  2424           }
       
  2425         if (possessive) continue;
       
  2426         while (eptr >= pp)
       
  2427           {
       
  2428           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);
       
  2429           eptr--;
       
  2430           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       
  2431           }
       
  2432         RRETURN(MATCH_NOMATCH);
       
  2433         }
       
  2434       /* Control never gets here */
       
  2435       }
       
  2436 
       
  2437     /* Caseful comparisons (includes all multi-byte characters) */
       
  2438 
       
  2439     else
       
  2440       {
       
  2441       for (i = 1; i <= min; i++) if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
       
  2442       if (min == max) continue;
       
  2443       if (minimize)
       
  2444         {
       
  2445         for (fi = min;; fi++)
       
  2446           {
       
  2447           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);
       
  2448           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       
  2449           if (fi >= max || eptr >= md->end_subject || fc != *eptr++)
       
  2450             RRETURN(MATCH_NOMATCH);
       
  2451           }
       
  2452         /* Control never gets here */
       
  2453         }
       
  2454       else  /* Maximize */
       
  2455         {
       
  2456         pp = eptr;
       
  2457         for (i = min; i < max; i++)
       
  2458           {
       
  2459           if (eptr >= md->end_subject || fc != *eptr) break;
       
  2460           eptr++;
       
  2461           }
       
  2462         if (possessive) continue;
       
  2463         while (eptr >= pp)
       
  2464           {
       
  2465           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);
       
  2466           eptr--;
       
  2467           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       
  2468           }
       
  2469         RRETURN(MATCH_NOMATCH);
       
  2470         }
       
  2471       }
       
  2472     /* Control never gets here */
       
  2473 
       
  2474     /* Match a negated single one-byte character. The character we are
       
  2475     checking can be multibyte. */
       
  2476 
       
  2477     case OP_NOT:
       
  2478     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
       
  2479     ecode++;
       
  2480     GETCHARINCTEST(c, eptr);
       
  2481     if ((ims & PCRE_CASELESS) != 0)
       
  2482       {
       
  2483 #ifdef SUPPORT_UTF8
       
  2484       if (c < 256)
       
  2485 #endif
       
  2486       c = md->lcc[c];
       
  2487       if (md->lcc[*ecode++] == c) RRETURN(MATCH_NOMATCH);
       
  2488       }
       
  2489     else
       
  2490       {
       
  2491       if (*ecode++ == c) RRETURN(MATCH_NOMATCH);
       
  2492       }
       
  2493     break;
       
  2494 
       
  2495     /* Match a negated single one-byte character repeatedly. This is almost a
       
  2496     repeat of the code for a repeated single character, but I haven't found a
       
  2497     nice way of commoning these up that doesn't require a test of the
       
  2498     positive/negative option for each character match. Maybe that wouldn't add
       
  2499     very much to the time taken, but character matching *is* what this is all
       
  2500     about... */
       
  2501 
       
  2502     case OP_NOTEXACT:
       
  2503     min = max = GET2(ecode, 1);
       
  2504     ecode += 3;
       
  2505     goto REPEATNOTCHAR;
       
  2506 
       
  2507     case OP_NOTUPTO:
       
  2508     case OP_NOTMINUPTO:
       
  2509     min = 0;
       
  2510     max = GET2(ecode, 1);
       
  2511     minimize = *ecode == OP_NOTMINUPTO;
       
  2512     ecode += 3;
       
  2513     goto REPEATNOTCHAR;
       
  2514 
       
  2515     case OP_NOTPOSSTAR:
       
  2516     possessive = TRUE;
       
  2517     min = 0;
       
  2518     max = INT_MAX;
       
  2519     ecode++;
       
  2520     goto REPEATNOTCHAR;
       
  2521 
       
  2522     case OP_NOTPOSPLUS:
       
  2523     possessive = TRUE;
       
  2524     min = 1;
       
  2525     max = INT_MAX;
       
  2526     ecode++;
       
  2527     goto REPEATNOTCHAR;
       
  2528 
       
  2529     case OP_NOTPOSQUERY:
       
  2530     possessive = TRUE;
       
  2531     min = 0;
       
  2532     max = 1;
       
  2533     ecode++;
       
  2534     goto REPEATNOTCHAR;
       
  2535 
       
  2536     case OP_NOTPOSUPTO:
       
  2537     possessive = TRUE;
       
  2538     min = 0;
       
  2539     max = GET2(ecode, 1);
       
  2540     ecode += 3;
       
  2541     goto REPEATNOTCHAR;
       
  2542 
       
  2543     case OP_NOTSTAR:
       
  2544     case OP_NOTMINSTAR:
       
  2545     case OP_NOTPLUS:
       
  2546     case OP_NOTMINPLUS:
       
  2547     case OP_NOTQUERY:
       
  2548     case OP_NOTMINQUERY:
       
  2549     c = *ecode++ - OP_NOTSTAR;
       
  2550     minimize = (c & 1) != 0;
       
  2551     min = rep_min[c];                 /* Pick up values from tables; */
       
  2552     max = rep_max[c];                 /* zero for max => infinity */
       
  2553     if (max == 0) max = INT_MAX;
       
  2554 
       
  2555     /* Common code for all repeated single-byte matches. We can give up quickly
       
  2556     if there are fewer than the minimum number of bytes left in the
       
  2557     subject. */
       
  2558 
       
  2559     REPEATNOTCHAR:
       
  2560     if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
       
  2561     fc = *ecode++;
       
  2562 
       
  2563     /* The code is duplicated for the caseless and caseful cases, for speed,
       
  2564     since matching characters is likely to be quite common. First, ensure the
       
  2565     minimum number of matches are present. If min = max, continue at the same
       
  2566     level without recursing. Otherwise, if minimizing, keep trying the rest of
       
  2567     the expression and advancing one matching character if failing, up to the
       
  2568     maximum. Alternatively, if maximizing, find the maximum number of
       
  2569     characters and work backwards. */
       
  2570 
       
  2571     DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,
       
  2572       max, eptr));
       
  2573 
       
  2574     if ((ims & PCRE_CASELESS) != 0)
       
  2575       {
       
  2576       fc = md->lcc[fc];
       
  2577 
       
  2578 #ifdef SUPPORT_UTF8
       
  2579       /* UTF-8 mode */
       
  2580       if (utf8)
       
  2581         {
       
  2582         register unsigned int d;
       
  2583         for (i = 1; i <= min; i++)
       
  2584           {
       
  2585           GETCHARINC(d, eptr);
       
  2586           if (d < 256) d = md->lcc[d];
       
  2587           if (fc == d) RRETURN(MATCH_NOMATCH);
       
  2588           }
       
  2589         }
       
  2590       else
       
  2591 #endif
       
  2592 
       
  2593       /* Not UTF-8 mode */
       
  2594         {
       
  2595         for (i = 1; i <= min; i++)
       
  2596           if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
       
  2597         }
       
  2598 
       
  2599       if (min == max) continue;
       
  2600 
       
  2601       if (minimize)
       
  2602         {
       
  2603 #ifdef SUPPORT_UTF8
       
  2604         /* UTF-8 mode */
       
  2605         if (utf8)
       
  2606           {
       
  2607           register unsigned int d;
       
  2608           for (fi = min;; fi++)
       
  2609             {
       
  2610             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
       
  2611             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       
  2612             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
       
  2613             GETCHARINC(d, eptr);
       
  2614             if (d < 256) d = md->lcc[d];
       
  2615             if (fc == d) RRETURN(MATCH_NOMATCH);
       
  2616 
       
  2617             }
       
  2618           }
       
  2619         else
       
  2620 #endif
       
  2621         /* Not UTF-8 mode */
       
  2622           {
       
  2623           for (fi = min;; fi++)
       
  2624             {
       
  2625             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);
       
  2626             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       
  2627             if (fi >= max || eptr >= md->end_subject || fc == md->lcc[*eptr++])
       
  2628               RRETURN(MATCH_NOMATCH);
       
  2629             }
       
  2630           }
       
  2631         /* Control never gets here */
       
  2632         }
       
  2633 
       
  2634       /* Maximize case */
       
  2635 
       
  2636       else
       
  2637         {
       
  2638         pp = eptr;
       
  2639 
       
  2640 #ifdef SUPPORT_UTF8
       
  2641         /* UTF-8 mode */
       
  2642         if (utf8)
       
  2643           {
       
  2644           register unsigned int d;
       
  2645           for (i = min; i < max; i++)
       
  2646             {
       
  2647             int len = 1;
       
  2648             if (eptr >= md->end_subject) break;
       
  2649             GETCHARLEN(d, eptr, len);
       
  2650             if (d < 256) d = md->lcc[d];
       
  2651             if (fc == d) break;
       
  2652             eptr += len;
       
  2653             }
       
  2654         if (possessive) continue;
       
  2655         for(;;)
       
  2656             {
       
  2657             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM30);
       
  2658             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       
  2659             if (eptr-- == pp) break;        /* Stop if tried at original pos */
       
  2660             BACKCHAR(eptr);
       
  2661             }
       
  2662           }
       
  2663         else
       
  2664 #endif
       
  2665         /* Not UTF-8 mode */
       
  2666           {
       
  2667           for (i = min; i < max; i++)
       
  2668             {
       
  2669             if (eptr >= md->end_subject || fc == md->lcc[*eptr]) break;
       
  2670             eptr++;
       
  2671             }
       
  2672           if (possessive) continue;
       
  2673           while (eptr >= pp)
       
  2674             {
       
  2675             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM31);
       
  2676             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       
  2677             eptr--;
       
  2678             }
       
  2679           }
       
  2680 
       
  2681         RRETURN(MATCH_NOMATCH);
       
  2682         }
       
  2683       /* Control never gets here */
       
  2684       }
       
  2685 
       
  2686     /* Caseful comparisons */
       
  2687 
       
  2688     else
       
  2689       {
       
  2690 #ifdef SUPPORT_UTF8
       
  2691       /* UTF-8 mode */
       
  2692       if (utf8)
       
  2693         {
       
  2694         register unsigned int d;
       
  2695         for (i = 1; i <= min; i++)
       
  2696           {
       
  2697           GETCHARINC(d, eptr);
       
  2698           if (fc == d) RRETURN(MATCH_NOMATCH);
       
  2699           }
       
  2700         }
       
  2701       else
       
  2702 #endif
       
  2703       /* Not UTF-8 mode */
       
  2704         {
       
  2705         for (i = 1; i <= min; i++)
       
  2706           if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
       
  2707         }
       
  2708 
       
  2709       if (min == max) continue;
       
  2710 
       
  2711       if (minimize)
       
  2712         {
       
  2713 #ifdef SUPPORT_UTF8
       
  2714         /* UTF-8 mode */
       
  2715         if (utf8)
       
  2716           {
       
  2717           register unsigned int d;
       
  2718           for (fi = min;; fi++)
       
  2719             {
       
  2720             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
       
  2721             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       
  2722             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
       
  2723             GETCHARINC(d, eptr);
       
  2724             if (fc == d) RRETURN(MATCH_NOMATCH);
       
  2725             }
       
  2726           }
       
  2727         else
       
  2728 #endif
       
  2729         /* Not UTF-8 mode */
       
  2730           {
       
  2731           for (fi = min;; fi++)
       
  2732             {
       
  2733             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);
       
  2734             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       
  2735             if (fi >= max || eptr >= md->end_subject || fc == *eptr++)
       
  2736               RRETURN(MATCH_NOMATCH);
       
  2737             }
       
  2738           }
       
  2739         /* Control never gets here */
       
  2740         }
       
  2741 
       
  2742       /* Maximize case */
       
  2743 
       
  2744       else
       
  2745         {
       
  2746         pp = eptr;
       
  2747 
       
  2748 #ifdef SUPPORT_UTF8
       
  2749         /* UTF-8 mode */
       
  2750         if (utf8)
       
  2751           {
       
  2752           register unsigned int d;
       
  2753           for (i = min; i < max; i++)
       
  2754             {
       
  2755             int len = 1;
       
  2756             if (eptr >= md->end_subject) break;
       
  2757             GETCHARLEN(d, eptr, len);
       
  2758             if (fc == d) break;
       
  2759             eptr += len;
       
  2760             }
       
  2761           if (possessive) continue;
       
  2762           for(;;)
       
  2763             {
       
  2764             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM34);
       
  2765             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       
  2766             if (eptr-- == pp) break;        /* Stop if tried at original pos */
       
  2767             BACKCHAR(eptr);
       
  2768             }
       
  2769           }
       
  2770         else
       
  2771 #endif
       
  2772         /* Not UTF-8 mode */
       
  2773           {
       
  2774           for (i = min; i < max; i++)
       
  2775             {
       
  2776             if (eptr >= md->end_subject || fc == *eptr) break;
       
  2777             eptr++;
       
  2778             }
       
  2779           if (possessive) continue;
       
  2780           while (eptr >= pp)
       
  2781             {
       
  2782             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM35);
       
  2783             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       
  2784             eptr--;
       
  2785             }
       
  2786           }
       
  2787 
       
  2788         RRETURN(MATCH_NOMATCH);
       
  2789         }
       
  2790       }
       
  2791     /* Control never gets here */
       
  2792 
       
  2793     /* Match a single character type repeatedly; several different opcodes
       
  2794     share code. This is very similar to the code for single characters, but we
       
  2795     repeat it in the interests of efficiency. */
       
  2796 
       
  2797     case OP_TYPEEXACT:
       
  2798     min = max = GET2(ecode, 1);
       
  2799     minimize = TRUE;
       
  2800     ecode += 3;
       
  2801     goto REPEATTYPE;
       
  2802 
       
  2803     case OP_TYPEUPTO:
       
  2804     case OP_TYPEMINUPTO:
       
  2805     min = 0;
       
  2806     max = GET2(ecode, 1);
       
  2807     minimize = *ecode == OP_TYPEMINUPTO;
       
  2808     ecode += 3;
       
  2809     goto REPEATTYPE;
       
  2810 
       
  2811     case OP_TYPEPOSSTAR:
       
  2812     possessive = TRUE;
       
  2813     min = 0;
       
  2814     max = INT_MAX;
       
  2815     ecode++;
       
  2816     goto REPEATTYPE;
       
  2817 
       
  2818     case OP_TYPEPOSPLUS:
       
  2819     possessive = TRUE;
       
  2820     min = 1;
       
  2821     max = INT_MAX;
       
  2822     ecode++;
       
  2823     goto REPEATTYPE;
       
  2824 
       
  2825     case OP_TYPEPOSQUERY:
       
  2826     possessive = TRUE;
       
  2827     min = 0;
       
  2828     max = 1;
       
  2829     ecode++;
       
  2830     goto REPEATTYPE;
       
  2831 
       
  2832     case OP_TYPEPOSUPTO:
       
  2833     possessive = TRUE;
       
  2834     min = 0;
       
  2835     max = GET2(ecode, 1);
       
  2836     ecode += 3;
       
  2837     goto REPEATTYPE;
       
  2838 
       
  2839     case OP_TYPESTAR:
       
  2840     case OP_TYPEMINSTAR:
       
  2841     case OP_TYPEPLUS:
       
  2842     case OP_TYPEMINPLUS:
       
  2843     case OP_TYPEQUERY:
       
  2844     case OP_TYPEMINQUERY:
       
  2845     c = *ecode++ - OP_TYPESTAR;
       
  2846     minimize = (c & 1) != 0;
       
  2847     min = rep_min[c];                 /* Pick up values from tables; */
       
  2848     max = rep_max[c];                 /* zero for max => infinity */
       
  2849     if (max == 0) max = INT_MAX;
       
  2850 
       
  2851     /* Common code for all repeated single character type matches. Note that
       
  2852     in UTF-8 mode, '.' matches a character of any length, but for the other
       
  2853     character types, the valid characters are all one-byte long. */
       
  2854 
       
  2855     REPEATTYPE:
       
  2856     ctype = *ecode++;      /* Code for the character type */
       
  2857 
       
  2858 #ifdef SUPPORT_UCP
       
  2859     if (ctype == OP_PROP || ctype == OP_NOTPROP)
       
  2860       {
       
  2861       prop_fail_result = ctype == OP_NOTPROP;
       
  2862       prop_type = *ecode++;
       
  2863       prop_value = *ecode++;
       
  2864       }
       
  2865     else prop_type = -1;
       
  2866 #endif
       
  2867 
       
  2868     /* First, ensure the minimum number of matches are present. Use inline
       
  2869     code for maximizing the speed, and do the type test once at the start
       
  2870     (i.e. keep it out of the loop). Also we can test that there are at least
       
  2871     the minimum number of bytes before we start. This isn't as effective in
       
  2872     UTF-8 mode, but it does no harm. Separate the UTF-8 code completely as that
       
  2873     is tidier. Also separate the UCP code, which can be the same for both UTF-8
       
  2874     and single-bytes. */
       
  2875 
       
  2876     if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
       
  2877     if (min > 0)
       
  2878       {
       
  2879 #ifdef SUPPORT_UCP
       
  2880       if (prop_type >= 0)
       
  2881         {
       
  2882         switch(prop_type)
       
  2883           {
       
  2884           case PT_ANY:
       
  2885           if (prop_fail_result) RRETURN(MATCH_NOMATCH);
       
  2886           for (i = 1; i <= min; i++)
       
  2887             {
       
  2888             if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
       
  2889             GETCHARINCTEST(c, eptr);
       
  2890             }
       
  2891           break;
       
  2892 
       
  2893           case PT_LAMP:
       
  2894           for (i = 1; i <= min; i++)
       
  2895             {
       
  2896             if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
       
  2897             GETCHARINCTEST(c, eptr);
       
  2898             prop_chartype = UCD_CHARTYPE(c);
       
  2899             if ((prop_chartype == ucp_Lu ||
       
  2900                  prop_chartype == ucp_Ll ||
       
  2901                  prop_chartype == ucp_Lt) == prop_fail_result)
       
  2902               RRETURN(MATCH_NOMATCH);
       
  2903             }
       
  2904           break;
       
  2905 
       
  2906           case PT_GC:
       
  2907           for (i = 1; i <= min; i++)
       
  2908             {
       
  2909             if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
       
  2910             GETCHARINCTEST(c, eptr);
       
  2911             prop_category = UCD_CATEGORY(c);
       
  2912             if ((prop_category == prop_value) == prop_fail_result)
       
  2913               RRETURN(MATCH_NOMATCH);
       
  2914             }
       
  2915           break;
       
  2916 
       
  2917           case PT_PC:
       
  2918           for (i = 1; i <= min; i++)
       
  2919             {
       
  2920             if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
       
  2921             GETCHARINCTEST(c, eptr);
       
  2922             prop_chartype = UCD_CHARTYPE(c);
       
  2923             if ((prop_chartype == prop_value) == prop_fail_result)
       
  2924               RRETURN(MATCH_NOMATCH);
       
  2925             }
       
  2926           break;
       
  2927 
       
  2928           case PT_SC:
       
  2929           for (i = 1; i <= min; i++)
       
  2930             {
       
  2931             if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
       
  2932             GETCHARINCTEST(c, eptr);
       
  2933             prop_script = UCD_SCRIPT(c);
       
  2934             if ((prop_script == prop_value) == prop_fail_result)
       
  2935               RRETURN(MATCH_NOMATCH);
       
  2936             }
       
  2937           break;
       
  2938 
       
  2939           default:
       
  2940           RRETURN(PCRE_ERROR_INTERNAL);
       
  2941           }
       
  2942         }
       
  2943 
       
  2944       /* Match extended Unicode sequences. We will get here only if the
       
  2945       support is in the binary; otherwise a compile-time error occurs. */
       
  2946 
       
  2947       else if (ctype == OP_EXTUNI)
       
  2948         {
       
  2949         for (i = 1; i <= min; i++)
       
  2950           {
       
  2951           GETCHARINCTEST(c, eptr);
       
  2952           prop_category = UCD_CATEGORY(c);
       
  2953           if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
       
  2954           while (eptr < md->end_subject)
       
  2955             {
       
  2956             int len = 1;
       
  2957             if (!utf8) c = *eptr; else
       
  2958               {
       
  2959               GETCHARLEN(c, eptr, len);
       
  2960               }
       
  2961             prop_category = UCD_CATEGORY(c);
       
  2962             if (prop_category != ucp_M) break;
       
  2963             eptr += len;
       
  2964             }
       
  2965           }
       
  2966         }
       
  2967 
       
  2968       else
       
  2969 #endif     /* SUPPORT_UCP */
       
  2970 
       
  2971 /* Handle all other cases when the coding is UTF-8 */
       
  2972 
       
  2973 #ifdef SUPPORT_UTF8
       
  2974       if (utf8) switch(ctype)
       
  2975         {
       
  2976         case OP_ANY:
       
  2977         for (i = 1; i <= min; i++)
       
  2978           {
       
  2979           if (eptr >= md->end_subject || IS_NEWLINE(eptr))
       
  2980             RRETURN(MATCH_NOMATCH);
       
  2981           eptr++;
       
  2982           while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
       
  2983           }
       
  2984         break;
       
  2985 
       
  2986         case OP_ALLANY:
       
  2987         for (i = 1; i <= min; i++)
       
  2988           {
       
  2989           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
       
  2990           eptr++;
       
  2991           while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
       
  2992           }
       
  2993         break;
       
  2994 
       
  2995         case OP_ANYBYTE:
       
  2996         eptr += min;
       
  2997         break;
       
  2998 
       
  2999         case OP_ANYNL:
       
  3000         for (i = 1; i <= min; i++)
       
  3001           {
       
  3002           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
       
  3003           GETCHARINC(c, eptr);
       
  3004           switch(c)
       
  3005             {
       
  3006             default: RRETURN(MATCH_NOMATCH);
       
  3007             case 0x000d:
       
  3008             if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
       
  3009             break;
       
  3010 
       
  3011             case 0x000a:
       
  3012             break;
       
  3013 
       
  3014             case 0x000b:
       
  3015             case 0x000c:
       
  3016             case 0x0085:
       
  3017             case 0x2028:
       
  3018             case 0x2029:
       
  3019             if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
       
  3020             break;
       
  3021             }
       
  3022           }
       
  3023         break;
       
  3024 
       
  3025         case OP_NOT_HSPACE:
       
  3026         for (i = 1; i <= min; i++)
       
  3027           {
       
  3028           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
       
  3029           GETCHARINC(c, eptr);
       
  3030           switch(c)
       
  3031             {
       
  3032             default: break;
       
  3033             case 0x09:      /* HT */
       
  3034             case 0x20:      /* SPACE */
       
  3035             case 0xa0:      /* NBSP */
       
  3036             case 0x1680:    /* OGHAM SPACE MARK */
       
  3037             case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
       
  3038             case 0x2000:    /* EN QUAD */
       
  3039             case 0x2001:    /* EM QUAD */
       
  3040             case 0x2002:    /* EN SPACE */
       
  3041             case 0x2003:    /* EM SPACE */
       
  3042             case 0x2004:    /* THREE-PER-EM SPACE */
       
  3043             case 0x2005:    /* FOUR-PER-EM SPACE */
       
  3044             case 0x2006:    /* SIX-PER-EM SPACE */
       
  3045             case 0x2007:    /* FIGURE SPACE */
       
  3046             case 0x2008:    /* PUNCTUATION SPACE */
       
  3047             case 0x2009:    /* THIN SPACE */
       
  3048             case 0x200A:    /* HAIR SPACE */
       
  3049             case 0x202f:    /* NARROW NO-BREAK SPACE */
       
  3050             case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
       
  3051             case 0x3000:    /* IDEOGRAPHIC SPACE */
       
  3052             RRETURN(MATCH_NOMATCH);
       
  3053             }
       
  3054           }
       
  3055         break;
       
  3056 
       
  3057         case OP_HSPACE:
       
  3058         for (i = 1; i <= min; i++)
       
  3059           {
       
  3060           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
       
  3061           GETCHARINC(c, eptr);
       
  3062           switch(c)
       
  3063             {
       
  3064             default: RRETURN(MATCH_NOMATCH);
       
  3065             case 0x09:      /* HT */
       
  3066             case 0x20:      /* SPACE */
       
  3067             case 0xa0:      /* NBSP */
       
  3068             case 0x1680:    /* OGHAM SPACE MARK */
       
  3069             case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
       
  3070             case 0x2000:    /* EN QUAD */
       
  3071             case 0x2001:    /* EM QUAD */
       
  3072             case 0x2002:    /* EN SPACE */
       
  3073             case 0x2003:    /* EM SPACE */
       
  3074             case 0x2004:    /* THREE-PER-EM SPACE */
       
  3075             case 0x2005:    /* FOUR-PER-EM SPACE */
       
  3076             case 0x2006:    /* SIX-PER-EM SPACE */
       
  3077             case 0x2007:    /* FIGURE SPACE */
       
  3078             case 0x2008:    /* PUNCTUATION SPACE */
       
  3079             case 0x2009:    /* THIN SPACE */
       
  3080             case 0x200A:    /* HAIR SPACE */
       
  3081             case 0x202f:    /* NARROW NO-BREAK SPACE */
       
  3082             case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
       
  3083             case 0x3000:    /* IDEOGRAPHIC SPACE */
       
  3084             break;
       
  3085             }
       
  3086           }
       
  3087         break;
       
  3088 
       
  3089         case OP_NOT_VSPACE:
       
  3090         for (i = 1; i <= min; i++)
       
  3091           {
       
  3092           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
       
  3093           GETCHARINC(c, eptr);
       
  3094           switch(c)
       
  3095             {
       
  3096             default: break;
       
  3097             case 0x0a:      /* LF */
       
  3098             case 0x0b:      /* VT */
       
  3099             case 0x0c:      /* FF */
       
  3100             case 0x0d:      /* CR */
       
  3101             case 0x85:      /* NEL */
       
  3102             case 0x2028:    /* LINE SEPARATOR */
       
  3103             case 0x2029:    /* PARAGRAPH SEPARATOR */
       
  3104             RRETURN(MATCH_NOMATCH);
       
  3105             }
       
  3106           }
       
  3107         break;
       
  3108 
       
  3109         case OP_VSPACE:
       
  3110         for (i = 1; i <= min; i++)
       
  3111           {
       
  3112           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
       
  3113           GETCHARINC(c, eptr);
       
  3114           switch(c)
       
  3115             {
       
  3116             default: RRETURN(MATCH_NOMATCH);
       
  3117             case 0x0a:      /* LF */
       
  3118             case 0x0b:      /* VT */
       
  3119             case 0x0c:      /* FF */
       
  3120             case 0x0d:      /* CR */
       
  3121             case 0x85:      /* NEL */
       
  3122             case 0x2028:    /* LINE SEPARATOR */
       
  3123             case 0x2029:    /* PARAGRAPH SEPARATOR */
       
  3124             break;
       
  3125             }
       
  3126           }
       
  3127         break;
       
  3128 
       
  3129         case OP_NOT_DIGIT:
       
  3130         for (i = 1; i <= min; i++)
       
  3131           {
       
  3132           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
       
  3133           GETCHARINC(c, eptr);
       
  3134           if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
       
  3135             RRETURN(MATCH_NOMATCH);
       
  3136           }
       
  3137         break;
       
  3138 
       
  3139         case OP_DIGIT:
       
  3140         for (i = 1; i <= min; i++)
       
  3141           {
       
  3142           if (eptr >= md->end_subject ||
       
  3143              *eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)
       
  3144             RRETURN(MATCH_NOMATCH);
       
  3145           /* No need to skip more bytes - we know it's a 1-byte character */
       
  3146           }
       
  3147         break;
       
  3148 
       
  3149         case OP_NOT_WHITESPACE:
       
  3150         for (i = 1; i <= min; i++)
       
  3151           {
       
  3152           if (eptr >= md->end_subject ||
       
  3153              (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0))
       
  3154             RRETURN(MATCH_NOMATCH);
       
  3155           while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
       
  3156           }
       
  3157         break;
       
  3158 
       
  3159         case OP_WHITESPACE:
       
  3160         for (i = 1; i <= min; i++)
       
  3161           {
       
  3162           if (eptr >= md->end_subject ||
       
  3163              *eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)
       
  3164             RRETURN(MATCH_NOMATCH);
       
  3165           /* No need to skip more bytes - we know it's a 1-byte character */
       
  3166           }
       
  3167         break;
       
  3168 
       
  3169         case OP_NOT_WORDCHAR:
       
  3170         for (i = 1; i <= min; i++)
       
  3171           {
       
  3172           if (eptr >= md->end_subject ||
       
  3173              (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0))
       
  3174             RRETURN(MATCH_NOMATCH);
       
  3175           while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
       
  3176           }
       
  3177         break;
       
  3178 
       
  3179         case OP_WORDCHAR:
       
  3180         for (i = 1; i <= min; i++)
       
  3181           {
       
  3182           if (eptr >= md->end_subject ||
       
  3183              *eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)
       
  3184             RRETURN(MATCH_NOMATCH);
       
  3185           /* No need to skip more bytes - we know it's a 1-byte character */
       
  3186           }
       
  3187         break;
       
  3188 
       
  3189         default:
       
  3190         RRETURN(PCRE_ERROR_INTERNAL);
       
  3191         }  /* End switch(ctype) */
       
  3192 
       
  3193       else
       
  3194 #endif     /* SUPPORT_UTF8 */
       
  3195 
       
  3196       /* Code for the non-UTF-8 case for minimum matching of operators other
       
  3197       than OP_PROP and OP_NOTPROP. We can assume that there are the minimum
       
  3198       number of bytes present, as this was tested above. */
       
  3199 
       
  3200       switch(ctype)
       
  3201         {
       
  3202         case OP_ANY:
       
  3203         for (i = 1; i <= min; i++)
       
  3204           {
       
  3205           if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
       
  3206           eptr++;
       
  3207           }
       
  3208         break;
       
  3209 
       
  3210         case OP_ALLANY:
       
  3211         eptr += min;
       
  3212         break;
       
  3213 
       
  3214         case OP_ANYBYTE:
       
  3215         eptr += min;
       
  3216         break;
       
  3217 
       
  3218         /* Because of the CRLF case, we can't assume the minimum number of
       
  3219         bytes are present in this case. */
       
  3220 
       
  3221         case OP_ANYNL:
       
  3222         for (i = 1; i <= min; i++)
       
  3223           {
       
  3224           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
       
  3225           switch(*eptr++)
       
  3226             {
       
  3227             default: RRETURN(MATCH_NOMATCH);
       
  3228             case 0x000d:
       
  3229             if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
       
  3230             break;
       
  3231             case 0x000a:
       
  3232             break;
       
  3233 
       
  3234             case 0x000b:
       
  3235             case 0x000c:
       
  3236             case 0x0085:
       
  3237             if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
       
  3238             break;
       
  3239             }
       
  3240           }
       
  3241         break;
       
  3242 
       
  3243         case OP_NOT_HSPACE:
       
  3244         for (i = 1; i <= min; i++)
       
  3245           {
       
  3246           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
       
  3247           switch(*eptr++)
       
  3248             {
       
  3249             default: break;
       
  3250             case 0x09:      /* HT */
       
  3251             case 0x20:      /* SPACE */
       
  3252             case 0xa0:      /* NBSP */
       
  3253             RRETURN(MATCH_NOMATCH);
       
  3254             }
       
  3255           }
       
  3256         break;
       
  3257 
       
  3258         case OP_HSPACE:
       
  3259         for (i = 1; i <= min; i++)
       
  3260           {
       
  3261           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
       
  3262           switch(*eptr++)
       
  3263             {
       
  3264             default: RRETURN(MATCH_NOMATCH);
       
  3265             case 0x09:      /* HT */
       
  3266             case 0x20:      /* SPACE */
       
  3267             case 0xa0:      /* NBSP */
       
  3268             break;
       
  3269             }
       
  3270           }
       
  3271         break;
       
  3272 
       
  3273         case OP_NOT_VSPACE:
       
  3274         for (i = 1; i <= min; i++)
       
  3275           {
       
  3276           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
       
  3277           switch(*eptr++)
       
  3278             {
       
  3279             default: break;
       
  3280             case 0x0a:      /* LF */
       
  3281             case 0x0b:      /* VT */
       
  3282             case 0x0c:      /* FF */
       
  3283             case 0x0d:      /* CR */
       
  3284             case 0x85:      /* NEL */
       
  3285             RRETURN(MATCH_NOMATCH);
       
  3286             }
       
  3287           }
       
  3288         break;
       
  3289 
       
  3290         case OP_VSPACE:
       
  3291         for (i = 1; i <= min; i++)
       
  3292           {
       
  3293           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
       
  3294           switch(*eptr++)
       
  3295             {
       
  3296             default: RRETURN(MATCH_NOMATCH);
       
  3297             case 0x0a:      /* LF */
       
  3298             case 0x0b:      /* VT */
       
  3299             case 0x0c:      /* FF */
       
  3300             case 0x0d:      /* CR */
       
  3301             case 0x85:      /* NEL */
       
  3302             break;
       
  3303             }
       
  3304           }
       
  3305         break;
       
  3306 
       
  3307         case OP_NOT_DIGIT:
       
  3308         for (i = 1; i <= min; i++)
       
  3309           if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
       
  3310         break;
       
  3311 
       
  3312         case OP_DIGIT:
       
  3313         for (i = 1; i <= min; i++)
       
  3314           if ((md->ctypes[*eptr++] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
       
  3315         break;
       
  3316 
       
  3317         case OP_NOT_WHITESPACE:
       
  3318         for (i = 1; i <= min; i++)
       
  3319           if ((md->ctypes[*eptr++] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
       
  3320         break;
       
  3321 
       
  3322         case OP_WHITESPACE:
       
  3323         for (i = 1; i <= min; i++)
       
  3324           if ((md->ctypes[*eptr++] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
       
  3325         break;
       
  3326 
       
  3327         case OP_NOT_WORDCHAR:
       
  3328         for (i = 1; i <= min; i++)
       
  3329           if ((md->ctypes[*eptr++] & ctype_word) != 0)
       
  3330             RRETURN(MATCH_NOMATCH);
       
  3331         break;
       
  3332 
       
  3333         case OP_WORDCHAR:
       
  3334         for (i = 1; i <= min; i++)
       
  3335           if ((md->ctypes[*eptr++] & ctype_word) == 0)
       
  3336             RRETURN(MATCH_NOMATCH);
       
  3337         break;
       
  3338 
       
  3339         default:
       
  3340         RRETURN(PCRE_ERROR_INTERNAL);
       
  3341         }
       
  3342       }
       
  3343 
       
  3344     /* If min = max, continue at the same level without recursing */
       
  3345 
       
  3346     if (min == max) continue;
       
  3347 
       
  3348     /* If minimizing, we have to test the rest of the pattern before each
       
  3349     subsequent match. Again, separate the UTF-8 case for speed, and also
       
  3350     separate the UCP cases. */
       
  3351 
       
  3352     if (minimize)
       
  3353       {
       
  3354 #ifdef SUPPORT_UCP
       
  3355       if (prop_type >= 0)
       
  3356         {
       
  3357         switch(prop_type)
       
  3358           {
       
  3359           case PT_ANY:
       
  3360           for (fi = min;; fi++)
       
  3361             {
       
  3362             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM36);
       
  3363             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       
  3364             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
       
  3365             GETCHARINC(c, eptr);
       
  3366             if (prop_fail_result) RRETURN(MATCH_NOMATCH);
       
  3367             }
       
  3368           /* Control never gets here */
       
  3369 
       
  3370           case PT_LAMP:
       
  3371           for (fi = min;; fi++)
       
  3372             {
       
  3373             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM37);
       
  3374             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       
  3375             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
       
  3376             GETCHARINC(c, eptr);
       
  3377             prop_chartype = UCD_CHARTYPE(c);
       
  3378             if ((prop_chartype == ucp_Lu ||
       
  3379                  prop_chartype == ucp_Ll ||
       
  3380                  prop_chartype == ucp_Lt) == prop_fail_result)
       
  3381               RRETURN(MATCH_NOMATCH);
       
  3382             }
       
  3383           /* Control never gets here */
       
  3384 
       
  3385           case PT_GC:
       
  3386           for (fi = min;; fi++)
       
  3387             {
       
  3388             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM38);
       
  3389             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       
  3390             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
       
  3391             GETCHARINC(c, eptr);
       
  3392             prop_category = UCD_CATEGORY(c);
       
  3393             if ((prop_category == prop_value) == prop_fail_result)
       
  3394               RRETURN(MATCH_NOMATCH);
       
  3395             }
       
  3396           /* Control never gets here */
       
  3397 
       
  3398           case PT_PC:
       
  3399           for (fi = min;; fi++)
       
  3400             {
       
  3401             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM39);
       
  3402             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       
  3403             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
       
  3404             GETCHARINC(c, eptr);
       
  3405             prop_chartype = UCD_CHARTYPE(c);
       
  3406             if ((prop_chartype == prop_value) == prop_fail_result)
       
  3407               RRETURN(MATCH_NOMATCH);
       
  3408             }
       
  3409           /* Control never gets here */
       
  3410 
       
  3411           case PT_SC:
       
  3412           for (fi = min;; fi++)
       
  3413             {
       
  3414             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM40);
       
  3415             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       
  3416             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
       
  3417             GETCHARINC(c, eptr);
       
  3418             prop_script = UCD_SCRIPT(c);
       
  3419             if ((prop_script == prop_value) == prop_fail_result)
       
  3420               RRETURN(MATCH_NOMATCH);
       
  3421             }
       
  3422           /* Control never gets here */
       
  3423 
       
  3424           default:
       
  3425           RRETURN(PCRE_ERROR_INTERNAL);
       
  3426           }
       
  3427         }
       
  3428 
       
  3429       /* Match extended Unicode sequences. We will get here only if the
       
  3430       support is in the binary; otherwise a compile-time error occurs. */
       
  3431 
       
  3432       else if (ctype == OP_EXTUNI)
       
  3433         {
       
  3434         for (fi = min;; fi++)
       
  3435           {
       
  3436           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM41);
       
  3437           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       
  3438           if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
       
  3439           GETCHARINCTEST(c, eptr);
       
  3440           prop_category = UCD_CATEGORY(c);
       
  3441           if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
       
  3442           while (eptr < md->end_subject)
       
  3443             {
       
  3444             int len = 1;
       
  3445             if (!utf8) c = *eptr; else
       
  3446               {
       
  3447               GETCHARLEN(c, eptr, len);
       
  3448               }
       
  3449             prop_category = UCD_CATEGORY(c);
       
  3450             if (prop_category != ucp_M) break;
       
  3451             eptr += len;
       
  3452             }
       
  3453           }
       
  3454         }
       
  3455 
       
  3456       else
       
  3457 #endif     /* SUPPORT_UCP */
       
  3458 
       
  3459 #ifdef SUPPORT_UTF8
       
  3460       /* UTF-8 mode */
       
  3461       if (utf8)
       
  3462         {
       
  3463         for (fi = min;; fi++)
       
  3464           {
       
  3465           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM42);
       
  3466           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       
  3467           if (fi >= max || eptr >= md->end_subject ||
       
  3468                (ctype == OP_ANY && IS_NEWLINE(eptr)))
       
  3469             RRETURN(MATCH_NOMATCH);
       
  3470 
       
  3471           GETCHARINC(c, eptr);
       
  3472           switch(ctype)
       
  3473             {
       
  3474             case OP_ANY:        /* This is the non-NL case */
       
  3475             case OP_ALLANY:
       
  3476             case OP_ANYBYTE:
       
  3477             break;
       
  3478 
       
  3479             case OP_ANYNL:
       
  3480             switch(c)
       
  3481               {
       
  3482               default: RRETURN(MATCH_NOMATCH);
       
  3483               case 0x000d:
       
  3484               if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
       
  3485               break;
       
  3486               case 0x000a:
       
  3487               break;
       
  3488 
       
  3489               case 0x000b:
       
  3490               case 0x000c:
       
  3491               case 0x0085:
       
  3492               case 0x2028:
       
  3493               case 0x2029:
       
  3494               if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
       
  3495               break;
       
  3496               }
       
  3497             break;
       
  3498 
       
  3499             case OP_NOT_HSPACE:
       
  3500             switch(c)
       
  3501               {
       
  3502               default: break;
       
  3503               case 0x09:      /* HT */
       
  3504               case 0x20:      /* SPACE */
       
  3505               case 0xa0:      /* NBSP */
       
  3506               case 0x1680:    /* OGHAM SPACE MARK */
       
  3507               case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
       
  3508               case 0x2000:    /* EN QUAD */
       
  3509               case 0x2001:    /* EM QUAD */
       
  3510               case 0x2002:    /* EN SPACE */
       
  3511               case 0x2003:    /* EM SPACE */
       
  3512               case 0x2004:    /* THREE-PER-EM SPACE */
       
  3513               case 0x2005:    /* FOUR-PER-EM SPACE */
       
  3514               case 0x2006:    /* SIX-PER-EM SPACE */
       
  3515               case 0x2007:    /* FIGURE SPACE */
       
  3516               case 0x2008:    /* PUNCTUATION SPACE */
       
  3517               case 0x2009:    /* THIN SPACE */
       
  3518               case 0x200A:    /* HAIR SPACE */
       
  3519               case 0x202f:    /* NARROW NO-BREAK SPACE */
       
  3520               case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
       
  3521               case 0x3000:    /* IDEOGRAPHIC SPACE */
       
  3522               RRETURN(MATCH_NOMATCH);
       
  3523               }
       
  3524             break;
       
  3525 
       
  3526             case OP_HSPACE:
       
  3527             switch(c)
       
  3528               {
       
  3529               default: RRETURN(MATCH_NOMATCH);
       
  3530               case 0x09:      /* HT */
       
  3531               case 0x20:      /* SPACE */
       
  3532               case 0xa0:      /* NBSP */
       
  3533               case 0x1680:    /* OGHAM SPACE MARK */
       
  3534               case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
       
  3535               case 0x2000:    /* EN QUAD */
       
  3536               case 0x2001:    /* EM QUAD */
       
  3537               case 0x2002:    /* EN SPACE */
       
  3538               case 0x2003:    /* EM SPACE */
       
  3539               case 0x2004:    /* THREE-PER-EM SPACE */
       
  3540               case 0x2005:    /* FOUR-PER-EM SPACE */
       
  3541               case 0x2006:    /* SIX-PER-EM SPACE */
       
  3542               case 0x2007:    /* FIGURE SPACE */
       
  3543               case 0x2008:    /* PUNCTUATION SPACE */
       
  3544               case 0x2009:    /* THIN SPACE */
       
  3545               case 0x200A:    /* HAIR SPACE */
       
  3546               case 0x202f:    /* NARROW NO-BREAK SPACE */
       
  3547               case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
       
  3548               case 0x3000:    /* IDEOGRAPHIC SPACE */
       
  3549               break;
       
  3550               }
       
  3551             break;
       
  3552 
       
  3553             case OP_NOT_VSPACE:
       
  3554             switch(c)
       
  3555               {
       
  3556               default: break;
       
  3557               case 0x0a:      /* LF */
       
  3558               case 0x0b:      /* VT */
       
  3559               case 0x0c:      /* FF */
       
  3560               case 0x0d:      /* CR */
       
  3561               case 0x85:      /* NEL */
       
  3562               case 0x2028:    /* LINE SEPARATOR */
       
  3563               case 0x2029:    /* PARAGRAPH SEPARATOR */
       
  3564               RRETURN(MATCH_NOMATCH);
       
  3565               }
       
  3566             break;
       
  3567 
       
  3568             case OP_VSPACE:
       
  3569             switch(c)
       
  3570               {
       
  3571               default: RRETURN(MATCH_NOMATCH);
       
  3572               case 0x0a:      /* LF */
       
  3573               case 0x0b:      /* VT */
       
  3574               case 0x0c:      /* FF */
       
  3575               case 0x0d:      /* CR */
       
  3576               case 0x85:      /* NEL */
       
  3577               case 0x2028:    /* LINE SEPARATOR */
       
  3578               case 0x2029:    /* PARAGRAPH SEPARATOR */
       
  3579               break;
       
  3580               }
       
  3581             break;
       
  3582 
       
  3583             case OP_NOT_DIGIT:
       
  3584             if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)
       
  3585               RRETURN(MATCH_NOMATCH);
       
  3586             break;
       
  3587 
       
  3588             case OP_DIGIT:
       
  3589             if (c >= 256 || (md->ctypes[c] & ctype_digit) == 0)
       
  3590               RRETURN(MATCH_NOMATCH);
       
  3591             break;
       
  3592 
       
  3593             case OP_NOT_WHITESPACE:
       
  3594             if (c < 256 && (md->ctypes[c] & ctype_space) != 0)
       
  3595               RRETURN(MATCH_NOMATCH);
       
  3596             break;
       
  3597 
       
  3598             case OP_WHITESPACE:
       
  3599             if  (c >= 256 || (md->ctypes[c] & ctype_space) == 0)
       
  3600               RRETURN(MATCH_NOMATCH);
       
  3601             break;
       
  3602 
       
  3603             case OP_NOT_WORDCHAR:
       
  3604             if (c < 256 && (md->ctypes[c] & ctype_word) != 0)
       
  3605               RRETURN(MATCH_NOMATCH);
       
  3606             break;
       
  3607 
       
  3608             case OP_WORDCHAR:
       
  3609             if (c >= 256 || (md->ctypes[c] & ctype_word) == 0)
       
  3610               RRETURN(MATCH_NOMATCH);
       
  3611             break;
       
  3612 
       
  3613             default:
       
  3614             RRETURN(PCRE_ERROR_INTERNAL);
       
  3615             }
       
  3616           }
       
  3617         }
       
  3618       else
       
  3619 #endif
       
  3620       /* Not UTF-8 mode */
       
  3621         {
       
  3622         for (fi = min;; fi++)
       
  3623           {
       
  3624           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM43);
       
  3625           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       
  3626           if (fi >= max || eptr >= md->end_subject ||
       
  3627                (ctype == OP_ANY && IS_NEWLINE(eptr)))
       
  3628             RRETURN(MATCH_NOMATCH);
       
  3629 
       
  3630           c = *eptr++;
       
  3631           switch(ctype)
       
  3632             {
       
  3633             case OP_ANY:     /* This is the non-NL case */
       
  3634             case OP_ALLANY:
       
  3635             case OP_ANYBYTE:
       
  3636             break;
       
  3637 
       
  3638             case OP_ANYNL:
       
  3639             switch(c)
       
  3640               {
       
  3641               default: RRETURN(MATCH_NOMATCH);
       
  3642               case 0x000d:
       
  3643               if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
       
  3644               break;
       
  3645 
       
  3646               case 0x000a:
       
  3647               break;
       
  3648 
       
  3649               case 0x000b:
       
  3650               case 0x000c:
       
  3651               case 0x0085:
       
  3652               if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
       
  3653               break;
       
  3654               }
       
  3655             break;
       
  3656 
       
  3657             case OP_NOT_HSPACE:
       
  3658             switch(c)
       
  3659               {
       
  3660               default: break;
       
  3661               case 0x09:      /* HT */
       
  3662               case 0x20:      /* SPACE */
       
  3663               case 0xa0:      /* NBSP */
       
  3664               RRETURN(MATCH_NOMATCH);
       
  3665               }
       
  3666             break;
       
  3667 
       
  3668             case OP_HSPACE:
       
  3669             switch(c)
       
  3670               {
       
  3671               default: RRETURN(MATCH_NOMATCH);
       
  3672               case 0x09:      /* HT */
       
  3673               case 0x20:      /* SPACE */
       
  3674               case 0xa0:      /* NBSP */
       
  3675               break;
       
  3676               }
       
  3677             break;
       
  3678 
       
  3679             case OP_NOT_VSPACE:
       
  3680             switch(c)
       
  3681               {
       
  3682               default: break;
       
  3683               case 0x0a:      /* LF */
       
  3684               case 0x0b:      /* VT */
       
  3685               case 0x0c:      /* FF */
       
  3686               case 0x0d:      /* CR */
       
  3687               case 0x85:      /* NEL */
       
  3688               RRETURN(MATCH_NOMATCH);
       
  3689               }
       
  3690             break;
       
  3691 
       
  3692             case OP_VSPACE:
       
  3693             switch(c)
       
  3694               {
       
  3695               default: RRETURN(MATCH_NOMATCH);
       
  3696               case 0x0a:      /* LF */
       
  3697               case 0x0b:      /* VT */
       
  3698               case 0x0c:      /* FF */
       
  3699               case 0x0d:      /* CR */
       
  3700               case 0x85:      /* NEL */
       
  3701               break;
       
  3702               }
       
  3703             break;
       
  3704 
       
  3705             case OP_NOT_DIGIT:
       
  3706             if ((md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
       
  3707             break;
       
  3708 
       
  3709             case OP_DIGIT:
       
  3710             if ((md->ctypes[c] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
       
  3711             break;
       
  3712 
       
  3713             case OP_NOT_WHITESPACE:
       
  3714             if ((md->ctypes[c] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
       
  3715             break;
       
  3716 
       
  3717             case OP_WHITESPACE:
       
  3718             if  ((md->ctypes[c] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
       
  3719             break;
       
  3720 
       
  3721             case OP_NOT_WORDCHAR:
       
  3722             if ((md->ctypes[c] & ctype_word) != 0) RRETURN(MATCH_NOMATCH);
       
  3723             break;
       
  3724 
       
  3725             case OP_WORDCHAR:
       
  3726             if ((md->ctypes[c] & ctype_word) == 0) RRETURN(MATCH_NOMATCH);
       
  3727             break;
       
  3728 
       
  3729             default:
       
  3730             RRETURN(PCRE_ERROR_INTERNAL);
       
  3731             }
       
  3732           }
       
  3733         }
       
  3734       /* Control never gets here */
       
  3735       }
       
  3736 
       
  3737     /* If maximizing, it is worth using inline code for speed, doing the type
       
  3738     test once at the start (i.e. keep it out of the loop). Again, keep the
       
  3739     UTF-8 and UCP stuff separate. */
       
  3740 
       
  3741     else
       
  3742       {
       
  3743       pp = eptr;  /* Remember where we started */
       
  3744 
       
  3745 #ifdef SUPPORT_UCP
       
  3746       if (prop_type >= 0)
       
  3747         {
       
  3748         switch(prop_type)
       
  3749           {
       
  3750           case PT_ANY:
       
  3751           for (i = min; i < max; i++)
       
  3752             {
       
  3753             int len = 1;
       
  3754             if (eptr >= md->end_subject) break;
       
  3755             GETCHARLEN(c, eptr, len);
       
  3756             if (prop_fail_result) break;
       
  3757             eptr+= len;
       
  3758             }
       
  3759           break;
       
  3760 
       
  3761           case PT_LAMP:
       
  3762           for (i = min; i < max; i++)
       
  3763             {
       
  3764             int len = 1;
       
  3765             if (eptr >= md->end_subject) break;
       
  3766             GETCHARLEN(c, eptr, len);
       
  3767             prop_chartype = UCD_CHARTYPE(c);
       
  3768             if ((prop_chartype == ucp_Lu ||
       
  3769                  prop_chartype == ucp_Ll ||
       
  3770                  prop_chartype == ucp_Lt) == prop_fail_result)
       
  3771               break;
       
  3772             eptr+= len;
       
  3773             }
       
  3774           break;
       
  3775 
       
  3776           case PT_GC:
       
  3777           for (i = min; i < max; i++)
       
  3778             {
       
  3779             int len = 1;
       
  3780             if (eptr >= md->end_subject) break;
       
  3781             GETCHARLEN(c, eptr, len);
       
  3782             prop_category = UCD_CATEGORY(c);
       
  3783             if ((prop_category == prop_value) == prop_fail_result)
       
  3784               break;
       
  3785             eptr+= len;
       
  3786             }
       
  3787           break;
       
  3788 
       
  3789           case PT_PC:
       
  3790           for (i = min; i < max; i++)
       
  3791             {
       
  3792             int len = 1;
       
  3793             if (eptr >= md->end_subject) break;
       
  3794             GETCHARLEN(c, eptr, len);
       
  3795             prop_chartype = UCD_CHARTYPE(c);
       
  3796             if ((prop_chartype == prop_value) == prop_fail_result)
       
  3797               break;
       
  3798             eptr+= len;
       
  3799             }
       
  3800           break;
       
  3801 
       
  3802           case PT_SC:
       
  3803           for (i = min; i < max; i++)
       
  3804             {
       
  3805             int len = 1;
       
  3806             if (eptr >= md->end_subject) break;
       
  3807             GETCHARLEN(c, eptr, len);
       
  3808             prop_script = UCD_SCRIPT(c);
       
  3809             if ((prop_script == prop_value) == prop_fail_result)
       
  3810               break;
       
  3811             eptr+= len;
       
  3812             }
       
  3813           break;
       
  3814           }
       
  3815 
       
  3816         /* eptr is now past the end of the maximum run */
       
  3817 
       
  3818         if (possessive) continue;
       
  3819         for(;;)
       
  3820           {
       
  3821           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM44);
       
  3822           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       
  3823           if (eptr-- == pp) break;        /* Stop if tried at original pos */
       
  3824           if (utf8) BACKCHAR(eptr);
       
  3825           }
       
  3826         }
       
  3827 
       
  3828       /* Match extended Unicode sequences. We will get here only if the
       
  3829       support is in the binary; otherwise a compile-time error occurs. */
       
  3830 
       
  3831       else if (ctype == OP_EXTUNI)
       
  3832         {
       
  3833         for (i = min; i < max; i++)
       
  3834           {
       
  3835           if (eptr >= md->end_subject) break;
       
  3836           GETCHARINCTEST(c, eptr);
       
  3837           prop_category = UCD_CATEGORY(c);
       
  3838           if (prop_category == ucp_M) break;
       
  3839           while (eptr < md->end_subject)
       
  3840             {
       
  3841             int len = 1;
       
  3842             if (!utf8) c = *eptr; else
       
  3843               {
       
  3844               GETCHARLEN(c, eptr, len);
       
  3845               }
       
  3846             prop_category = UCD_CATEGORY(c);
       
  3847             if (prop_category != ucp_M) break;
       
  3848             eptr += len;
       
  3849             }
       
  3850           }
       
  3851 
       
  3852         /* eptr is now past the end of the maximum run */
       
  3853 
       
  3854         if (possessive) continue;
       
  3855         for(;;)
       
  3856           {
       
  3857           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM45);
       
  3858           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       
  3859           if (eptr-- == pp) break;        /* Stop if tried at original pos */
       
  3860           for (;;)                        /* Move back over one extended */
       
  3861             {
       
  3862             int len = 1;
       
  3863             if (!utf8) c = *eptr; else
       
  3864               {
       
  3865               BACKCHAR(eptr);
       
  3866               GETCHARLEN(c, eptr, len);
       
  3867               }
       
  3868             prop_category = UCD_CATEGORY(c);
       
  3869             if (prop_category != ucp_M) break;
       
  3870             eptr--;
       
  3871             }
       
  3872           }
       
  3873         }
       
  3874 
       
  3875       else
       
  3876 #endif   /* SUPPORT_UCP */
       
  3877 
       
  3878 #ifdef SUPPORT_UTF8
       
  3879       /* UTF-8 mode */
       
  3880 
       
  3881       if (utf8)
       
  3882         {
       
  3883         switch(ctype)
       
  3884           {
       
  3885           case OP_ANY:
       
  3886           if (max < INT_MAX)
       
  3887             {
       
  3888             for (i = min; i < max; i++)
       
  3889               {
       
  3890               if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
       
  3891               eptr++;
       
  3892               while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
       
  3893               }
       
  3894             }
       
  3895 
       
  3896           /* Handle unlimited UTF-8 repeat */
       
  3897 
       
  3898           else
       
  3899             {
       
  3900             for (i = min; i < max; i++)
       
  3901               {
       
  3902               if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
       
  3903               eptr++;
       
  3904               while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
       
  3905               }
       
  3906             }
       
  3907           break;
       
  3908 
       
  3909           case OP_ALLANY:
       
  3910           if (max < INT_MAX)
       
  3911             {
       
  3912             for (i = min; i < max; i++)
       
  3913               {
       
  3914               if (eptr >= md->end_subject) break;
       
  3915               eptr++;
       
  3916               while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
       
  3917               }
       
  3918             }
       
  3919           else eptr = md->end_subject;   /* Unlimited UTF-8 repeat */
       
  3920           break;
       
  3921 
       
  3922           /* The byte case is the same as non-UTF8 */
       
  3923 
       
  3924           case OP_ANYBYTE:
       
  3925           c = max - min;
       
  3926           if (c > (unsigned int)(md->end_subject - eptr))
       
  3927             c = md->end_subject - eptr;
       
  3928           eptr += c;
       
  3929           break;
       
  3930 
       
  3931           case OP_ANYNL:
       
  3932           for (i = min; i < max; i++)
       
  3933             {
       
  3934             int len = 1;
       
  3935             if (eptr >= md->end_subject) break;
       
  3936             GETCHARLEN(c, eptr, len);
       
  3937             if (c == 0x000d)
       
  3938               {
       
  3939               if (++eptr >= md->end_subject) break;
       
  3940               if (*eptr == 0x000a) eptr++;
       
  3941               }
       
  3942             else
       
  3943               {
       
  3944               if (c != 0x000a &&
       
  3945                   (md->bsr_anycrlf ||
       
  3946                    (c != 0x000b && c != 0x000c &&
       
  3947                     c != 0x0085 && c != 0x2028 && c != 0x2029)))
       
  3948                 break;
       
  3949               eptr += len;
       
  3950               }
       
  3951             }
       
  3952           break;
       
  3953 
       
  3954           case OP_NOT_HSPACE:
       
  3955           case OP_HSPACE:
       
  3956           for (i = min; i < max; i++)
       
  3957             {
       
  3958             BOOL gotspace;
       
  3959             int len = 1;
       
  3960             if (eptr >= md->end_subject) break;
       
  3961             GETCHARLEN(c, eptr, len);
       
  3962             switch(c)
       
  3963               {
       
  3964               default: gotspace = FALSE; break;
       
  3965               case 0x09:      /* HT */
       
  3966               case 0x20:      /* SPACE */
       
  3967               case 0xa0:      /* NBSP */
       
  3968               case 0x1680:    /* OGHAM SPACE MARK */
       
  3969               case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
       
  3970               case 0x2000:    /* EN QUAD */
       
  3971               case 0x2001:    /* EM QUAD */
       
  3972               case 0x2002:    /* EN SPACE */
       
  3973               case 0x2003:    /* EM SPACE */
       
  3974               case 0x2004:    /* THREE-PER-EM SPACE */
       
  3975               case 0x2005:    /* FOUR-PER-EM SPACE */
       
  3976               case 0x2006:    /* SIX-PER-EM SPACE */
       
  3977               case 0x2007:    /* FIGURE SPACE */
       
  3978               case 0x2008:    /* PUNCTUATION SPACE */
       
  3979               case 0x2009:    /* THIN SPACE */
       
  3980               case 0x200A:    /* HAIR SPACE */
       
  3981               case 0x202f:    /* NARROW NO-BREAK SPACE */
       
  3982               case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
       
  3983               case 0x3000:    /* IDEOGRAPHIC SPACE */
       
  3984               gotspace = TRUE;
       
  3985               break;
       
  3986               }
       
  3987             if (gotspace == (ctype == OP_NOT_HSPACE)) break;
       
  3988             eptr += len;
       
  3989             }
       
  3990           break;
       
  3991 
       
  3992           case OP_NOT_VSPACE:
       
  3993           case OP_VSPACE:
       
  3994           for (i = min; i < max; i++)
       
  3995             {
       
  3996             BOOL gotspace;
       
  3997             int len = 1;
       
  3998             if (eptr >= md->end_subject) break;
       
  3999             GETCHARLEN(c, eptr, len);
       
  4000             switch(c)
       
  4001               {
       
  4002               default: gotspace = FALSE; break;
       
  4003               case 0x0a:      /* LF */
       
  4004               case 0x0b:      /* VT */
       
  4005               case 0x0c:      /* FF */
       
  4006               case 0x0d:      /* CR */
       
  4007               case 0x85:      /* NEL */
       
  4008               case 0x2028:    /* LINE SEPARATOR */
       
  4009               case 0x2029:    /* PARAGRAPH SEPARATOR */
       
  4010               gotspace = TRUE;
       
  4011               break;
       
  4012               }
       
  4013             if (gotspace == (ctype == OP_NOT_VSPACE)) break;
       
  4014             eptr += len;
       
  4015             }
       
  4016           break;
       
  4017 
       
  4018           case OP_NOT_DIGIT:
       
  4019           for (i = min; i < max; i++)
       
  4020             {
       
  4021             int len = 1;
       
  4022             if (eptr >= md->end_subject) break;
       
  4023             GETCHARLEN(c, eptr, len);
       
  4024             if (c < 256 && (md->ctypes[c] & ctype_digit) != 0) break;
       
  4025             eptr+= len;
       
  4026             }
       
  4027           break;
       
  4028 
       
  4029           case OP_DIGIT:
       
  4030           for (i = min; i < max; i++)
       
  4031             {
       
  4032             int len = 1;
       
  4033             if (eptr >= md->end_subject) break;
       
  4034             GETCHARLEN(c, eptr, len);
       
  4035             if (c >= 256 ||(md->ctypes[c] & ctype_digit) == 0) break;
       
  4036             eptr+= len;
       
  4037             }
       
  4038           break;
       
  4039 
       
  4040           case OP_NOT_WHITESPACE:
       
  4041           for (i = min; i < max; i++)
       
  4042             {
       
  4043             int len = 1;
       
  4044             if (eptr >= md->end_subject) break;
       
  4045             GETCHARLEN(c, eptr, len);
       
  4046             if (c < 256 && (md->ctypes[c] & ctype_space) != 0) break;
       
  4047             eptr+= len;
       
  4048             }
       
  4049           break;
       
  4050 
       
  4051           case OP_WHITESPACE:
       
  4052           for (i = min; i < max; i++)
       
  4053             {
       
  4054             int len = 1;
       
  4055             if (eptr >= md->end_subject) break;
       
  4056             GETCHARLEN(c, eptr, len);
       
  4057             if (c >= 256 ||(md->ctypes[c] & ctype_space) == 0) break;
       
  4058             eptr+= len;
       
  4059             }
       
  4060           break;
       
  4061 
       
  4062           case OP_NOT_WORDCHAR:
       
  4063           for (i = min; i < max; i++)
       
  4064             {
       
  4065             int len = 1;
       
  4066             if (eptr >= md->end_subject) break;
       
  4067             GETCHARLEN(c, eptr, len);
       
  4068             if (c < 256 && (md->ctypes[c] & ctype_word) != 0) break;
       
  4069             eptr+= len;
       
  4070             }
       
  4071           break;
       
  4072 
       
  4073           case OP_WORDCHAR:
       
  4074           for (i = min; i < max; i++)
       
  4075             {
       
  4076             int len = 1;
       
  4077             if (eptr >= md->end_subject) break;
       
  4078             GETCHARLEN(c, eptr, len);
       
  4079             if (c >= 256 || (md->ctypes[c] & ctype_word) == 0) break;
       
  4080             eptr+= len;
       
  4081             }
       
  4082           break;
       
  4083 
       
  4084           default:
       
  4085           RRETURN(PCRE_ERROR_INTERNAL);
       
  4086           }
       
  4087 
       
  4088         /* eptr is now past the end of the maximum run */
       
  4089 
       
  4090         if (possessive) continue;
       
  4091         for(;;)
       
  4092           {
       
  4093           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM46);
       
  4094           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       
  4095           if (eptr-- == pp) break;        /* Stop if tried at original pos */
       
  4096           BACKCHAR(eptr);
       
  4097           }
       
  4098         }
       
  4099       else
       
  4100 #endif  /* SUPPORT_UTF8 */
       
  4101 
       
  4102       /* Not UTF-8 mode */
       
  4103         {
       
  4104         switch(ctype)
       
  4105           {
       
  4106           case OP_ANY:
       
  4107           for (i = min; i < max; i++)
       
  4108             {
       
  4109             if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
       
  4110             eptr++;
       
  4111             }
       
  4112           break;
       
  4113 
       
  4114           case OP_ALLANY:
       
  4115           case OP_ANYBYTE:
       
  4116           c = max - min;
       
  4117           if (c > (unsigned int)(md->end_subject - eptr))
       
  4118             c = md->end_subject - eptr;
       
  4119           eptr += c;
       
  4120           break;
       
  4121 
       
  4122           case OP_ANYNL:
       
  4123           for (i = min; i < max; i++)
       
  4124             {
       
  4125             if (eptr >= md->end_subject) break;
       
  4126             c = *eptr;
       
  4127             if (c == 0x000d)
       
  4128               {
       
  4129               if (++eptr >= md->end_subject) break;
       
  4130               if (*eptr == 0x000a) eptr++;
       
  4131               }
       
  4132             else
       
  4133               {
       
  4134               if (c != 0x000a &&
       
  4135                   (md->bsr_anycrlf ||
       
  4136                     (c != 0x000b && c != 0x000c && c != 0x0085)))
       
  4137                 break;
       
  4138               eptr++;
       
  4139               }
       
  4140             }
       
  4141           break;
       
  4142 
       
  4143           case OP_NOT_HSPACE:
       
  4144           for (i = min; i < max; i++)
       
  4145             {
       
  4146             if (eptr >= md->end_subject) break;
       
  4147             c = *eptr;
       
  4148             if (c == 0x09 || c == 0x20 || c == 0xa0) break;
       
  4149             eptr++;
       
  4150             }
       
  4151           break;
       
  4152 
       
  4153           case OP_HSPACE:
       
  4154           for (i = min; i < max; i++)
       
  4155             {
       
  4156             if (eptr >= md->end_subject) break;
       
  4157             c = *eptr;
       
  4158             if (c != 0x09 && c != 0x20 && c != 0xa0) break;
       
  4159             eptr++;
       
  4160             }
       
  4161           break;
       
  4162 
       
  4163           case OP_NOT_VSPACE:
       
  4164           for (i = min; i < max; i++)
       
  4165             {
       
  4166             if (eptr >= md->end_subject) break;
       
  4167             c = *eptr;
       
  4168             if (c == 0x0a || c == 0x0b || c == 0x0c || c == 0x0d || c == 0x85)
       
  4169               break;
       
  4170             eptr++;
       
  4171             }
       
  4172           break;
       
  4173 
       
  4174           case OP_VSPACE:
       
  4175           for (i = min; i < max; i++)
       
  4176             {
       
  4177             if (eptr >= md->end_subject) break;
       
  4178             c = *eptr;
       
  4179             if (c != 0x0a && c != 0x0b && c != 0x0c && c != 0x0d && c != 0x85)
       
  4180               break;
       
  4181             eptr++;
       
  4182             }
       
  4183           break;
       
  4184 
       
  4185           case OP_NOT_DIGIT:
       
  4186           for (i = min; i < max; i++)
       
  4187             {
       
  4188             if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) != 0)
       
  4189               break;
       
  4190             eptr++;
       
  4191             }
       
  4192           break;
       
  4193 
       
  4194           case OP_DIGIT:
       
  4195           for (i = min; i < max; i++)
       
  4196             {
       
  4197             if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) == 0)
       
  4198               break;
       
  4199             eptr++;
       
  4200             }
       
  4201           break;
       
  4202 
       
  4203           case OP_NOT_WHITESPACE:
       
  4204           for (i = min; i < max; i++)
       
  4205             {
       
  4206             if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) != 0)
       
  4207               break;
       
  4208             eptr++;
       
  4209             }
       
  4210           break;
       
  4211 
       
  4212           case OP_WHITESPACE:
       
  4213           for (i = min; i < max; i++)
       
  4214             {
       
  4215             if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) == 0)
       
  4216               break;
       
  4217             eptr++;
       
  4218             }
       
  4219           break;
       
  4220 
       
  4221           case OP_NOT_WORDCHAR:
       
  4222           for (i = min; i < max; i++)
       
  4223             {
       
  4224             if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) != 0)
       
  4225               break;
       
  4226             eptr++;
       
  4227             }
       
  4228           break;
       
  4229 
       
  4230           case OP_WORDCHAR:
       
  4231           for (i = min; i < max; i++)
       
  4232             {
       
  4233             if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) == 0)
       
  4234               break;
       
  4235             eptr++;
       
  4236             }
       
  4237           break;
       
  4238 
       
  4239           default:
       
  4240           RRETURN(PCRE_ERROR_INTERNAL);
       
  4241           }
       
  4242 
       
  4243         /* eptr is now past the end of the maximum run */
       
  4244 
       
  4245         if (possessive) continue;
       
  4246         while (eptr >= pp)
       
  4247           {
       
  4248           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM47);
       
  4249           eptr--;
       
  4250           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       
  4251           }
       
  4252         }
       
  4253 
       
  4254       /* Get here if we can't make it match with any permitted repetitions */
       
  4255 
       
  4256       RRETURN(MATCH_NOMATCH);
       
  4257       }
       
  4258     /* Control never gets here */
       
  4259 
       
  4260     /* There's been some horrible disaster. Arrival here can only mean there is
       
  4261     something seriously wrong in the code above or the OP_xxx definitions. */
       
  4262 
       
  4263     default:
       
  4264     DPRINTF(("Unknown opcode %d\n", *ecode));
       
  4265     RRETURN(PCRE_ERROR_UNKNOWN_OPCODE);
       
  4266     }
       
  4267 
       
  4268   /* Do not stick any code in here without much thought; it is assumed
       
  4269   that "continue" in the code above comes out to here to repeat the main
       
  4270   loop. */
       
  4271 
       
  4272   }             /* End of main loop */
       
  4273 /* Control never reaches here */
       
  4274 
       
  4275 
       
  4276 /* When compiling to use the heap rather than the stack for recursive calls to
       
  4277 match(), the RRETURN() macro jumps here. The number that is saved in
       
  4278 frame->Xwhere indicates which label we actually want to return to. */
       
  4279 
       
  4280 #ifdef NO_RECURSE
       
  4281 #define LBL(val) case val: goto L_RM##val;
       
  4282 HEAP_RETURN:
       
  4283 switch (frame->Xwhere)
       
  4284   {
       
  4285   LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)
       
  4286   LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(17)
       
  4287   LBL(19) LBL(24) LBL(25) LBL(26) LBL(27) LBL(29) LBL(31) LBL(33)
       
  4288   LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)
       
  4289   LBL(53) LBL(54)
       
  4290 #ifdef SUPPORT_UTF8
       
  4291   LBL(16) LBL(18) LBL(20) LBL(21) LBL(22) LBL(23) LBL(28) LBL(30)
       
  4292   LBL(32) LBL(34) LBL(42) LBL(46)
       
  4293 #ifdef SUPPORT_UCP
       
  4294   LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
       
  4295 #endif  /* SUPPORT_UCP */
       
  4296 #endif  /* SUPPORT_UTF8 */
       
  4297   default:
       
  4298   DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));
       
  4299   return PCRE_ERROR_INTERNAL;
       
  4300   }
       
  4301 #undef LBL
       
  4302 #endif  /* NO_RECURSE */
       
  4303 }
       
  4304 
       
  4305 
       
  4306 /***************************************************************************
       
  4307 ****************************************************************************
       
  4308                    RECURSION IN THE match() FUNCTION
       
  4309 
       
  4310 Undefine all the macros that were defined above to handle this. */
       
  4311 
       
  4312 #ifdef NO_RECURSE
       
  4313 #undef eptr
       
  4314 #undef ecode
       
  4315 #undef mstart
       
  4316 #undef offset_top
       
  4317 #undef ims
       
  4318 #undef eptrb
       
  4319 #undef flags
       
  4320 
       
  4321 #undef callpat
       
  4322 #undef charptr
       
  4323 #undef data
       
  4324 #undef next
       
  4325 #undef pp
       
  4326 #undef prev
       
  4327 #undef saved_eptr
       
  4328 
       
  4329 #undef new_recursive
       
  4330 
       
  4331 #undef cur_is_word
       
  4332 #undef condition
       
  4333 #undef prev_is_word
       
  4334 
       
  4335 #undef original_ims
       
  4336 
       
  4337 #undef ctype
       
  4338 #undef length
       
  4339 #undef max
       
  4340 #undef min
       
  4341 #undef number
       
  4342 #undef offset
       
  4343 #undef op
       
  4344 #undef save_capture_last
       
  4345 #undef save_offset1
       
  4346 #undef save_offset2
       
  4347 #undef save_offset3
       
  4348 #undef stacksave
       
  4349 
       
  4350 #undef newptrb
       
  4351 
       
  4352 #endif
       
  4353 
       
  4354 /* These two are defined as macros in both cases */
       
  4355 
       
  4356 #undef fc
       
  4357 #undef fi
       
  4358 
       
  4359 /***************************************************************************
       
  4360 ***************************************************************************/
       
  4361 
       
  4362 
       
  4363 
       
  4364 /*************************************************
       
  4365 *         Execute a Regular Expression           *
       
  4366 *************************************************/
       
  4367 
       
  4368 /* This function applies a compiled re to a subject string and picks out
       
  4369 portions of the string if it matches. Two elements in the vector are set for
       
  4370 each substring: the offsets to the start and end of the substring.
       
  4371 
       
  4372 Arguments:
       
  4373   argument_re     points to the compiled expression
       
  4374   extra_data      points to extra data or is NULL
       
  4375   subject         points to the subject string
       
  4376   length          length of subject string (may contain binary zeros)
       
  4377   start_offset    where to start in the subject string
       
  4378   options         option bits
       
  4379   offsets         points to a vector of ints to be filled in with offsets
       
  4380   offsetcount     the number of elements in the vector
       
  4381 
       
  4382 Returns:          > 0 => success; value is the number of elements filled in
       
  4383                   = 0 => success, but offsets is not big enough
       
  4384                    -1 => failed to match
       
  4385                  < -1 => some kind of unexpected problem
       
  4386 */
       
  4387 
       
  4388 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
       
  4389 pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
       
  4390   PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
       
  4391   int offsetcount)
       
  4392 {
       
  4393 int rc, resetcount, ocount;
       
  4394 int first_byte = -1;
       
  4395 int req_byte = -1;
       
  4396 int req_byte2 = -1;
       
  4397 int newline;
       
  4398 unsigned long int ims;
       
  4399 BOOL using_temporary_offsets = FALSE;
       
  4400 BOOL anchored;
       
  4401 BOOL startline;
       
  4402 BOOL firstline;
       
  4403 BOOL first_byte_caseless = FALSE;
       
  4404 BOOL req_byte_caseless = FALSE;
       
  4405 BOOL utf8;
       
  4406 match_data match_block;
       
  4407 match_data *md = &match_block;
       
  4408 const uschar *tables;
       
  4409 const uschar *start_bits = NULL;
       
  4410 USPTR start_match = (USPTR)subject + start_offset;
       
  4411 USPTR end_subject;
       
  4412 USPTR req_byte_ptr = start_match - 1;
       
  4413 
       
  4414 pcre_study_data internal_study;
       
  4415 const pcre_study_data *study;
       
  4416 
       
  4417 real_pcre internal_re;
       
  4418 const real_pcre *external_re = (const real_pcre *)argument_re;
       
  4419 const real_pcre *re = external_re;
       
  4420 
       
  4421 /* Plausibility checks */
       
  4422 
       
  4423 if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
       
  4424 if (re == NULL || subject == NULL ||
       
  4425    (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
       
  4426 if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
       
  4427 
       
  4428 /* Fish out the optional data from the extra_data structure, first setting
       
  4429 the default values. */
       
  4430 
       
  4431 study = NULL;
       
  4432 md->match_limit = MATCH_LIMIT;
       
  4433 md->match_limit_recursion = MATCH_LIMIT_RECURSION;
       
  4434 md->callout_data = NULL;
       
  4435 
       
  4436 /* The table pointer is always in native byte order. */
       
  4437 
       
  4438 tables = external_re->tables;
       
  4439 
       
  4440 if (extra_data != NULL)
       
  4441   {
       
  4442   register unsigned int flags = extra_data->flags;
       
  4443   if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
       
  4444     study = (const pcre_study_data *)extra_data->study_data;
       
  4445   if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)
       
  4446     md->match_limit = extra_data->match_limit;
       
  4447   if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
       
  4448     md->match_limit_recursion = extra_data->match_limit_recursion;
       
  4449   if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
       
  4450     md->callout_data = extra_data->callout_data;
       
  4451   if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;
       
  4452   }
       
  4453 
       
  4454 /* If the exec call supplied NULL for tables, use the inbuilt ones. This
       
  4455 is a feature that makes it possible to save compiled regex and re-use them
       
  4456 in other programs later. */
       
  4457 
       
  4458 if (tables == NULL) tables = _pcre_default_tables;
       
  4459 
       
  4460 /* Check that the first field in the block is the magic number. If it is not,
       
  4461 test for a regex that was compiled on a host of opposite endianness. If this is
       
  4462 the case, flipped values are put in internal_re and internal_study if there was
       
  4463 study data too. */
       
  4464 
       
  4465 if (re->magic_number != MAGIC_NUMBER)
       
  4466   {
       
  4467   re = _pcre_try_flipped(re, &internal_re, study, &internal_study);
       
  4468   if (re == NULL) return PCRE_ERROR_BADMAGIC;
       
  4469   if (study != NULL) study = &internal_study;
       
  4470   }
       
  4471 
       
  4472 /* Set up other data */
       
  4473 
       
  4474 anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
       
  4475 startline = (re->flags & PCRE_STARTLINE) != 0;
       
  4476 firstline = (re->options & PCRE_FIRSTLINE) != 0;
       
  4477 
       
  4478 /* The code starts after the real_pcre block and the capture name table. */
       
  4479 
       
  4480 md->start_code = (const uschar *)external_re + re->name_table_offset +
       
  4481   re->name_count * re->name_entry_size;
       
  4482 
       
  4483 md->start_subject = (USPTR)subject;
       
  4484 md->start_offset = start_offset;
       
  4485 md->end_subject = md->start_subject + length;
       
  4486 end_subject = md->end_subject;
       
  4487 
       
  4488 md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
       
  4489 utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;
       
  4490 md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
       
  4491 
       
  4492 md->notbol = (options & PCRE_NOTBOL) != 0;
       
  4493 md->noteol = (options & PCRE_NOTEOL) != 0;
       
  4494 md->notempty = (options & PCRE_NOTEMPTY) != 0;
       
  4495 md->partial = (options & PCRE_PARTIAL) != 0;
       
  4496 md->hitend = FALSE;
       
  4497 
       
  4498 md->recursive = NULL;                   /* No recursion at top level */
       
  4499 
       
  4500 md->lcc = tables + lcc_offset;
       
  4501 md->ctypes = tables + ctypes_offset;
       
  4502 
       
  4503 /* Handle different \R options. */
       
  4504 
       
  4505 switch (options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE))
       
  4506   {
       
  4507   case 0:
       
  4508   if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0)
       
  4509     md->bsr_anycrlf = (re->options & PCRE_BSR_ANYCRLF) != 0;
       
  4510   else
       
  4511 #ifdef BSR_ANYCRLF
       
  4512   md->bsr_anycrlf = TRUE;
       
  4513 #else
       
  4514   md->bsr_anycrlf = FALSE;
       
  4515 #endif
       
  4516   break;
       
  4517 
       
  4518   case PCRE_BSR_ANYCRLF:
       
  4519   md->bsr_anycrlf = TRUE;
       
  4520   break;
       
  4521 
       
  4522   case PCRE_BSR_UNICODE:
       
  4523   md->bsr_anycrlf = FALSE;
       
  4524   break;
       
  4525 
       
  4526   default: return PCRE_ERROR_BADNEWLINE;
       
  4527   }
       
  4528 
       
  4529 /* Handle different types of newline. The three bits give eight cases. If
       
  4530 nothing is set at run time, whatever was used at compile time applies. */
       
  4531 
       
  4532 switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options :
       
  4533         (pcre_uint32)options) & PCRE_NEWLINE_BITS)
       
  4534   {
       
  4535   case 0: newline = NEWLINE; break;   /* Compile-time default */
       
  4536   case PCRE_NEWLINE_CR: newline = '\r'; break;
       
  4537   case PCRE_NEWLINE_LF: newline = '\n'; break;
       
  4538   case PCRE_NEWLINE_CR+
       
  4539        PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;
       
  4540   case PCRE_NEWLINE_ANY: newline = -1; break;
       
  4541   case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
       
  4542   default: return PCRE_ERROR_BADNEWLINE;
       
  4543   }
       
  4544 
       
  4545 if (newline == -2)
       
  4546   {
       
  4547   md->nltype = NLTYPE_ANYCRLF;
       
  4548   }
       
  4549 else if (newline < 0)
       
  4550   {
       
  4551   md->nltype = NLTYPE_ANY;
       
  4552   }
       
  4553 else
       
  4554   {
       
  4555   md->nltype = NLTYPE_FIXED;
       
  4556   if (newline > 255)
       
  4557     {
       
  4558     md->nllen = 2;
       
  4559     md->nl[0] = (newline >> 8) & 255;
       
  4560     md->nl[1] = newline & 255;
       
  4561     }
       
  4562   else
       
  4563     {
       
  4564     md->nllen = 1;
       
  4565     md->nl[0] = newline;
       
  4566     }
       
  4567   }
       
  4568 
       
  4569 /* Partial matching is supported only for a restricted set of regexes at the
       
  4570 moment. */
       
  4571 
       
  4572 if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0)
       
  4573   return PCRE_ERROR_BADPARTIAL;
       
  4574 
       
  4575 /* Check a UTF-8 string if required. Unfortunately there's no way of passing
       
  4576 back the character offset. */
       
  4577 
       
  4578 #ifdef SUPPORT_UTF8
       
  4579 if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
       
  4580   {
       
  4581   if (_pcre_valid_utf8((uschar *)subject, length) >= 0)
       
  4582     return PCRE_ERROR_BADUTF8;
       
  4583   if (start_offset > 0 && start_offset < length)
       
  4584     {
       
  4585     int tb = ((uschar *)subject)[start_offset];
       
  4586     if (tb > 127)
       
  4587       {
       
  4588       tb &= 0xc0;
       
  4589       if (tb != 0 && tb != 0xc0) return PCRE_ERROR_BADUTF8_OFFSET;
       
  4590       }
       
  4591     }
       
  4592   }
       
  4593 #endif
       
  4594 
       
  4595 /* The ims options can vary during the matching as a result of the presence
       
  4596 of (?ims) items in the pattern. They are kept in a local variable so that
       
  4597 restoring at the exit of a group is easy. */
       
  4598 
       
  4599 ims = re->options & (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL);
       
  4600 
       
  4601 /* If the expression has got more back references than the offsets supplied can
       
  4602 hold, we get a temporary chunk of working store to use during the matching.
       
  4603 Otherwise, we can use the vector supplied, rounding down its size to a multiple
       
  4604 of 3. */
       
  4605 
       
  4606 ocount = offsetcount - (offsetcount % 3);
       
  4607 
       
  4608 if (re->top_backref > 0 && re->top_backref >= ocount/3)
       
  4609   {
       
  4610   ocount = re->top_backref * 3 + 3;
       
  4611   md->offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int));
       
  4612   if (md->offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
       
  4613   using_temporary_offsets = TRUE;
       
  4614   DPRINTF(("Got memory to hold back references\n"));
       
  4615   }
       
  4616 else md->offset_vector = offsets;
       
  4617 
       
  4618 md->offset_end = ocount;
       
  4619 md->offset_max = (2*ocount)/3;
       
  4620 md->offset_overflow = FALSE;
       
  4621 md->capture_last = -1;
       
  4622 
       
  4623 /* Compute the minimum number of offsets that we need to reset each time. Doing
       
  4624 this makes a huge difference to execution time when there aren't many brackets
       
  4625 in the pattern. */
       
  4626 
       
  4627 resetcount = 2 + re->top_bracket * 2;
       
  4628 if (resetcount > offsetcount) resetcount = ocount;
       
  4629 
       
  4630 /* Reset the working variable associated with each extraction. These should
       
  4631 never be used unless previously set, but they get saved and restored, and so we
       
  4632 initialize them to avoid reading uninitialized locations. */
       
  4633 
       
  4634 if (md->offset_vector != NULL)
       
  4635   {
       
  4636   register int *iptr = md->offset_vector + ocount;
       
  4637   register int *iend = iptr - resetcount/2 + 1;
       
  4638   while (--iptr >= iend) *iptr = -1;
       
  4639   }
       
  4640 
       
  4641 /* Set up the first character to match, if available. The first_byte value is
       
  4642 never set for an anchored regular expression, but the anchoring may be forced
       
  4643 at run time, so we have to test for anchoring. The first char may be unset for
       
  4644 an unanchored pattern, of course. If there's no first char and the pattern was
       
  4645 studied, there may be a bitmap of possible first characters. */
       
  4646 
       
  4647 if (!anchored)
       
  4648   {
       
  4649   if ((re->flags & PCRE_FIRSTSET) != 0)
       
  4650     {
       
  4651     first_byte = re->first_byte & 255;
       
  4652     if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)
       
  4653       first_byte = md->lcc[first_byte];
       
  4654     }
       
  4655   else
       
  4656     if (!startline && study != NULL &&
       
  4657       (study->options & PCRE_STUDY_MAPPED) != 0)
       
  4658         start_bits = study->start_bits;
       
  4659   }
       
  4660 
       
  4661 /* For anchored or unanchored matches, there may be a "last known required
       
  4662 character" set. */
       
  4663 
       
  4664 if ((re->flags & PCRE_REQCHSET) != 0)
       
  4665   {
       
  4666   req_byte = re->req_byte & 255;
       
  4667   req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;
       
  4668   req_byte2 = (tables + fcc_offset)[req_byte];  /* case flipped */
       
  4669   }
       
  4670 
       
  4671 
       
  4672 /* ==========================================================================*/
       
  4673 
       
  4674 /* Loop for handling unanchored repeated matching attempts; for anchored regexs
       
  4675 the loop runs just once. */
       
  4676 
       
  4677 for(;;)
       
  4678   {
       
  4679   USPTR save_end_subject = end_subject;
       
  4680   USPTR new_start_match;
       
  4681 
       
  4682   /* Reset the maximum number of extractions we might see. */
       
  4683 
       
  4684   if (md->offset_vector != NULL)
       
  4685     {
       
  4686     register int *iptr = md->offset_vector;
       
  4687     register int *iend = iptr + resetcount;
       
  4688     while (iptr < iend) *iptr++ = -1;
       
  4689     }
       
  4690 
       
  4691   /* Advance to a unique first char if possible. If firstline is TRUE, the
       
  4692   start of the match is constrained to the first line of a multiline string.
       
  4693   That is, the match must be before or at the first newline. Implement this by
       
  4694   temporarily adjusting end_subject so that we stop scanning at a newline. If
       
  4695   the match fails at the newline, later code breaks this loop. */
       
  4696 
       
  4697   if (firstline)
       
  4698     {
       
  4699     USPTR t = start_match;
       
  4700 #ifdef SUPPORT_UTF8
       
  4701     if (utf8)
       
  4702       {
       
  4703       while (t < md->end_subject && !IS_NEWLINE(t))
       
  4704         {
       
  4705         t++;
       
  4706         while (t < end_subject && (*t & 0xc0) == 0x80) t++;
       
  4707         }
       
  4708       }
       
  4709     else
       
  4710 #endif
       
  4711     while (t < md->end_subject && !IS_NEWLINE(t)) t++;
       
  4712     end_subject = t;
       
  4713     }
       
  4714 
       
  4715   /* Now advance to a unique first byte if there is one. */
       
  4716 
       
  4717   if (first_byte >= 0)
       
  4718     {
       
  4719     if (first_byte_caseless)
       
  4720       while (start_match < end_subject && md->lcc[*start_match] != first_byte)
       
  4721         start_match++;
       
  4722     else
       
  4723       while (start_match < end_subject && *start_match != first_byte)
       
  4724         start_match++;
       
  4725     }
       
  4726 
       
  4727   /* Or to just after a linebreak for a multiline match */
       
  4728 
       
  4729   else if (startline)
       
  4730     {
       
  4731     if (start_match > md->start_subject + start_offset)
       
  4732       {
       
  4733 #ifdef SUPPORT_UTF8
       
  4734       if (utf8)
       
  4735         {
       
  4736         while (start_match < end_subject && !WAS_NEWLINE(start_match))
       
  4737           {
       
  4738           start_match++;
       
  4739           while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
       
  4740             start_match++;
       
  4741           }
       
  4742         }
       
  4743       else
       
  4744 #endif
       
  4745       while (start_match < end_subject && !WAS_NEWLINE(start_match))
       
  4746         start_match++;
       
  4747 
       
  4748       /* If we have just passed a CR and the newline option is ANY or ANYCRLF,
       
  4749       and we are now at a LF, advance the match position by one more character.
       
  4750       */
       
  4751 
       
  4752       if (start_match[-1] == '\r' &&
       
  4753            (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
       
  4754            start_match < end_subject &&
       
  4755            *start_match == '\n')
       
  4756         start_match++;
       
  4757       }
       
  4758     }
       
  4759 
       
  4760   /* Or to a non-unique first byte after study */
       
  4761 
       
  4762   else if (start_bits != NULL)
       
  4763     {
       
  4764     while (start_match < end_subject)
       
  4765       {
       
  4766       register unsigned int c = *start_match;
       
  4767       if ((start_bits[c/8] & (1 << (c&7))) == 0) start_match++;
       
  4768         else break;
       
  4769       }
       
  4770     }
       
  4771 
       
  4772   /* Restore fudged end_subject */
       
  4773 
       
  4774   end_subject = save_end_subject;
       
  4775 
       
  4776 #ifdef DEBUG  /* Sigh. Some compilers never learn. */
       
  4777   printf(">>>> Match against: ");
       
  4778   pchars(start_match, end_subject - start_match, TRUE, md);
       
  4779   printf("\n");
       
  4780 #endif
       
  4781 
       
  4782   /* If req_byte is set, we know that that character must appear in the subject
       
  4783   for the match to succeed. If the first character is set, req_byte must be
       
  4784   later in the subject; otherwise the test starts at the match point. This
       
  4785   optimization can save a huge amount of backtracking in patterns with nested
       
  4786   unlimited repeats that aren't going to match. Writing separate code for
       
  4787   cased/caseless versions makes it go faster, as does using an autoincrement
       
  4788   and backing off on a match.
       
  4789 
       
  4790   HOWEVER: when the subject string is very, very long, searching to its end can
       
  4791   take a long time, and give bad performance on quite ordinary patterns. This
       
  4792   showed up when somebody was matching something like /^\d+C/ on a 32-megabyte
       
  4793   string... so we don't do this when the string is sufficiently long.
       
  4794 
       
  4795   ALSO: this processing is disabled when partial matching is requested.
       
  4796   */
       
  4797 
       
  4798   if (req_byte >= 0 &&
       
  4799       end_subject - start_match < REQ_BYTE_MAX &&
       
  4800       !md->partial)
       
  4801     {
       
  4802     register USPTR p = start_match + ((first_byte >= 0)? 1 : 0);
       
  4803 
       
  4804     /* We don't need to repeat the search if we haven't yet reached the
       
  4805     place we found it at last time. */
       
  4806 
       
  4807     if (p > req_byte_ptr)
       
  4808       {
       
  4809       if (req_byte_caseless)
       
  4810         {
       
  4811         while (p < end_subject)
       
  4812           {
       
  4813           register int pp = *p++;
       
  4814           if (pp == req_byte || pp == req_byte2) { p--; break; }
       
  4815           }
       
  4816         }
       
  4817       else
       
  4818         {
       
  4819         while (p < end_subject)
       
  4820           {
       
  4821           if (*p++ == req_byte) { p--; break; }
       
  4822           }
       
  4823         }
       
  4824 
       
  4825       /* If we can't find the required character, break the matching loop,
       
  4826       forcing a match failure. */
       
  4827 
       
  4828       if (p >= end_subject)
       
  4829         {
       
  4830         rc = MATCH_NOMATCH;
       
  4831         break;
       
  4832         }
       
  4833 
       
  4834       /* If we have found the required character, save the point where we
       
  4835       found it, so that we don't search again next time round the loop if
       
  4836       the start hasn't passed this character yet. */
       
  4837 
       
  4838       req_byte_ptr = p;
       
  4839       }
       
  4840     }
       
  4841 
       
  4842   /* OK, we can now run the match. */
       
  4843 
       
  4844   md->start_match_ptr = start_match;
       
  4845   md->match_call_count = 0;
       
  4846   rc = match(start_match, md->start_code, start_match, 2, md, ims, NULL, 0, 0);
       
  4847 
       
  4848   switch(rc)
       
  4849     {
       
  4850     /* NOMATCH and PRUNE advance by one character. THEN at this level acts
       
  4851     exactly like PRUNE. */
       
  4852 
       
  4853     case MATCH_NOMATCH:
       
  4854     case MATCH_PRUNE:
       
  4855     case MATCH_THEN:
       
  4856     new_start_match = start_match + 1;
       
  4857 #ifdef SUPPORT_UTF8
       
  4858     if (utf8)
       
  4859       while(new_start_match < end_subject && (*new_start_match & 0xc0) == 0x80)
       
  4860         new_start_match++;
       
  4861 #endif
       
  4862     break;
       
  4863 
       
  4864     /* SKIP passes back the next starting point explicitly. */
       
  4865 
       
  4866     case MATCH_SKIP:
       
  4867     new_start_match = md->start_match_ptr;
       
  4868     break;
       
  4869 
       
  4870     /* COMMIT disables the bumpalong, but otherwise behaves as NOMATCH. */
       
  4871 
       
  4872     case MATCH_COMMIT:
       
  4873     rc = MATCH_NOMATCH;
       
  4874     goto ENDLOOP;
       
  4875 
       
  4876     /* Any other return is some kind of error. */
       
  4877 
       
  4878     default:
       
  4879     goto ENDLOOP;
       
  4880     }
       
  4881 
       
  4882   /* Control reaches here for the various types of "no match at this point"
       
  4883   result. Reset the code to MATCH_NOMATCH for subsequent checking. */
       
  4884 
       
  4885   rc = MATCH_NOMATCH;
       
  4886 
       
  4887   /* If PCRE_FIRSTLINE is set, the match must happen before or at the first
       
  4888   newline in the subject (though it may continue over the newline). Therefore,
       
  4889   if we have just failed to match, starting at a newline, do not continue. */
       
  4890 
       
  4891   if (firstline && IS_NEWLINE(start_match)) break;
       
  4892 
       
  4893   /* Advance to new matching position */
       
  4894 
       
  4895   start_match = new_start_match;
       
  4896 
       
  4897   /* Break the loop if the pattern is anchored or if we have passed the end of
       
  4898   the subject. */
       
  4899 
       
  4900   if (anchored || start_match > end_subject) break;
       
  4901 
       
  4902   /* If we have just passed a CR and we are now at a LF, and the pattern does
       
  4903   not contain any explicit matches for \r or \n, and the newline option is CRLF
       
  4904   or ANY or ANYCRLF, advance the match position by one more character. */
       
  4905 
       
  4906   if (start_match[-1] == '\r' &&
       
  4907       start_match < end_subject &&
       
  4908       *start_match == '\n' &&
       
  4909       (re->flags & PCRE_HASCRORLF) == 0 &&
       
  4910         (md->nltype == NLTYPE_ANY ||
       
  4911          md->nltype == NLTYPE_ANYCRLF ||
       
  4912          md->nllen == 2))
       
  4913     start_match++;
       
  4914 
       
  4915   }   /* End of for(;;) "bumpalong" loop */
       
  4916 
       
  4917 /* ==========================================================================*/
       
  4918 
       
  4919 /* We reach here when rc is not MATCH_NOMATCH, or if one of the stopping
       
  4920 conditions is true:
       
  4921 
       
  4922 (1) The pattern is anchored or the match was failed by (*COMMIT);
       
  4923 
       
  4924 (2) We are past the end of the subject;
       
  4925 
       
  4926 (3) PCRE_FIRSTLINE is set and we have failed to match at a newline, because
       
  4927     this option requests that a match occur at or before the first newline in
       
  4928     the subject.
       
  4929 
       
  4930 When we have a match and the offset vector is big enough to deal with any
       
  4931 backreferences, captured substring offsets will already be set up. In the case
       
  4932 where we had to get some local store to hold offsets for backreference
       
  4933 processing, copy those that we can. In this case there need not be overflow if
       
  4934 certain parts of the pattern were not used, even though there are more
       
  4935 capturing parentheses than vector slots. */
       
  4936 
       
  4937 ENDLOOP:
       
  4938 
       
  4939 if (rc == MATCH_MATCH)
       
  4940   {
       
  4941   if (using_temporary_offsets)
       
  4942     {
       
  4943     if (offsetcount >= 4)
       
  4944       {
       
  4945       memcpy(offsets + 2, md->offset_vector + 2,
       
  4946         (offsetcount - 2) * sizeof(int));
       
  4947       DPRINTF(("Copied offsets from temporary memory\n"));
       
  4948       }
       
  4949     if (md->end_offset_top > offsetcount) md->offset_overflow = TRUE;
       
  4950     DPRINTF(("Freeing temporary memory\n"));
       
  4951     (pcre_free)(md->offset_vector);
       
  4952     }
       
  4953 
       
  4954   /* Set the return code to the number of captured strings, or 0 if there are
       
  4955   too many to fit into the vector. */
       
  4956 
       
  4957   rc = md->offset_overflow? 0 : md->end_offset_top/2;
       
  4958 
       
  4959   /* If there is space, set up the whole thing as substring 0. The value of
       
  4960   md->start_match_ptr might be modified if \K was encountered on the success
       
  4961   matching path. */
       
  4962 
       
  4963   if (offsetcount < 2) rc = 0; else
       
  4964     {
       
  4965     offsets[0] = md->start_match_ptr - md->start_subject;
       
  4966     offsets[1] = md->end_match_ptr - md->start_subject;
       
  4967     }
       
  4968 
       
  4969   DPRINTF((">>>> returning %d\n", rc));
       
  4970   return rc;
       
  4971   }
       
  4972 
       
  4973 /* Control gets here if there has been an error, or if the overall match
       
  4974 attempt has failed at all permitted starting positions. */
       
  4975 
       
  4976 if (using_temporary_offsets)
       
  4977   {
       
  4978   DPRINTF(("Freeing temporary memory\n"));
       
  4979   (pcre_free)(md->offset_vector);
       
  4980   }
       
  4981 
       
  4982 if (rc != MATCH_NOMATCH)
       
  4983   {
       
  4984   DPRINTF((">>>> error: returning %d\n", rc));
       
  4985   return rc;
       
  4986   }
       
  4987 else if (md->partial && md->hitend)
       
  4988   {
       
  4989   DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));
       
  4990   return PCRE_ERROR_PARTIAL;
       
  4991   }
       
  4992 else
       
  4993   {
       
  4994   DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n"));
       
  4995   return PCRE_ERROR_NOMATCH;
       
  4996   }
       
  4997 }
       
  4998 
       
  4999 /* End of pcre_exec.c */