webengine/osswebengine/JavaScriptCore/pcre/pcre_exec.c
changeset 0 dd21522fd290
child 74 91031d3aab7d
equal deleted inserted replaced
-1:000000000000 0:dd21522fd290
       
     1 /*************************************************
       
     2 *      Perl-Compatible Regular Expressions       *
       
     3 *************************************************/
       
     4 
       
     5 /* PCRE is a library of functions to support regular expressions whose syntax
       
     6 and semantics are as close as possible to those of the Perl 5 language.
       
     7 
       
     8                        Written by Philip Hazel
       
     9            Copyright (c) 1997-2005 University of Cambridge
       
    10 
       
    11     Copyright (C) 2002, 2004, 2006, 2007 Apple Inc. All rights reserved.
       
    12 
       
    13 -----------------------------------------------------------------------------
       
    14 Redistribution and use in source and binary forms, with or without
       
    15 modification, are permitted provided that the following conditions are met:
       
    16 
       
    17     * Redistributions of source code must retain the above copyright notice,
       
    18       this list of conditions and the following disclaimer.
       
    19 
       
    20     * Redistributions in binary form must reproduce the above copyright
       
    21       notice, this list of conditions and the following disclaimer in the
       
    22       documentation and/or other materials provided with the distribution.
       
    23 
       
    24     * Neither the name of the University of Cambridge nor the names of its
       
    25       contributors may be used to endorse or promote products derived from
       
    26       this software without specific prior written permission.
       
    27 
       
    28 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
       
    29 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
       
    30 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
       
    31 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
       
    32 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
       
    33 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
       
    34 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
       
    35 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
       
    36 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
       
    37 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
       
    38 POSSIBILITY OF SUCH DAMAGE.
       
    39 -----------------------------------------------------------------------------
       
    40 */
       
    41 
       
    42 
       
    43 /* This module contains pcre_exec(), the externally visible function that does
       
    44 pattern matching using an NFA algorithm, trying to mimic Perl as closely as
       
    45 possible. There are also some static supporting functions. */
       
    46 
       
    47 #include "pcre_internal.h"
       
    48 
       
    49 /* Avoid warnings on Windows. */
       
    50 #undef min
       
    51 #undef max
       
    52 
       
    53 /* Structure for building a chain of data that actually lives on the
       
    54 stack, for holding the values of the subject pointer at the start of each
       
    55 subpattern, so as to detect when an empty string has been matched by a
       
    56 subpattern - to break infinite loops. When NO_RECURSE is set, these blocks
       
    57 are on the heap, not on the stack. */
       
    58 
       
    59 typedef struct eptrblock {
       
    60   struct eptrblock *epb_prev;
       
    61   const pcre_uchar *epb_saved_eptr;
       
    62 } eptrblock;
       
    63 
       
    64 /* Flag bits for the match() function */
       
    65 
       
    66 #define match_condassert   0x01    /* Called to check a condition assertion */
       
    67 #define match_isgroup      0x02    /* Set if start of bracketed group */
       
    68 
       
    69 /* Non-error returns from the match() function. Error returns are externally
       
    70 defined PCRE_ERROR_xxx codes, which are all negative. */
       
    71 
       
    72 #define MATCH_MATCH        1
       
    73 #define MATCH_NOMATCH      0
       
    74 
       
    75 /* Maximum number of ints of offset to save on the stack for recursive calls.
       
    76 If the offset vector is bigger, malloc is used. This should be a multiple of 3,
       
    77 because the offset vector is always a multiple of 3 long. */
       
    78 
       
    79 #define REC_STACK_SAVE_MAX 30
       
    80 
       
    81 /* Min and max values for the common repeats; for the maxima, 0 => infinity */
       
    82 
       
    83 static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };
       
    84 static const char rep_max[] = { 0, 0, 0, 0, 1, 1 };
       
    85 
       
    86 
       
    87 
       
    88 #ifdef DEBUG
       
    89 /*************************************************
       
    90 *        Debugging function to print chars       *
       
    91 *************************************************/
       
    92 
       
    93 /* Print a sequence of chars in printable format, stopping at the end of the
       
     94 subject if requested.
       
    95 
       
    96 Arguments:
       
    97   p           points to characters
       
    98   length      number to print
       
    99   is_subject  TRUE if printing from within md->start_subject
       
   100   md          pointer to matching data block, if is_subject is TRUE
       
   101 
       
   102 Returns:     nothing
       
   103 */
       
   104 
       
   105 static void
       
   106 pchars(const pcre_uchar *p, int length, BOOL is_subject, match_data *md)
       
   107 {
       
   108 int c;
       
   109 if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
       
   110 while (length-- > 0)
       
   111   if (isprint(c = *(p++))) printf("%c", c);
       
   112 #if PCRE_UTF16
       
   113   else if (c < 256) printf("\\x%02x", c);
       
   114   else printf("\\x{%x}", c);
       
   115 #else
       
   116   else printf("\\x%02x", c);
       
   117 #endif
       
   118 }
       
   119 #endif
       
   120 
       
   121 
       
   122 
       
   123 /*************************************************
       
   124 *          Match a back-reference                *
       
   125 *************************************************/
       
   126 
       
   127 /* If a back reference hasn't been set, the length that is passed is greater
       
   128 than the number of characters left in the string, so the match fails.
       
   129 
       
   130 Arguments:
       
   131   offset      index into the offset vector
       
   132   eptr        points into the subject
       
   133   length      length to be matched
       
   134   md          points to match data block
       
   135   ims         the ims flags
       
   136 
       
   137 Returns:      TRUE if matched
       
   138 */
       
   139 
       
   140 static BOOL
       
   141 match_ref(int offset, register const pcre_uchar *eptr, int length, match_data *md,
       
   142   unsigned long int ims)
       
   143 {
       
   144 const pcre_uchar *p = md->start_subject + md->offset_vector[offset];
       
   145 
       
   146 #ifdef DEBUG
       
   147 if (eptr >= md->end_subject)
       
   148   printf("matching subject <null>");
       
   149 else
       
   150   {
       
   151   printf("matching subject ");
       
   152   pchars(eptr, length, TRUE, md);
       
   153   }
       
   154 printf(" against backref ");
       
   155 pchars(p, length, FALSE, md);
       
   156 printf("\n");
       
   157 #endif
       
   158 
       
   159 /* Always fail if not enough characters left */
       
   160 
       
   161 if (length > md->end_subject - eptr) return FALSE;
       
   162 
       
   163 /* Separate the caselesss case for speed */
       
   164 
       
   165 if ((ims & PCRE_CASELESS) != 0)
       
   166   {
       
   167   while (length-- > 0)
       
   168     if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE;
       
   169   }
       
   170 else
       
   171   { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
       
   172 
       
   173 return TRUE;
       
   174 }
       
   175 
       
   176 
       
   177 
       
   178 /***************************************************************************
       
   179 ****************************************************************************
       
   180                    RECURSION IN THE match() FUNCTION
       
   181 
       
   182 The match() function is highly recursive. Some regular expressions can cause
       
   183 it to recurse thousands of times. I was writing for Unix, so I just let it
       
   184 call itself recursively. This uses the stack for saving everything that has
       
   185 to be saved for a recursive call. On Unix, the stack can be large, and this
       
   186 works fine.
       
   187 
       
   188 It turns out that on non-Unix systems there are problems with programs that
       
   189 use a lot of stack. (This despite the fact that every last chip has oodles
       
   190 of memory these days, and techniques for extending the stack have been known
       
   191 for decades.) So....
       
   192 
       
   193 There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
       
   194 calls by keeping local variables that need to be preserved in blocks of memory
       
    195 obtained from malloc instead of on the stack. Macros are used to
       
   196 achieve this so that the actual code doesn't look very different to what it
       
   197 always used to.
       
   198 ****************************************************************************
       
   199 ***************************************************************************/
       
   200 
       
   201 
       
   202 /* These versions of the macros use the stack, as normal */
       
   203 
       
   204 #ifndef NO_RECURSE
       
   205 #define REGISTER register
       
   206 #define RMATCH(num,rx,ra,rb,rc,rd,re,rf,rg) rx = match(ra,rb,rc,rd,re,rf,rg)
       
   207 #define RRETURN(ra) return ra
       
   208 #else
       
   209 
       
   210 
       
   211 /* These versions of the macros manage a private stack on the heap. Note
       
   212 that the rd argument of RMATCH isn't actually used. It's the md argument of
       
   213 match(), which never changes. */
       
   214 
       
   215 #define REGISTER
       
   216 
       
   217 #ifndef __GNUC__
       
   218 
       
   219 /* Use numbered labels and switch statement at the bottom of the match function. */
       
   220 
       
   221 #define RMATCH_WHERE(num) num
       
   222 #define RRETURN_LABEL RRETURN_SWITCH
       
   223 
       
   224 #else
       
   225 
       
   226 /* Use GCC's computed goto extension. */
       
   227 
       
   228 /* For one test case this is more than 40% faster than the switch statement.
       
   229 We could avoid the use of the num argument entirely by using local labels,
       
   230 but using it for the GCC case as well as the non-GCC case allows us to share
       
   231 a bit more code and notice if we use conflicting numbers.*/
       
   232 
       
   233 #define RMATCH_WHERE(num) &&RRETURN_##num
       
   234 #define RRETURN_LABEL *frame->Xwhere
       
   235 
       
   236 #endif
       
   237 
       
   238 
       
   239 #define RMATCH(num,rx,ra,rb,rc,rd,re,rf,rg)\
       
   240   {\
       
   241   heapframe *newframe;\
       
   242   if (frame >= stackframes && frame + 1 < stackframesend)\
       
   243     newframe = frame + 1;\
       
   244   else\
       
   245     newframe = (pcre_stack_malloc)(sizeof(heapframe));\
       
   246   frame->Xwhere = RMATCH_WHERE(num);\
       
   247   newframe->Xeptr = ra;\
       
   248   newframe->Xecode = rb;\
       
   249   newframe->Xoffset_top = rc;\
       
   250   newframe->Xims = re;\
       
   251   newframe->Xeptrb = rf;\
       
   252   newframe->Xflags = rg;\
       
   253   newframe->Xprevframe = frame;\
       
   254   frame = newframe;\
       
   255   DPRINTF(("restarting from line %d\n", __LINE__));\
       
   256   goto HEAP_RECURSE;\
       
   257 RRETURN_##num:\
       
   258   DPRINTF(("did a goto back to line %d\n", __LINE__));\
       
   259   frame = md->thisframe;\
       
   260   rx = frame->Xresult;\
       
   261   }
       
   262 
       
   263 #define RRETURN(ra)\
       
   264   {\
       
   265   heapframe *newframe = frame;\
       
   266   frame = newframe->Xprevframe;\
       
   267   if (!(newframe >= stackframes && newframe < stackframesend))\
       
   268     (pcre_stack_free)(newframe);\
       
   269   if (frame != NULL)\
       
   270     {\
       
   271     frame->Xresult = ra;\
       
   272     md->thisframe = frame;\
       
   273     goto RRETURN_LABEL;\
       
   274     }\
       
   275   return ra;\
       
   276   }
       
   277 
       
   278 /* Structure for remembering the local variables in a private frame */
       
   279 
       
   280 typedef struct heapframe {
       
   281   struct heapframe *Xprevframe;
       
   282 
       
   283   /* Function arguments that may change */
       
   284 
       
   285   const pcre_uchar *Xeptr;
       
   286   const uschar *Xecode;
       
   287   int Xoffset_top;
       
   288   long int Xims;
       
   289   eptrblock *Xeptrb;
       
   290   int Xflags;
       
   291 
       
   292   /* Function local variables */
       
   293 
       
   294   const uschar *Xcallpat;
       
   295   const uschar *Xcharptr;
       
   296   const uschar *Xdata;
       
   297   const uschar *Xnext;
       
   298   const pcre_uchar *Xpp;
       
   299   const uschar *Xprev;
       
   300   const pcre_uchar *Xsaved_eptr;
       
   301 
       
   302   recursion_info Xnew_recursive;
       
   303 
       
   304   BOOL Xcur_is_word;
       
   305   BOOL Xcondition;
       
   306   BOOL Xminimize;
       
   307   BOOL Xprev_is_word;
       
   308 
       
   309   unsigned long int Xoriginal_ims;
       
   310 
       
   311 #ifdef SUPPORT_UCP
       
   312   int Xprop_type;
       
   313   int Xprop_fail_result;
       
   314   int Xprop_category;
       
   315   int Xprop_chartype;
       
   316   int Xprop_othercase;
       
   317   int Xprop_test_against;
       
   318   int *Xprop_test_variable;
       
   319 
       
   320   int Xrepeat_othercase;
       
   321 #endif
       
   322 
       
   323   int Xctype;
       
   324   int Xfc;
       
   325   int Xfi;
       
   326   int Xlength;
       
   327   int Xmax;
       
   328   int Xmin;
       
   329   int Xnumber;
       
   330   int Xoffset;
       
   331   int Xop;
       
   332   int Xsave_capture_last;
       
   333   int Xsave_offset1, Xsave_offset2, Xsave_offset3;
       
   334   int Xstacksave[REC_STACK_SAVE_MAX];
       
   335 
       
   336   eptrblock Xnewptrb;
       
   337 
       
   338   /* Place to pass back result, and where to jump back to */
       
   339 
       
   340   int Xresult;
       
   341 #ifndef __GNUC__
       
   342   int Xwhere;
       
   343 #else
       
   344   void *Xwhere;
       
   345 #endif
       
   346 
       
   347 } heapframe;
       
   348 
       
   349 #endif
       
   350 
       
   351 
       
   352 /***************************************************************************
       
   353 ***************************************************************************/
       
   354 
       
   355 
       
   356 
       
   357 /*************************************************
       
   358 *         Match from current position            *
       
   359 *************************************************/
       
   360 
       
   361 /* On entry ecode points to the first opcode, and eptr to the first character
       
   362 in the subject string, while eptrb holds the value of eptr at the start of the
       
   363 last bracketed group - used for breaking infinite loops matching zero-length
       
   364 strings. This function is called recursively in many circumstances. Whenever it
       
   365 returns a negative (error) response, the outer incarnation must also return the
       
   366 same response.
       
   367 
       
   368 Performance note: It might be tempting to extract commonly used fields from the
       
   369 md structure (e.g. utf8, end_subject) into individual variables to improve
       
   370 performance. Tests using gcc on a SPARC disproved this; in the first case, it
       
   371 made performance worse.
       
   372 
       
   373 Arguments:
       
   374    eptr        pointer in subject
       
   375    ecode       position in code
       
   376    offset_top  current top pointer
       
   377    md          pointer to "static" info for the match
       
   378    ims         current /i, /m, and /s options
       
   379    eptrb       pointer to chain of blocks containing eptr at start of
       
   380                  brackets - for testing for empty matches
       
   381    flags       can contain
       
   382                  match_condassert - this is an assertion condition
       
   383                  match_isgroup - this is the start of a bracketed group
       
   384 
       
   385 Returns:       MATCH_MATCH if matched            )  these values are >= 0
       
   386                MATCH_NOMATCH if failed to match  )
       
   387                a negative PCRE_ERROR_xxx value if aborted by an error condition
       
   388                  (e.g. stopped by recursion limit)
       
   389 */
       
   390 
       
   391 static int
       
   392 match(REGISTER const pcre_uchar *eptr, REGISTER const uschar *ecode,
       
   393   int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,
       
   394   int flags)
       
   395 {
       
   396 /* These variables do not need to be preserved over recursion in this function,
       
   397 so they can be ordinary variables in all cases. Mark them with "register"
       
   398 because they are used a lot in loops. */
       
   399 
       
   400 register int  rrc;    /* Returns from recursive calls */
       
   401 register int  i;      /* Used for loops not involving calls to RMATCH() */
       
   402 register int  c;      /* Character values not kept over RMATCH() calls */
       
   403 register BOOL utf8;   /* Local copy of UTF-8 flag for speed */
       
   404 
       
   405 /* When recursion is not being used, all "local" variables that have to be
       
   406 preserved over calls to RMATCH() are part of a "frame" which is obtained from
       
   407 heap storage. Set up the top-level frame here; others are obtained from the
       
   408 heap whenever RMATCH() does a "recursion". See the macro definitions above. */
       
   409 
       
   410 #ifdef NO_RECURSE
       
   411 
       
   412 /* The value 16 here is large enough that most regular expressions don't require
       
   413 any calls to pcre_stack_malloc, yet the amount of stack used for the array is
       
   414 modest enough that we don't run out of stack. */
       
   415 heapframe stackframes[16];
       
   416 heapframe *stackframesend = stackframes + sizeof(stackframes) / sizeof(stackframes[0]);
       
   417 
       
   418 heapframe *frame = stackframes;
       
   419 frame->Xprevframe = NULL;            /* Marks the top level */
       
   420 
       
   421 /* Copy in the original argument variables */
       
   422 
       
   423 frame->Xeptr = eptr;
       
   424 frame->Xecode = ecode;
       
   425 frame->Xoffset_top = offset_top;
       
   426 frame->Xims = ims;
       
   427 frame->Xeptrb = eptrb;
       
   428 frame->Xflags = flags;
       
   429 
       
   430 /* This is where control jumps back to to effect "recursion" */
       
   431 
       
   432 HEAP_RECURSE:
       
   433 
       
   434 /* Macros make the argument variables come from the current frame */
       
   435 
       
   436 #define eptr               frame->Xeptr
       
   437 #define ecode              frame->Xecode
       
   438 #define offset_top         frame->Xoffset_top
       
   439 #define ims                frame->Xims
       
   440 #define eptrb              frame->Xeptrb
       
   441 #define flags              frame->Xflags
       
   442 
       
   443 /* Ditto for the local variables */
       
   444 
       
   445 #ifdef SUPPORT_UTF8
       
   446 #define charptr            frame->Xcharptr
       
   447 #endif
       
   448 #define callpat            frame->Xcallpat
       
   449 #define data               frame->Xdata
       
   450 #define next               frame->Xnext
       
   451 #define pp                 frame->Xpp
       
   452 #define prev               frame->Xprev
       
   453 #define saved_eptr         frame->Xsaved_eptr
       
   454 
       
   455 #define new_recursive      frame->Xnew_recursive
       
   456 
       
   457 #define cur_is_word        frame->Xcur_is_word
       
   458 #define condition          frame->Xcondition
       
   459 #define minimize           frame->Xminimize
       
   460 #define prev_is_word       frame->Xprev_is_word
       
   461 
       
   462 #define original_ims       frame->Xoriginal_ims
       
   463 
       
   464 #ifdef SUPPORT_UCP
       
   465 
       
   466 #define prop_type          frame->Xprop_type
       
   467 #define prop_fail_result   frame->Xprop_fail_result
       
   468 #define prop_category      frame->Xprop_category
       
   469 #define prop_chartype      frame->Xprop_chartype
       
   470 #define prop_othercase     frame->Xprop_othercase
       
   471 #define prop_test_against  frame->Xprop_test_against
       
   472 #define prop_test_variable frame->Xprop_test_variable
       
   473 
       
   474 #define repeat_othercase   frame->Xrepeat_othercase
       
   475 
       
   476 #endif
       
   477 
       
   478 #define ctype              frame->Xctype
       
   479 #define fc                 frame->Xfc
       
   480 #define fi                 frame->Xfi
       
   481 #define length             frame->Xlength
       
   482 #define max                frame->Xmax
       
   483 #define min                frame->Xmin
       
   484 #define number             frame->Xnumber
       
   485 #define offset             frame->Xoffset
       
   486 #define op                 frame->Xop
       
   487 #define save_capture_last  frame->Xsave_capture_last
       
   488 #define save_offset1       frame->Xsave_offset1
       
   489 #define save_offset2       frame->Xsave_offset2
       
   490 #define save_offset3       frame->Xsave_offset3
       
   491 #define stacksave          frame->Xstacksave
       
   492 
       
   493 #define newptrb            frame->Xnewptrb
       
   494 
       
   495 /* When recursion is being used, local variables are allocated on the stack and
       
   496 get preserved during recursion in the normal way. In this environment, fi and
       
   497 i, and fc and c, can be the same variables. */
       
   498 
       
   499 #else
       
   500 #define fi i
       
   501 #define fc c
       
   502 
       
   503 
       
   504 #if !PCRE_UTF16
       
   505 #ifdef SUPPORT_UTF8                /* Many of these variables are used ony */
       
   506 const uschar *charptr;             /* small blocks of the code. My normal  */
       
   507 #endif                             /* style of coding would have declared  */
       
   508 #endif
       
   509 const uschar *callpat;             /* them within each of those blocks.    */
       
   510 const uschar *data;                /* However, in order to accommodate the */
       
   511 const uschar *next;                /* version of this code that uses an    */
       
   512 const pcre_uchar *pp;              /* external "stack" implemented on the  */
       
   513 const uschar *prev;                /* heap, it is easier to declare them   */
       
   514 const pcre_uchar *saved_eptr;      /* all here, so the declarations can    */
       
   515                                    /* be cut out in a block. The only      */
       
   516 recursion_info new_recursive;      /* declarations within blocks below are */
       
   517                                    /* for variables that do not have to    */
       
   518 BOOL cur_is_word;                  /* be preserved over a recursive call   */
       
   519 BOOL condition;                    /* to RMATCH().                         */
       
   520 BOOL minimize;
       
   521 BOOL prev_is_word;
       
   522 
       
   523 unsigned long int original_ims;
       
   524 
       
   525 #ifdef SUPPORT_UCP
       
   526 
       
   527 int prop_type;
       
   528 int prop_fail_result;
       
   529 int prop_category;
       
   530 int prop_chartype;
       
   531 int prop_othercase;
       
   532 int prop_test_against;
       
   533 int *prop_test_variable;
       
   534 
       
   535 int repeat_othercase;
       
   536 
       
   537 #endif
       
   538 
       
   539 int ctype;
       
   540 int length;
       
   541 int max;
       
   542 int min;
       
   543 int number;
       
   544 int offset;
       
   545 int op;
       
   546 int save_capture_last;
       
   547 int save_offset1, save_offset2, save_offset3;
       
   548 int stacksave[REC_STACK_SAVE_MAX];
       
   549 
       
   550 eptrblock newptrb;
       
   551 #endif
       
   552 
       
    553 /* These statements are here to stop the compiler complaining about uninitialized
       
   554 variables. */
       
   555 
       
   556 #ifdef SUPPORT_UCP
       
   557 prop_fail_result = 0;
       
   558 prop_test_against = 0;
       
   559 prop_test_variable = NULL;
       
   560 #endif
       
   561 
       
   562 /* OK, now we can get on with the real code of the function. Recursion is
       
   563 specified by the macros RMATCH and RRETURN. When NO_RECURSE is *not* defined,
       
   564 these just turn into a recursive call to match() and a "return", respectively.
       
   565 However, RMATCH isn't like a function call because it's quite a complicated
       
   566 macro. It has to be used in one particular way. This shouldn't, however, impact
       
   567 performance when true recursion is being used. */
       
   568 
       
   569 utf8 = md->utf8;       /* Local copy of the flag */
       
   570 
       
   571 if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
       
   572 
       
   573 original_ims = ims;    /* Save for resetting on ')' */
       
   574 
       
   575 /* At the start of a bracketed group, add the current subject pointer to the
       
   576 stack of such pointers, to be re-instated at the end of the group when we hit
       
   577 the closing ket. When match() is called in other circumstances, we don't add to
       
   578 this stack. */
       
   579 
       
   580 if ((flags & match_isgroup) != 0)
       
   581   {
       
   582   newptrb.epb_prev = eptrb;
       
   583   newptrb.epb_saved_eptr = eptr;
       
   584   eptrb = &newptrb;
       
   585   }
       
   586 
       
   587 /* Now start processing the operations. */
       
   588 
       
   589 for (;;)
       
   590   {
       
   591   op = *ecode;
       
   592   minimize = FALSE;
       
   593 
       
   594   /* For partial matching, remember if we ever hit the end of the subject after
       
   595   matching at least one subject character. */
       
   596 
       
   597   if (md->partial &&
       
   598       eptr >= md->end_subject &&
       
   599       eptr > md->start_match)
       
   600     md->hitend = TRUE;
       
   601 
       
   602   /* Opening capturing bracket. If there is space in the offset vector, save
       
   603   the current subject position in the working slot at the top of the vector. We
       
   604   mustn't change the current values of the data slot, because they may be set
       
   605   from a previous iteration of this group, and be referred to by a reference
       
   606   inside the group.
       
   607 
       
   608   If the bracket fails to match, we need to restore this value and also the
       
   609   values of the final offsets, in case they were set by a previous iteration of
       
   610   the same bracket.
       
   611 
       
   612   If there isn't enough space in the offset vector, treat this as if it were a
       
   613   non-capturing bracket. Don't worry about setting the flag for the error case
       
   614   here; that is handled in the code for KET. */
       
   615 
       
   616   if (op > OP_BRA)
       
   617     {
       
   618     number = op - OP_BRA;
       
   619 
       
   620     /* For extended extraction brackets (large number), we have to fish out the
       
   621     number from a dummy opcode at the start. */
       
   622 
       
   623     if (number > EXTRACT_BASIC_MAX)
       
   624       number = GET2(ecode, 2+LINK_SIZE);
       
   625     offset = number << 1;
       
   626 
       
   627 #ifdef DEBUG
       
   628     printf("start bracket %d subject=", number);
       
   629     pchars(eptr, 16, TRUE, md);
       
   630     printf("\n");
       
   631 #endif
       
   632 
       
   633     if (offset < md->offset_max)
       
   634       {
       
   635       save_offset1 = md->offset_vector[offset];
       
   636       save_offset2 = md->offset_vector[offset+1];
       
   637       save_offset3 = md->offset_vector[md->offset_end - number];
       
   638       save_capture_last = md->capture_last;
       
   639 
       
   640       DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
       
   641       md->offset_vector[md->offset_end - number] = INT_CAST(eptr - md->start_subject);
       
   642 
       
   643       do
       
   644         {
       
   645         RMATCH(1, rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb,
       
   646           match_isgroup);
       
   647         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       
   648         md->capture_last = save_capture_last;
       
   649         ecode += GET(ecode, 1);
       
   650         }
       
   651       while (*ecode == OP_ALT);
       
   652 
       
   653       DPRINTF(("bracket %d failed\n", number));
       
   654 
       
   655       md->offset_vector[offset] = save_offset1;
       
   656       md->offset_vector[offset+1] = save_offset2;
       
   657       md->offset_vector[md->offset_end - number] = save_offset3;
       
   658 
       
   659       RRETURN(MATCH_NOMATCH);
       
   660       }
       
   661 
       
   662     /* Insufficient room for saving captured contents */
       
   663 
       
   664     else op = OP_BRA;
       
   665     }
       
   666 
       
   667   /* Other types of node can be handled by a switch */
       
   668 
       
   669   switch(op)
       
   670     {
       
   671     case OP_BRA:     /* Non-capturing bracket: optimized */
       
   672     DPRINTF(("start bracket 0\n"));
       
   673     do
       
   674       {
       
   675       RMATCH(2, rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb,
       
   676         match_isgroup);
       
   677       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       
   678       ecode += GET(ecode, 1);
       
   679       }
       
   680     while (*ecode == OP_ALT);
       
   681     DPRINTF(("bracket 0 failed\n"));
       
   682     RRETURN(MATCH_NOMATCH);
       
   683 
       
   684     /* Conditional group: compilation checked that there are no more than
       
   685     two branches. If the condition is false, skipping the first branch takes us
       
   686     past the end if there is only one branch, but that's OK because that is
       
   687     exactly what going to the ket would do. */
       
   688 
       
   689     case OP_COND:
       
   690     if (ecode[LINK_SIZE+1] == OP_CREF) /* Condition extract or recurse test */
       
   691       {
       
   692       offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */
       
   693       condition = (offset == CREF_RECURSE * 2)?
       
   694         (md->recursive != NULL) :
       
   695         (offset < offset_top && md->offset_vector[offset] >= 0);
       
   696       RMATCH(3, rrc, eptr, ecode + (condition?
       
   697         (LINK_SIZE + 4) : (LINK_SIZE + 1 + GET(ecode, 1))),
       
   698         offset_top, md, ims, eptrb, match_isgroup);
       
   699       RRETURN(rrc);
       
   700       }
       
   701 
       
   702     /* The condition is an assertion. Call match() to evaluate it - setting
       
   703     match_condassert in the flags makes it stop at the end of an assertion. */
       
   704 
       
   705     else
       
   706       {
       
   707       RMATCH(4, rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,
       
   708           match_condassert | match_isgroup);
       
   709       if (rrc == MATCH_MATCH)
       
   710         {
       
   711         ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE+2);
       
   712         while (*ecode == OP_ALT) ecode += GET(ecode, 1);
       
   713         }
       
   714       else if (rrc != MATCH_NOMATCH)
       
   715         {
       
   716         RRETURN(rrc);         /* Need braces because of following else */
       
   717         }
       
   718       else ecode += GET(ecode, 1);
       
   719       RMATCH(5, rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb,
       
   720         match_isgroup);
       
   721       RRETURN(rrc);
       
   722       }
       
   723     /* Control never reaches here */
       
   724 
       
   725     /* Skip over conditional reference or large extraction number data if
       
   726     encountered. */
       
   727 
       
   728     case OP_CREF:
       
   729     case OP_BRANUMBER:
       
   730     ecode += 3;
       
   731     break;
       
   732 
       
   733     /* End of the pattern. If we are in a recursion, we should restore the
       
   734     offsets appropriately and continue from after the call. */
       
   735 
       
   736     case OP_END:
       
   737     if (md->recursive != NULL && md->recursive->group_num == 0)
       
   738       {
       
   739       recursion_info *rec = md->recursive;
       
   740       DPRINTF(("Hit the end in a (?0) recursion\n"));
       
   741       md->recursive = rec->prevrec;
       
   742       memmove(md->offset_vector, rec->offset_save,
       
   743         rec->saved_max * sizeof(int));
       
   744       md->start_match = rec->save_start;
       
   745       ims = original_ims;
       
   746       ecode = rec->after_call;
       
   747       break;
       
   748       }
       
   749 
       
   750     /* Otherwise, if PCRE_NOTEMPTY is set, fail if we have matched an empty
       
   751     string - backtracking will then try other alternatives, if any. */
       
   752 
       
   753     if (md->notempty && eptr == md->start_match) RRETURN(MATCH_NOMATCH);
       
   754     md->end_match_ptr = eptr;          /* Record where we ended */
       
   755     md->end_offset_top = offset_top;   /* and how many extracts were taken */
       
   756     RRETURN(MATCH_MATCH);
       
   757 
       
   758     /* Change option settings */
       
   759 
       
   760     case OP_OPT:
       
   761     ims = ecode[1];
       
   762     ecode += 2;
       
   763     DPRINTF(("ims set to %02lx\n", ims));
       
   764     break;
       
   765 
       
   766     /* Assertion brackets. Check the alternative branches in turn - the
       
   767     matching won't pass the KET for an assertion. If any one branch matches,
       
   768     the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
       
   769     start of each branch to move the current point backwards, so the code at
       
   770     this level is identical to the lookahead case. */
       
   771 
       
   772     case OP_ASSERT:
       
   773     case OP_ASSERTBACK:
       
   774     do
       
   775       {
       
   776       RMATCH(6, rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,
       
   777         match_isgroup);
       
   778       if (rrc == MATCH_MATCH) break;
       
   779       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       
   780       ecode += GET(ecode, 1);
       
   781       }
       
   782     while (*ecode == OP_ALT);
       
   783     if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);
       
   784 
       
   785     /* If checking an assertion for a condition, return MATCH_MATCH. */
       
   786 
       
   787     if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
       
   788 
       
   789     /* Continue from after the assertion, updating the offsets high water
       
   790     mark, since extracts may have been taken during the assertion. */
       
   791 
       
   792     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
       
   793     ecode += 1 + LINK_SIZE;
       
   794     offset_top = md->end_offset_top;
       
   795     continue;
       
   796 
       
   797     /* Negative assertion: all branches must fail to match */
       
   798 
       
   799     case OP_ASSERT_NOT:
       
   800     case OP_ASSERTBACK_NOT:
       
   801     do
       
   802       {
       
   803       RMATCH(7, rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,
       
   804         match_isgroup);
       
   805       if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);
       
   806       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       
   807       ecode += GET(ecode,1);
       
   808       }
       
   809     while (*ecode == OP_ALT);
       
   810 
       
   811     if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
       
   812 
       
   813     ecode += 1 + LINK_SIZE;
       
   814     continue;
       
   815 
       
   816     /* Move the subject pointer back. This occurs only at the start of
       
   817     each branch of a lookbehind assertion. If we are too close to the start to
       
   818     move back, this match function fails. When working with UTF-8 we move
       
   819     back a number of characters, not bytes. */
       
   820 
       
   821     case OP_REVERSE:
       
   822 #ifdef SUPPORT_UTF8
       
   823     if (utf8)
       
   824       {
       
   825       c = GET(ecode,1);
       
   826       for (i = 0; i < c; i++)
       
   827         {
       
   828         eptr--;
       
   829         if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
       
   830         BACKCHAR(eptr)
       
   831         }
       
   832       }
       
   833     else
       
   834 #endif
       
   835 
       
   836     /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
       
   837 
       
   838       {
       
   839       eptr -= GET(ecode,1);
       
   840       if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
       
   841       }
       
   842 
       
   843     /* Skip to next op code */
       
   844 
       
   845     ecode += 1 + LINK_SIZE;
       
   846     break;
       
   847 
       
   848     /* The callout item calls an external function, if one is provided, passing
       
   849     details of the match so far. This is mainly for debugging, though the
       
   850     function is able to force a failure. */
       
   851 
       
   852     case OP_CALLOUT:
       
   853     if (pcre_callout != NULL)
       
   854       {
       
   855       pcre_callout_block cb;
       
   856       cb.version          = 1;   /* Version 1 of the callout block */
       
   857       cb.callout_number   = ecode[1];
       
   858       cb.offset_vector    = md->offset_vector;
       
   859       cb.subject          = (const pcre_char *)md->start_subject;
       
   860       cb.subject_length   = INT_CAST(md->end_subject - md->start_subject);
       
   861       cb.start_match      = INT_CAST(md->start_match - md->start_subject);
       
   862       cb.current_position = INT_CAST(eptr - md->start_subject);
       
   863       cb.pattern_position = GET(ecode, 2);
       
   864       cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
       
   865       cb.capture_top      = offset_top/2;
       
   866       cb.capture_last     = md->capture_last;
       
   867       cb.callout_data     = md->callout_data;
       
   868       if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);
       
   869       if (rrc < 0) RRETURN(rrc);
       
   870       }
       
   871     ecode += 2 + 2*LINK_SIZE;
       
   872     break;
       
   873 
       
   874     /* Recursion either matches the current regex, or some subexpression. The
       
   875     offset data is the offset to the starting bracket from the start of the
       
   876     whole pattern. (This is so that it works from duplicated subpatterns.)
       
   877 
       
   878     If there are any capturing brackets started but not finished, we have to
       
   879     save their starting points and reinstate them after the recursion. However,
       
   880     we don't know how many such there are (offset_top records the completed
       
   881     total) so we just have to save all the potential data. There may be up to
       
   882     65535 such values, which is too large to put on the stack, but using malloc
       
   883     for small numbers seems expensive. As a compromise, the stack is used when
       
   884     there are no more than REC_STACK_SAVE_MAX values to store; otherwise malloc
       
   885     is used. If the malloc fails, the recursion is not attempted and
       
   886     PCRE_ERROR_NOMEMORY is returned; no partial save of the offsets is
       
   887     made.
       
   888 
       
   889     There are also other values that have to be saved. We use a chained
       
   890     sequence of blocks that actually live on the stack. Thanks to Robin Houston
       
   891     for the original version of this logic. */
       
   892 
       
   893     case OP_RECURSE:
       
   894       {
       
   895       callpat = md->start_code + GET(ecode, 1);
       
   896       new_recursive.group_num = *callpat - OP_BRA;
       
   897 
       
   898       /* For extended extraction brackets (large number), we have to fish out
       
   899       the number from a dummy opcode at the start. */
       
   900 
       
   901       if (new_recursive.group_num > EXTRACT_BASIC_MAX)
       
   902         new_recursive.group_num = GET2(callpat, 2+LINK_SIZE);
       
   903 
       
   904       /* Add to "recursing stack" */
       
   905 
       
   906       new_recursive.prevrec = md->recursive;
       
   907       md->recursive = &new_recursive;
       
   908 
       
   909       /* Find where to continue from afterwards */
       
   910 
       
   911       ecode += 1 + LINK_SIZE;
       
   912       new_recursive.after_call = ecode;
       
   913 
       
   914       /* Now save the offset data. */
       
   915 
       
   916       new_recursive.saved_max = md->offset_end;
       
   917       if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)
       
   918         new_recursive.offset_save = stacksave;
       
   919       else
       
   920         {
       
   921         new_recursive.offset_save =
       
   922           (int *)(pcre_malloc)(new_recursive.saved_max * sizeof(int));
       
   923         if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
       
   924         }
       
   925 
       
   926       memcpy(new_recursive.offset_save, md->offset_vector,
       
   927             new_recursive.saved_max * sizeof(int));
       
   928       new_recursive.save_start = md->start_match;
       
   929       md->start_match = eptr;
       
   930 
       
   931       /* OK, now we can do the recursion. For each top-level alternative we
       
   932       restore the offset and recursion data. */
       
   933 
       
   934       DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
       
   935       do
       
   936         {
       
   937         RMATCH(8, rrc, eptr, callpat + 1 + LINK_SIZE, offset_top, md, ims,
       
   938             eptrb, match_isgroup);
       
   939         if (rrc == MATCH_MATCH)
       
   940           {
       
   941           md->recursive = new_recursive.prevrec;
       
   942           if (new_recursive.offset_save != stacksave)
       
   943             (pcre_free)(new_recursive.offset_save);
       
   944           RRETURN(MATCH_MATCH);
       
   945           }
       
   946         else if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       
   947 
       
   948         md->recursive = &new_recursive;
       
   949         memcpy(md->offset_vector, new_recursive.offset_save,
       
   950             new_recursive.saved_max * sizeof(int));
       
   951         callpat += GET(callpat, 1);
       
   952         }
       
   953       while (*callpat == OP_ALT);
       
   954 
       
   955       DPRINTF(("Recursion didn't match\n"));
       
   956       md->recursive = new_recursive.prevrec;
       
   957       if (new_recursive.offset_save != stacksave)
       
   958         (pcre_free)(new_recursive.offset_save);
       
   959       RRETURN(MATCH_NOMATCH);
       
   960       }
       
   961     /* Control never reaches here */
       
   962 
       
   963     /* "Once" brackets are like assertion brackets except that after a match,
       
   964     the point in the subject string is not moved back. Thus there can never be
       
   965     a move back into the brackets. Friedl calls these "atomic" subpatterns.
       
   966     Check the alternative branches in turn - the matching won't pass the KET
       
   967     for this kind of subpattern. If any one branch matches, we carry on as at
       
   968     the end of a normal bracket, leaving the subject pointer. */
       
   969 
       
   970     case OP_ONCE:
       
   971       {
       
   972       prev = ecode;
       
   973       saved_eptr = eptr;
       
   974 
       
   975       do
       
   976         {
       
   977         RMATCH(9, rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims,
       
   978           eptrb, match_isgroup);
       
   979         if (rrc == MATCH_MATCH) break;
       
   980         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       
   981         ecode += GET(ecode,1);
       
   982         }
       
   983       while (*ecode == OP_ALT);
       
   984 
       
   985       /* If hit the end of the group (which could be repeated), fail */
       
   986 
       
   987       if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
       
   988 
       
   989       /* Continue as from after the assertion, updating the offsets high water
       
   990       mark, since extracts may have been taken. */
       
   991 
       
   992       do ecode += GET(ecode,1); while (*ecode == OP_ALT);
       
   993 
       
   994       offset_top = md->end_offset_top;
       
   995       eptr = md->end_match_ptr;
       
   996 
       
   997       /* For a non-repeating ket, just continue at this level. This also
       
   998       happens for a repeating ket if no characters were matched in the group.
       
   999       This is the forcible breaking of infinite loops as implemented in Perl
       
  1000       5.005. If there is an options reset, it will get obeyed in the normal
       
  1001       course of events. */
       
  1002 
       
  1003       if (*ecode == OP_KET || eptr == saved_eptr)
       
  1004         {
       
  1005         ecode += 1+LINK_SIZE;
       
  1006         break;
       
  1007         }
       
  1008 
       
  1009       /* The repeating kets try the rest of the pattern or restart from the
       
  1010       preceding bracket, in the appropriate order. We need to reset any options
       
  1011       that changed within the bracket before re-running it, so check the next
       
  1012       opcode. */
       
  1013 
       
  1014       if (ecode[1+LINK_SIZE] == OP_OPT)
       
  1015         {
       
  1016         ims = (ims & ~PCRE_IMS) | ecode[4];
       
  1017         DPRINTF(("ims set to %02lx at group repeat\n", ims));
       
  1018         }
       
  1019 
       
  1020       if (*ecode == OP_KETRMIN)
       
  1021         {
       
  1022         RMATCH(10, rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0);
       
  1023         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       
  1024         RMATCH(11, rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);
       
  1025         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       
  1026         }
       
  1027       else  /* OP_KETRMAX */
       
  1028         {
       
  1029         RMATCH(12, rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);
       
  1030         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       
  1031         RMATCH(13, rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);
       
  1032         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       
  1033         }
       
  1034       }
       
  1035     RRETURN(MATCH_NOMATCH);
       
  1036 
       
  1037     /* An alternation is the end of a branch; scan along to find the end of the
       
  1038     bracketed group and go to there. */
       
  1039 
       
  1040     case OP_ALT:
       
  1041     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
       
  1042     break;
       
  1043 
       
  1044     /* BRAZERO and BRAMINZERO occur just before a bracket group, indicating
       
  1045     that it may occur zero times. It may repeat infinitely, or not at all -
       
  1046     i.e. it could be ()* or ()? in the pattern. Brackets with fixed upper
       
  1047     repeat limits are compiled as a number of copies, with the optional ones
       
  1048     preceded by BRAZERO or BRAMINZERO. */
       
  1049 
       
  1050     case OP_BRAZERO:
       
  1051       {
       
  1052       next = ecode+1;
       
  1053       RMATCH(14, rrc, eptr, next, offset_top, md, ims, eptrb, match_isgroup);
       
  1054       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       
  1055       do next += GET(next,1); while (*next == OP_ALT);
       
  1056       ecode = next + 1+LINK_SIZE;
       
  1057       }
       
  1058     break;
       
  1059 
       
  1060     case OP_BRAMINZERO:
       
  1061       {
       
  1062       next = ecode+1;
       
  1063       do next += GET(next,1); while (*next == OP_ALT);
       
  1064       RMATCH(15, rrc, eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb,
       
  1065         match_isgroup);
       
  1066       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       
  1067       ecode++;
       
  1068       }
       
  1069     break;
       
  1070 
       
  1071     /* End of a group, repeated or non-repeating. If we are at the end of
       
  1072     an assertion "group", stop matching and return MATCH_MATCH, but record the
       
  1073     current high water mark for use by positive assertions. Do this also
       
  1074     for the "once" (not backing up) groups. */
       
  1075 
       
  1076     case OP_KET:
       
  1077     case OP_KETRMIN:
       
  1078     case OP_KETRMAX:
       
  1079       {
       
  1080       prev = ecode - GET(ecode, 1);
       
  1081       saved_eptr = eptrb->epb_saved_eptr;
       
  1082 
       
  1083       /* Back up the stack of bracket start pointers. */
       
  1084 
       
  1085       eptrb = eptrb->epb_prev;
       
  1086 
       
  1087       if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
       
  1088           *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
       
  1089           *prev == OP_ONCE)
       
  1090         {
       
  1091         md->end_match_ptr = eptr;      /* For ONCE */
       
  1092         md->end_offset_top = offset_top;
       
  1093         RRETURN(MATCH_MATCH);
       
  1094         }
       
  1095 
       
  1096       /* In all other cases except a conditional group we have to check the
       
  1097       group number back at the start and if necessary complete handling an
       
  1098       extraction by setting the offsets and bumping the high water mark. */
       
  1099 
       
  1100       if (*prev != OP_COND)
       
  1101         {
       
  1102         number = *prev - OP_BRA;
       
  1103 
       
  1104         /* For extended extraction brackets (large number), we have to fish out
       
  1105         the number from a dummy opcode at the start. */
       
  1106 
       
  1107         if (number > EXTRACT_BASIC_MAX) number = GET2(prev, 2+LINK_SIZE);
       
  1108         offset = number << 1;
       
  1109 
       
  1110 #ifdef DEBUG
       
  1111         printf("end bracket %d", number);
       
  1112         printf("\n");
       
  1113 #endif
       
  1114 
       
  1115         /* Test for a numbered group. This includes groups called as a result
       
  1116         of recursion. Note that whole-pattern recursion is coded as a recurse
       
  1117         into group 0, so it won't be picked up here. Instead, we catch it when
       
  1118         the OP_END is reached. */
       
  1119 
       
  1120         if (number > 0)
       
  1121           {
       
  1122           md->capture_last = number;
       
  1123           if (offset >= md->offset_max) md->offset_overflow = TRUE; else
       
  1124             {
       
  1125             md->offset_vector[offset] =
       
  1126               md->offset_vector[md->offset_end - number];
       
  1127             md->offset_vector[offset+1] = INT_CAST(eptr - md->start_subject);
       
  1128             if (offset_top <= offset) offset_top = offset + 2;
       
  1129             }
       
  1130 
       
  1131           /* Handle a recursively called group. Restore the offsets
       
  1132           appropriately and continue from after the call. */
       
  1133 
       
  1134           if (md->recursive != NULL && md->recursive->group_num == number)
       
  1135             {
       
  1136             recursion_info *rec = md->recursive;
       
  1137             DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
       
  1138             md->recursive = rec->prevrec;
       
  1139             md->start_match = rec->save_start;
       
  1140             memcpy(md->offset_vector, rec->offset_save,
       
  1141               rec->saved_max * sizeof(int));
       
  1142             ecode = rec->after_call;
       
  1143             ims = original_ims;
       
  1144             break;
       
  1145             }
       
  1146           }
       
  1147         }
       
  1148 
       
  1149       /* Reset the value of the ims flags, in case they got changed during
       
  1150       the group. */
       
  1151 
       
  1152       ims = original_ims;
       
  1153       DPRINTF(("ims reset to %02lx\n", ims));
       
  1154 
       
  1155       /* For a non-repeating ket, just continue at this level. This also
       
  1156       happens for a repeating ket if no characters were matched in the group.
       
  1157       This is the forcible breaking of infinite loops as implemented in Perl
       
  1158       5.005. If there is an options reset, it will get obeyed in the normal
       
  1159       course of events. */
       
  1160 
       
  1161       if (*ecode == OP_KET || eptr == saved_eptr)
       
  1162         {
       
  1163         ecode += 1 + LINK_SIZE;
       
  1164         break;
       
  1165         }
       
  1166 
       
  1167       /* The repeating kets try the rest of the pattern or restart from the
       
  1168       preceding bracket, in the appropriate order. */
       
  1169 
       
  1170       if (*ecode == OP_KETRMIN)
       
  1171         {
       
  1172         RMATCH(16, rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);
       
  1173         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       
  1174         RMATCH(17, rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);
       
  1175         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       
  1176         }
       
  1177       else  /* OP_KETRMAX */
       
  1178         {
       
  1179         RMATCH(18, rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);
       
  1180         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       
  1181         RMATCH(19, rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);
       
  1182         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       
  1183         }
       
  1184       }
       
  1185 
       
  1186     RRETURN(MATCH_NOMATCH);
       
  1187 
       
  1188     /* Start of subject unless notbol, or after internal newline if multiline */
       
  1189 
       
  1190     case OP_CIRC:
       
  1191     if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
       
  1192     if ((ims & PCRE_MULTILINE) != 0)
       
  1193       {
       
  1194       if (eptr != md->start_subject && eptr[-1] != NEWLINE)
       
  1195         RRETURN(MATCH_NOMATCH);
       
  1196       ecode++;
       
  1197       break;
       
  1198       }
       
  1199     /* ... else fall through */
       
  1200 
       
  1201     /* Start of subject assertion */
       
  1202 
       
  1203     case OP_SOD:
       
  1204     if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH);
       
  1205     ecode++;
       
  1206     break;
       
  1207 
       
  1208     /* Start of match assertion */
       
  1209 
       
  1210     case OP_SOM:
       
  1211     if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH);
       
  1212     ecode++;
       
  1213     break;
       
  1214 
       
  1215     /* Assert before internal newline if multiline, or before a terminating
       
  1216     newline unless endonly is set, else end of subject unless noteol is set. */
       
  1217 
       
  1218     case OP_DOLL:
       
  1219     if ((ims & PCRE_MULTILINE) != 0)
       
  1220       {
       
  1221       if (eptr < md->end_subject)
       
  1222         { if (*eptr != NEWLINE) RRETURN(MATCH_NOMATCH); }
       
  1223       else
       
  1224         { if (md->noteol) RRETURN(MATCH_NOMATCH); }
       
  1225       ecode++;
       
  1226       break;
       
  1227       }
       
  1228     else
       
  1229       {
       
  1230       if (md->noteol) RRETURN(MATCH_NOMATCH);
       
  1231       if (!md->endonly)
       
  1232         {
       
  1233         if (eptr < md->end_subject - 1 ||
       
  1234            (eptr == md->end_subject - 1 && *eptr != NEWLINE))
       
  1235           RRETURN(MATCH_NOMATCH);
       
  1236         ecode++;
       
  1237         break;
       
  1238         }
       
  1239       }
       
  1240     /* ... else fall through */
       
  1241 
       
  1242     /* End of subject assertion (\z) */
       
  1243 
       
  1244     case OP_EOD:
       
  1245     if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH);
       
  1246     ecode++;
       
  1247     break;
       
  1248 
       
  1249     /* End of subject or ending \n assertion (\Z) */
       
  1250 
       
  1251     case OP_EODN:
       
  1252     if (eptr < md->end_subject - 1 ||
       
  1253        (eptr == md->end_subject - 1 && *eptr != NEWLINE)) RRETURN(MATCH_NOMATCH);
       
  1254     ecode++;
       
  1255     break;
       
  1256 
       
  1257     /* Word boundary assertions */
       
  1258 
       
  1259     case OP_NOT_WORD_BOUNDARY:
       
  1260     case OP_WORD_BOUNDARY:
       
  1261       {
       
  1262 
       
  1263       /* Find out if the previous and current characters are "word" characters.
       
  1264       It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
       
  1265       be "non-word" characters. */
       
  1266 
       
  1267 #ifdef SUPPORT_UTF8
       
  1268       if (utf8)
       
  1269         {
       
  1270         if (eptr == md->start_subject) prev_is_word = FALSE; else
       
  1271           {
       
  1272           const pcre_uchar *lastptr = eptr - 1;
       
  1273           while(ISMIDCHAR(*lastptr)) lastptr--;
       
  1274           GETCHAR(c, lastptr);
       
  1275           prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
       
  1276           }
       
  1277         if (eptr >= md->end_subject) cur_is_word = FALSE; else
       
  1278           {
       
  1279           GETCHAR(c, eptr);
       
  1280           cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
       
  1281           }
       
  1282         }
       
  1283       else
       
  1284 #endif
       
  1285 
       
  1286       /* More streamlined when not in UTF-8 mode */
       
  1287 
       
  1288         {
       
  1289         prev_is_word = (eptr != md->start_subject) &&
       
  1290           ((md->ctypes[eptr[-1]] & ctype_word) != 0);
       
  1291         cur_is_word = (eptr < md->end_subject) &&
       
  1292           ((md->ctypes[*eptr] & ctype_word) != 0);
       
  1293         }
       
  1294 
       
  1295       /* Now see if the situation is what we want */
       
  1296 
       
  1297       if ((*ecode++ == OP_WORD_BOUNDARY)?
       
  1298            cur_is_word == prev_is_word : cur_is_word != prev_is_word)
       
  1299         RRETURN(MATCH_NOMATCH);
       
  1300       }
       
  1301     break;
       
  1302 
       
  1303     /* Match a single character type; inline for speed */
       
  1304 
       
  1305     case OP_ANY:
       
  1306     if ((ims & PCRE_DOTALL) == 0 && eptr < md->end_subject && *eptr == NEWLINE)
       
  1307       RRETURN(MATCH_NOMATCH);
       
  1308     if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);
       
  1309 #ifdef SUPPORT_UTF8
       
  1310     if (utf8)
       
  1311       while (eptr < md->end_subject && ISMIDCHAR(*eptr)) eptr++;
       
  1312 #endif
       
  1313     ecode++;
       
  1314     break;
       
  1315 
       
  1316     /* Match a single byte, even in UTF-8 mode. This opcode really does match
       
  1317     any byte, even newline, independent of the setting of PCRE_DOTALL. */
       
  1318 
       
  1319     case OP_ANYBYTE:
       
  1320     if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);
       
  1321     ecode++;
       
  1322     break;
       
  1323 
       
  1324     case OP_NOT_DIGIT:
       
  1325     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
       
  1326     GETCHARINCTEST(c, eptr);
       
  1327     if (
       
  1328 #ifdef SUPPORT_UTF8
       
  1329        c < 256 &&
       
  1330 #endif
       
  1331        (md->ctypes[c] & ctype_digit) != 0
       
  1332        )
       
  1333       RRETURN(MATCH_NOMATCH);
       
  1334     ecode++;
       
  1335     break;
       
  1336 
       
  1337     case OP_DIGIT:
       
  1338     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
       
  1339     GETCHARINCTEST(c, eptr);
       
  1340     if (
       
  1341 #ifdef SUPPORT_UTF8
       
  1342        c >= 256 ||
       
  1343 #endif
       
  1344        (md->ctypes[c] & ctype_digit) == 0
       
  1345        )
       
  1346       RRETURN(MATCH_NOMATCH);
       
  1347     ecode++;
       
  1348     break;
       
  1349 
       
  1350     case OP_NOT_WHITESPACE:
       
  1351     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
       
  1352     GETCHARINCTEST(c, eptr);
       
  1353     if (
       
  1354 #ifdef SUPPORT_UTF8
       
  1355        c < 256 &&
       
  1356 #endif
       
  1357        (md->ctypes[c] & ctype_space) != 0
       
  1358        )
       
  1359       RRETURN(MATCH_NOMATCH);
       
  1360     ecode++;
       
  1361     break;
       
  1362 
       
  1363     case OP_WHITESPACE:
       
  1364     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
       
  1365     GETCHARINCTEST(c, eptr);
       
  1366     if (
       
  1367 #ifdef SUPPORT_UTF8
       
  1368        c >= 256 ||
       
  1369 #endif
       
  1370        (md->ctypes[c] & ctype_space) == 0
       
  1371        )
       
  1372       RRETURN(MATCH_NOMATCH);
       
  1373     ecode++;
       
  1374     break;
       
  1375 
       
  1376     case OP_NOT_WORDCHAR:
       
  1377     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
       
  1378     GETCHARINCTEST(c, eptr);
       
  1379     if (
       
  1380 #ifdef SUPPORT_UTF8
       
  1381        c < 256 &&
       
  1382 #endif
       
  1383        (md->ctypes[c] & ctype_word) != 0
       
  1384        )
       
  1385       RRETURN(MATCH_NOMATCH);
       
  1386     ecode++;
       
  1387     break;
       
  1388 
       
  1389     case OP_WORDCHAR:
       
  1390     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
       
  1391     GETCHARINCTEST(c, eptr);
       
  1392     if (
       
  1393 #ifdef SUPPORT_UTF8
       
  1394        c >= 256 ||
       
  1395 #endif
       
  1396        (md->ctypes[c] & ctype_word) == 0
       
  1397        )
       
  1398       RRETURN(MATCH_NOMATCH);
       
  1399     ecode++;
       
  1400     break;
       
  1401 
       
  1402 #ifdef SUPPORT_UCP
       
  1403     /* Check the next character by Unicode property. We will get here only
       
  1404     if the support is in the binary; otherwise a compile-time error occurs. */
       
  1405 
       
  1406     case OP_PROP:
       
  1407     case OP_NOTPROP:
       
  1408     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
       
  1409     GETCHARINCTEST(c, eptr);
       
  1410       {
       
  1411       int chartype, rqdtype;
       
  1412       int othercase;
       
  1413       int category = _pcre_ucp_findchar(c, &chartype, &othercase);
       
  1414 
       
  1415       rqdtype = *(++ecode);
       
  1416       ecode++;
       
  1417 
       
  1418       if (rqdtype >= 128)
       
  1419         {
       
  1420         if ((rqdtype - 128 != category) == (op == OP_PROP))
       
  1421           RRETURN(MATCH_NOMATCH);
       
  1422         }
       
  1423       else
       
  1424         {
       
  1425         if ((rqdtype != chartype) == (op == OP_PROP))
       
  1426           RRETURN(MATCH_NOMATCH);
       
  1427         }
       
  1428       }
       
  1429     break;
       
  1430 
       
  1431     /* Match an extended Unicode sequence. We will get here only if the support
       
  1432     is in the binary; otherwise a compile-time error occurs. */
       
  1433 
       
  1434     case OP_EXTUNI:
       
  1435     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
       
  1436     GETCHARINCTEST(c, eptr);
       
  1437       {
       
  1438       int chartype;
       
  1439       int othercase;
       
  1440       int category = _pcre_ucp_findchar(c, &chartype, &othercase);
       
  1441       if (category == ucp_M) RRETURN(MATCH_NOMATCH);
       
  1442       while (eptr < md->end_subject)
       
  1443         {
       
  1444         int len = 1;
       
  1445         if (!utf8) c = *eptr; else
       
  1446           {
       
  1447           GETCHARLEN(c, eptr, len);
       
  1448           }
       
  1449         category = _pcre_ucp_findchar(c, &chartype, &othercase);
       
  1450         if (category != ucp_M) break;
       
  1451         eptr += len;
       
  1452         }
       
  1453       }
       
  1454     ecode++;
       
  1455     break;
       
  1456 #endif
       
  1457 
       
  1458 
       
  1459     /* Match a back reference, possibly repeatedly. Look past the end of the
       
  1460     item to see if there is repeat information following. The code is similar
       
  1461     to that for character classes, but repeated for efficiency. Then obey
       
  1462     similar code to character type repeats - written out again for speed.
       
  1463     However, if the referenced string is the empty string, always treat
       
  1464     it as matched, any number of times (otherwise there could be infinite
       
  1465     loops). */
       
  1466 
       
  1467     case OP_REF:
       
  1468       {
       
  1469       offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
       
  1470       ecode += 3;                                 /* Advance past item */
       
  1471 
       
  1472       /* If the reference is unset, set the length to be longer than the amount
       
  1473       of subject left; this ensures that every attempt at a match fails. We
       
  1474       can't just fail here, because of the possibility of quantifiers with zero
       
  1475       minima. */
       
  1476 
       
  1477       length = (offset >= offset_top || md->offset_vector[offset] < 0)?
       
  1478 #if JAVASCRIPT
       
  1479         0 : /* in JavaScript these match the empty string */
       
  1480 #else
       
  1481         INT_CAST(md->end_subject - eptr + 1) :
       
  1482 #endif
       
  1483         md->offset_vector[offset+1] - md->offset_vector[offset];
       
  1484 
       
  1485       /* Set up for repetition, or handle the non-repeated case */
       
  1486 
       
  1487       switch (*ecode)
       
  1488         {
       
  1489         case OP_CRSTAR:
       
  1490         case OP_CRMINSTAR:
       
  1491         case OP_CRPLUS:
       
  1492         case OP_CRMINPLUS:
       
  1493         case OP_CRQUERY:
       
  1494         case OP_CRMINQUERY:
       
  1495         c = *ecode++ - OP_CRSTAR;
       
  1496         minimize = (c & 1) != 0;
       
  1497         min = rep_min[c];                 /* Pick up values from tables; */
       
  1498         max = rep_max[c];                 /* zero for max => infinity */
       
  1499         if (max == 0) max = INT_MAX;
       
  1500         break;
       
  1501 
       
  1502         case OP_CRRANGE:
       
  1503         case OP_CRMINRANGE:
       
  1504         minimize = (*ecode == OP_CRMINRANGE);
       
  1505         min = GET2(ecode, 1);
       
  1506         max = GET2(ecode, 3);
       
  1507         if (max == 0) max = INT_MAX;
       
  1508         ecode += 5;
       
  1509         break;
       
  1510 
       
  1511         default:               /* No repeat follows */
       
  1512         if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);
       
  1513         eptr += length;
       
  1514         continue;              /* With the main loop */
       
  1515         }
       
  1516 
       
  1517       /* If the length of the reference is zero, just continue with the
       
  1518       main loop. */
       
  1519 
       
  1520       if (length == 0) continue;
       
  1521 
       
  1522       /* First, ensure the minimum number of matches are present. We get back
       
  1523       the length of the reference string explicitly rather than passing the
       
  1524       address of eptr, so that eptr can be a register variable. */
       
  1525 
       
  1526       for (i = 1; i <= min; i++)
       
  1527         {
       
  1528         if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);
       
  1529         eptr += length;
       
  1530         }
       
  1531 
       
  1532       /* If min = max, continue at the same level without recursion.
       
  1533       They are not both allowed to be zero. */
       
  1534 
       
  1535       if (min == max) continue;
       
  1536 
       
  1537       /* If minimizing, keep trying and advancing the pointer */
       
  1538 
       
  1539       if (minimize)
       
  1540         {
       
  1541         for (fi = min;; fi++)
       
  1542           {
       
  1543           RMATCH(20, rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
       
  1544           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       
  1545           if (fi >= max || !match_ref(offset, eptr, length, md, ims))
       
  1546             RRETURN(MATCH_NOMATCH);
       
  1547           eptr += length;
       
  1548           }
       
  1549         /* Control never gets here */
       
  1550         }
       
  1551 
       
  1552       /* If maximizing, find the longest string and work backwards */
       
  1553 
       
  1554       else
       
  1555         {
       
  1556         pp = eptr;
       
  1557         for (i = min; i < max; i++)
       
  1558           {
       
  1559           if (!match_ref(offset, eptr, length, md, ims)) break;
       
  1560           eptr += length;
       
  1561           }
       
  1562         while (eptr >= pp)
       
  1563           {
       
  1564           RMATCH(21, rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
       
  1565           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       
  1566           eptr -= length;
       
  1567           }
       
  1568         RRETURN(MATCH_NOMATCH);
       
  1569         }
       
  1570       }
       
  1571     /* Control never gets here */
       
  1572 
       
  1573 
       
  1574 
       
  1575     /* Match a bit-mapped character class, possibly repeatedly. This op code is
       
  1576     used when all the characters in the class have values in the range 0-255,
       
  1577     and either the matching is caseful, or the characters are in the range
       
  1578     0-127 when UTF-8 processing is enabled. The only difference between
       
  1579     OP_CLASS and OP_NCLASS occurs when a data character outside the range is
       
  1580     encountered.
       
  1581 
       
  1582     First, look past the end of the item to see if there is repeat information
       
  1583     following. Then obey similar code to character type repeats - written out
       
  1584     again for speed. */
       
  1585 
       
  1586     case OP_NCLASS:
       
  1587     case OP_CLASS:
       
  1588       {
       
  1589       data = ecode + 1;                /* Save for matching */
       
  1590       ecode += 33;                     /* Advance past the item */
       
  1591 
       
  1592       switch (*ecode)
       
  1593         {
       
  1594         case OP_CRSTAR:
       
  1595         case OP_CRMINSTAR:
       
  1596         case OP_CRPLUS:
       
  1597         case OP_CRMINPLUS:
       
  1598         case OP_CRQUERY:
       
  1599         case OP_CRMINQUERY:
       
  1600         c = *ecode++ - OP_CRSTAR;
       
  1601         minimize = (c & 1) != 0;
       
  1602         min = rep_min[c];                 /* Pick up values from tables; */
       
  1603         max = rep_max[c];                 /* zero for max => infinity */
       
  1604         if (max == 0) max = INT_MAX;
       
  1605         break;
       
  1606 
       
  1607         case OP_CRRANGE:
       
  1608         case OP_CRMINRANGE:
       
  1609         minimize = (*ecode == OP_CRMINRANGE);
       
  1610         min = GET2(ecode, 1);
       
  1611         max = GET2(ecode, 3);
       
  1612         if (max == 0) max = INT_MAX;
       
  1613         ecode += 5;
       
  1614         break;
       
  1615 
       
  1616         default:               /* No repeat follows */
       
  1617         min = max = 1;
       
  1618         break;
       
  1619         }
       
  1620 
       
  1621       /* First, ensure the minimum number of matches are present. */
       
  1622 
       
  1623 #ifdef SUPPORT_UTF8
       
  1624       /* UTF-8 mode */
       
  1625       if (utf8)
       
  1626         {
       
  1627         for (i = 1; i <= min; i++)
       
  1628           {
       
  1629           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
       
  1630           GETCHARINC(c, eptr);
       
  1631           if (c > 255)
       
  1632             {
       
  1633             if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
       
  1634             }
       
  1635           else
       
  1636             {
       
  1637             if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
       
  1638             }
       
  1639           }
       
  1640         }
       
  1641       else
       
  1642 #endif
       
  1643       /* Not UTF-8 mode */
       
  1644         {
       
  1645         for (i = 1; i <= min; i++)
       
  1646           {
       
  1647           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
       
  1648           c = *eptr++;
       
  1649           if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
       
  1650           }
       
  1651         }
       
  1652 
       
  1653       /* If max == min we can continue with the main loop without the
       
  1654       need to recurse. */
       
  1655 
       
  1656       if (min == max) continue;
       
  1657 
       
  1658       /* If minimizing, keep testing the rest of the expression and advancing
       
  1659       the pointer while it matches the class. */
       
  1660 
       
  1661       if (minimize)
       
  1662         {
       
  1663 #ifdef SUPPORT_UTF8
       
  1664         /* UTF-8 mode */
       
  1665         if (utf8)
       
  1666           {
       
  1667           for (fi = min;; fi++)
       
  1668             {
       
  1669             RMATCH(22, rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
       
  1670             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       
  1671             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
       
  1672             GETCHARINC(c, eptr);
       
  1673             if (c > 255)
       
  1674               {
       
  1675               if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
       
  1676               }
       
  1677             else
       
  1678               {
       
  1679               if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
       
  1680               }
       
  1681             }
       
  1682           }
       
  1683         else
       
  1684 #endif
       
  1685         /* Not UTF-8 mode */
       
  1686           {
       
  1687           for (fi = min;; fi++)
       
  1688             {
       
  1689             RMATCH(23, rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
       
  1690             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       
  1691             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
       
  1692             c = *eptr++;
       
  1693             if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
       
  1694             }
       
  1695           }
       
  1696         /* Control never gets here */
       
  1697         }
       
  1698 
       
  1699       /* If maximizing, find the longest possible run, then work backwards. */
       
  1700 
       
  1701       else
       
  1702         {
       
  1703         pp = eptr;
       
  1704 
       
  1705 #ifdef SUPPORT_UTF8
       
  1706         /* UTF-8 mode */
       
  1707         if (utf8)
       
  1708           {
       
  1709           for (i = min; i < max; i++)
       
  1710             {
       
  1711             int len = 1;
       
  1712             if (eptr >= md->end_subject) break;
       
  1713             GETCHARLEN(c, eptr, len);
       
  1714             if (c > 255)
       
  1715               {
       
  1716               if (op == OP_CLASS) break;
       
  1717               }
       
  1718             else
       
  1719               {
       
  1720               if ((data[c/8] & (1 << (c&7))) == 0) break;
       
  1721               }
       
  1722             eptr += len;
       
  1723             }
       
  1724           for (;;)
       
  1725             {
       
  1726             RMATCH(24, rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
       
  1727             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       
  1728             if (eptr-- == pp) break;        /* Stop if tried at original pos */
       
  1729             BACKCHAR(eptr);
       
  1730             }
       
  1731           }
       
  1732         else
       
  1733 #endif
       
  1734           /* Not UTF-8 mode */
       
  1735           {
       
  1736           for (i = min; i < max; i++)
       
  1737             {
       
  1738             if (eptr >= md->end_subject) break;
       
  1739             c = *eptr;
       
  1740             if ((data[c/8] & (1 << (c&7))) == 0) break;
       
  1741             eptr++;
       
  1742             }
       
  1743           while (eptr >= pp)
       
  1744             {
       
  1745             RMATCH(25, rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
       
  1746             eptr--;
       
  1747             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       
  1748             }
       
  1749           }
       
  1750 
       
  1751         RRETURN(MATCH_NOMATCH);
       
  1752         }
       
  1753       }
       
  1754     /* Control never gets here */
       
  1755 
       
  1756 
       
  1757     /* Match an extended character class. This opcode is encountered only
       
  1758     in UTF-8 mode, because that's the only time it is compiled. */
       
  1759 
       
  1760 #ifdef SUPPORT_UTF8
       
  1761     case OP_XCLASS:
       
  1762       {
       
  1763       data = ecode + 1 + LINK_SIZE;                /* Save for matching */
       
  1764       ecode += GET(ecode, 1);                      /* Advance past the item */
       
  1765 
       
  1766       switch (*ecode)
       
  1767         {
       
  1768         case OP_CRSTAR:
       
  1769         case OP_CRMINSTAR:
       
  1770         case OP_CRPLUS:
       
  1771         case OP_CRMINPLUS:
       
  1772         case OP_CRQUERY:
       
  1773         case OP_CRMINQUERY:
       
  1774         c = *ecode++ - OP_CRSTAR;
       
  1775         minimize = (c & 1) != 0;
       
  1776         min = rep_min[c];                 /* Pick up values from tables; */
       
  1777         max = rep_max[c];                 /* zero for max => infinity */
       
  1778         if (max == 0) max = INT_MAX;
       
  1779         break;
       
  1780 
       
  1781         case OP_CRRANGE:
       
  1782         case OP_CRMINRANGE:
       
  1783         minimize = (*ecode == OP_CRMINRANGE);
       
  1784         min = GET2(ecode, 1);
       
  1785         max = GET2(ecode, 3);
       
  1786         if (max == 0) max = INT_MAX;
       
  1787         ecode += 5;
       
  1788         break;
       
  1789 
       
  1790         default:               /* No repeat follows */
       
  1791         min = max = 1;
       
  1792         break;
       
  1793         }
       
  1794 
       
  1795       /* First, ensure the minimum number of matches are present. */
       
  1796 
       
  1797       for (i = 1; i <= min; i++)
       
  1798         {
       
  1799         if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
       
  1800         GETCHARINC(c, eptr);
       
  1801         if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
       
  1802         }
       
  1803 
       
  1804       /* If max == min we can continue with the main loop without the
       
  1805       need to recurse. */
       
  1806 
       
  1807       if (min == max) continue;
       
  1808 
       
  1809       /* If minimizing, keep testing the rest of the expression and advancing
       
  1810       the pointer while it matches the class. */
       
  1811 
       
  1812       if (minimize)
       
  1813         {
       
  1814         for (fi = min;; fi++)
       
  1815           {
       
  1816           RMATCH(26, rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
       
  1817           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       
  1818           if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
       
  1819           GETCHARINC(c, eptr);
       
  1820           if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
       
  1821           }
       
  1822         /* Control never gets here */
       
  1823         }
       
  1824 
       
  1825       /* If maximizing, find the longest possible run, then work backwards. */
       
  1826 
       
  1827       else
       
  1828         {
       
  1829         pp = eptr;
       
  1830         for (i = min; i < max; i++)
       
  1831           {
       
  1832           int len = 1;
       
  1833           if (eptr >= md->end_subject) break;
       
  1834           GETCHARLEN(c, eptr, len);
       
  1835           if (!_pcre_xclass(c, data)) break;
       
  1836           eptr += len;
       
  1837           }
       
  1838         for(;;)
       
  1839           {
       
  1840           RMATCH(27, rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
       
  1841           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       
  1842           if (eptr-- == pp) break;        /* Stop if tried at original pos */
       
  1843           BACKCHAR(eptr)
       
  1844           }
       
  1845         RRETURN(MATCH_NOMATCH);
       
  1846         }
       
  1847 
       
  1848       /* Control never gets here */
       
  1849       }
       
  1850 #endif    /* End of XCLASS */
       
  1851 
       
  1852     /* Match a single character, casefully */
       
  1853 
       
  1854     case OP_CHAR:
       
  1855 #ifdef SUPPORT_UTF8
       
  1856     if (utf8)
       
  1857       {
       
  1858       length = 1;
       
  1859       ecode++;
       
  1860       GETUTF8CHARLEN(fc, ecode, length);
       
  1861 #if PCRE_UTF16
       
  1862       {
       
  1863         int dc;
       
  1864         ecode += length;
       
  1865         switch (md->end_subject - eptr)
       
  1866         {
       
  1867           case 0:
       
  1868             RRETURN(MATCH_NOMATCH);
       
  1869           case 1:
       
  1870             dc = *eptr++;
       
  1871             if (IS_LEADING_SURROGATE(dc))
       
  1872               RRETURN(MATCH_NOMATCH);
       
  1873             break;
       
  1874           default:
       
  1875             GETCHARINC(dc, eptr);
       
  1876         }
       
  1877         if (fc != dc) RRETURN(MATCH_NOMATCH);
       
  1878      }  
       
  1879 #else
       
  1880       if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
       
  1881       while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH);
       
  1882 #endif
       
  1883       }
       
  1884     else
       
  1885 #endif
       
  1886 
       
  1887     /* Non-UTF-8 mode */
       
  1888       {
       
  1889       if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);
       
  1890       if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);
       
  1891       ecode += 2;
       
  1892       }
       
  1893     break;
       
  1894 
       
  1895     /* Match a single character, caselessly */
       
  1896 
       
  1897     case OP_CHARNC:
       
  1898 #ifdef SUPPORT_UTF8
       
  1899     if (utf8)
       
  1900       {
       
  1901       length = 1;
       
  1902       ecode++;
       
  1903       GETUTF8CHARLEN(fc, ecode, length);
       
  1904 
       
  1905 #if PCRE_UTF16
       
  1906       if (md->end_subject - eptr == 0) RRETURN(MATCH_NOMATCH);
       
  1907 #else
       
  1908       if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
       
  1909 #endif
       
  1910 
       
  1911       /* If the pattern character's value is < 128, we have only one byte, and
       
  1912       can use the fast lookup table. */
       
  1913 
       
  1914       if (fc < 128)
       
  1915         {
       
  1916 #if PCRE_UTF16
       
  1917         int dc;
       
  1918         ecode++;
       
  1919         dc = *eptr++;
       
  1920         if (dc >= 128 || md->lcc[fc] != md->lcc[dc]) RRETURN(MATCH_NOMATCH);
       
  1921 #else
       
  1922         if (md->lcc[*ecode++] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
       
  1923 #endif
       
  1924         }
       
  1925 
       
  1926       /* Otherwise we must pick up the subject character */
       
  1927 
       
  1928       else
       
  1929         {
       
  1930         int dc;
       
  1931 #if PCRE_UTF16
       
  1932         if (md->end_subject - eptr == 1) {
       
  1933           dc = *eptr++;
       
  1934           if (IS_LEADING_SURROGATE(dc))
       
  1935             RRETURN(MATCH_NOMATCH);
       
  1936         } else
       
  1937 #endif
       
  1938           GETCHARINC(dc, eptr);
       
  1939         ecode += length;
       
  1940 
       
  1941         /* If we have Unicode property support, we can use it to test the other
       
  1942         case of the character, if there is one. The result of _pcre_ucp_findchar() is
       
  1943         < 0 if the char isn't found, and othercase is returned as zero if there
       
  1944         isn't one. */
       
  1945 
       
  1946         if (fc != dc)
       
  1947           {
       
  1948 #ifdef SUPPORT_UCP
       
  1949           int chartype;
       
  1950           int othercase;
       
  1951           if (_pcre_ucp_findchar(fc, &chartype, &othercase) != ucp_L || dc != othercase)
       
  1952 #endif
       
  1953             RRETURN(MATCH_NOMATCH);
       
  1954           }
       
  1955         }
       
  1956       }
       
  1957     else
       
  1958 #endif   /* SUPPORT_UTF8 */
       
  1959 
       
  1960     /* Non-UTF-8 mode */
       
  1961       {
       
  1962       if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);
       
  1963       if (md->lcc[ecode[1]] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
       
  1964       ecode += 2;
       
  1965       }
       
  1966     break;
       
  1967 
       
  1968     /* Match a single character repeatedly; different opcodes share code. */
       
  1969 
       
  1970     case OP_EXACT:
       
  1971     min = max = GET2(ecode, 1);
       
  1972     ecode += 3;
       
  1973     goto REPEATCHAR;
       
  1974 
       
  1975     case OP_UPTO:
       
  1976     case OP_MINUPTO:
       
  1977     min = 0;
       
  1978     max = GET2(ecode, 1);
       
  1979     minimize = *ecode == OP_MINUPTO;
       
  1980     ecode += 3;
       
  1981     goto REPEATCHAR;
       
  1982 
       
  1983     case OP_STAR:
       
  1984     case OP_MINSTAR:
       
  1985     case OP_PLUS:
       
  1986     case OP_MINPLUS:
       
  1987     case OP_QUERY:
       
  1988     case OP_MINQUERY:
       
  1989     c = *ecode++ - OP_STAR;
       
  1990     minimize = (c & 1) != 0;
       
  1991     min = rep_min[c];                 /* Pick up values from tables; */
       
  1992     max = rep_max[c];                 /* zero for max => infinity */
       
  1993     if (max == 0) max = INT_MAX;
       
  1994 
       
  1995     /* Common code for all repeated single-character matches. We can give
       
  1996     up quickly if there are fewer than the minimum number of characters left in
       
  1997     the subject. */
       
  1998 
       
  1999     REPEATCHAR:
       
  2000 #ifdef SUPPORT_UTF8
       
  2001 #if PCRE_UTF16
       
  2002 
       
  2003       length = 1;
       
  2004       GETUTF8CHARLEN(fc, ecode, length);
       
  2005       {
       
  2006       if (min * (fc > 0xFFFF ? 2 : 1) > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
       
  2007       ecode += length;
       
  2008 
       
  2009       if (fc <= 0xFFFF)
       
  2010         {
       
  2011         int othercase;
       
  2012         int chartype;
       
  2013         if ((ims & PCRE_CASELESS) == 0 || _pcre_ucp_findchar(fc, &chartype, &othercase) != ucp_L)
       
  2014           othercase = -1; /* Guaranteed to not match any character */
       
  2015 
       
  2016         for (i = 1; i <= min; i++)
       
  2017           {
       
  2018           if (*eptr != fc && *eptr != othercase) RRETURN(MATCH_NOMATCH);
       
  2019           ++eptr;
       
  2020           }
       
  2021 
       
  2022         if (min == max) continue;
       
  2023 
       
  2024         if (minimize)
       
  2025           {
       
  2026           repeat_othercase = othercase;
       
  2027           for (fi = min;; fi++)
       
  2028             {
       
  2029             RMATCH(28, rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
       
  2030             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       
  2031             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
       
  2032             if (*eptr != fc && *eptr != repeat_othercase) RRETURN(MATCH_NOMATCH);
       
  2033             ++eptr;
       
  2034             }
       
  2035           /* Control never gets here */
       
  2036           }
       
  2037         else
       
  2038           {
       
  2039           pp = eptr;
       
  2040           for (i = min; i < max; i++)
       
  2041             {
       
  2042             if (eptr >= md->end_subject) break;
       
  2043             if (*eptr != fc && *eptr != othercase) break;
       
  2044             ++eptr;
       
  2045             }
       
  2046           while (eptr >= pp)
       
  2047            {
       
  2048            RMATCH(29, rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
       
  2049            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       
  2050            --eptr;
       
  2051            }
       
  2052           RRETURN(MATCH_NOMATCH);
       
  2053           }
       
  2054         /* Control never gets here */
       
  2055         }
       
  2056       else
       
  2057         {
       
  2058         /* No case on surrogate pairs, so no need to bother with "othercase". */
       
  2059 
       
  2060         for (i = 1; i <= min; i++)
       
  2061           {
       
  2062           int nc;
       
  2063           GETCHAR(nc, eptr);
       
  2064           if (nc != fc) RRETURN(MATCH_NOMATCH);
       
  2065           eptr += 2;
       
  2066           }
       
  2067 
       
  2068         if (min == max) continue;
       
  2069 
       
  2070         if (minimize)
       
  2071           {
       
  2072           for (fi = min;; fi++)
       
  2073             {
       
  2074             int nc;
       
  2075             RMATCH(30, rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
       
  2076             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       
  2077             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
       
  2078             GETCHAR(nc, eptr);
       
  2079             if (*eptr != fc) RRETURN(MATCH_NOMATCH);
       
  2080             eptr += 2;
       
  2081             }
       
  2082           /* Control never gets here */
       
  2083           }
       
  2084         else
       
  2085           {
       
  2086           pp = eptr;
       
  2087           for (i = min; i < max; i++)
       
  2088             {
       
  2089             int nc;
       
  2090             if (eptr > md->end_subject - 2) break;
       
  2091             GETCHAR(nc, eptr);
       
  2092             if (*eptr != fc) break;
       
  2093             eptr += 2;
       
  2094             }
       
  2095           while (eptr >= pp)
       
  2096            {
       
  2097            RMATCH(31, rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
       
  2098            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       
  2099            eptr -= 2;
       
  2100            }
       
  2101           RRETURN(MATCH_NOMATCH);
       
  2102           }
       
  2103           /* Control never gets here */
       
  2104         }
       
  2105         /* Control never gets here */
       
  2106         }
       
  2107 #else
       
  2108     if (utf8)
       
  2109       {
       
  2110       length = 1;
       
  2111       charptr = ecode;
       
  2112       GETCHARLEN(fc, ecode, length);
       
  2113       if (min * length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
       
  2114       ecode += length;
       
  2115 
       
  2116       /* Handle multibyte character matching specially here. There is
       
  2117       support for caseless matching if UCP support is present. */
       
  2118 
       
  2119       if (length > 1)
       
  2120         {
       
  2121         int oclength = 0;
       
  2122         uschar occhars[8];
       
  2123 
       
  2124 #ifdef SUPPORT_UCP
       
  2125         int othercase;
       
  2126         int chartype;
       
  2127         if ((ims & PCRE_CASELESS) != 0 &&
       
  2128              _pcre_ucp_findchar(fc, &chartype, &othercase) == ucp_L &&
       
  2129              othercase > 0)
       
  2130           oclength = _pcre_ord2utf8(othercase, occhars);
       
  2131 #endif  /* SUPPORT_UCP */
       
  2132 
       
  2133         for (i = 1; i <= min; i++)
       
  2134           {
       
  2135           if (memcmp(eptr, charptr, length) == 0) eptr += length;
       
  2136           /* Need braces because of following else */
       
  2137           else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }
       
  2138           else
       
  2139             {
       
  2140             if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);
       
  2141             eptr += oclength;
       
  2142             }
       
  2143           }
       
  2144 
       
  2145         if (min == max) continue;
       
  2146 
       
  2147         if (minimize)
       
  2148           {
       
  2149           for (fi = min;; fi++)
       
  2150             {
       
  2151             // FIXME: This could blow away occhars and oclength in the NO_RECURSE case.
       
  2152             RMATCH(32, rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
       
  2153             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       
  2154             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
       
  2155             if (memcmp(eptr, charptr, length) == 0) eptr += length;
       
  2156             /* Need braces because of following else */
       
  2157             else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }
       
  2158             else
       
  2159               {
       
  2160               if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);
       
  2161               eptr += oclength;
       
  2162               }
       
  2163             }
       
  2164           /* Control never gets here */
       
  2165           }
       
  2166         else
       
  2167           {
       
  2168           pp = eptr;
       
  2169           for (i = min; i < max; i++)
       
  2170             {
       
  2171             if (eptr > md->end_subject - length) break;
       
  2172             if (memcmp(eptr, charptr, length) == 0) eptr += length;
       
  2173             else if (oclength == 0) break;
       
  2174             else
       
  2175               {
       
  2176               if (memcmp(eptr, occhars, oclength) != 0) break;
       
  2177               eptr += oclength;
       
  2178               }
       
  2179             }
       
  2180           while (eptr >= pp)
       
  2181            {
       
  2182            RMATCH(33, rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
       
  2183            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       
  2184            eptr -= length;
       
  2185            }
       
  2186           RRETURN(MATCH_NOMATCH);
       
  2187           }
       
  2188         /* Control never gets here */
       
  2189         }
       
  2190 
       
  2191       /* If the length of a UTF-8 character is 1, we fall through here, and
       
  2192       obey the code as for non-UTF-8 characters below, though in this case the
       
  2193       value of fc will always be < 128. */
       
  2194       }
       
  2195     else
       
  2196 #endif
       
  2197 #endif  /* SUPPORT_UTF8 */
       
  2198 
       
  2199 #if !PCRE_UTF16
       
  2200     /* When not in UTF-8 mode, load a single-byte character. */
       
  2201       {
       
  2202       if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
       
  2203       fc = *ecode++;
       
  2204       }
       
  2205 
       
  2206     /* The value of fc at this point is always less than 256, though we may or
       
  2207     may not be in UTF-8 mode. The code is duplicated for the caseless and
       
  2208     caseful cases, for speed, since matching characters is likely to be quite
       
  2209     common. First, ensure the minimum number of matches are present. If min =
       
  2210     max, continue at the same level without recursing. Otherwise, if
       
  2211     minimizing, keep trying the rest of the expression and advancing one
       
  2212     matching character if failing, up to the maximum. Alternatively, if
       
  2213     maximizing, find the maximum number of characters and work backwards. */
       
  2214 
       
  2215     DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,
       
  2216       max, eptr));
       
  2217 
       
  2218     if ((ims & PCRE_CASELESS) != 0)
       
  2219       {
       
  2220       fc = md->lcc[fc];
       
  2221       for (i = 1; i <= min; i++)
       
  2222         if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
       
  2223       if (min == max) continue;
       
  2224       if (minimize)
       
  2225         {
       
  2226         for (fi = min;; fi++)
       
  2227           {
       
  2228           RMATCH(34, rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
       
  2229           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       
  2230           if (fi >= max || eptr >= md->end_subject ||
       
  2231               fc != md->lcc[*eptr++])
       
  2232             RRETURN(MATCH_NOMATCH);
       
  2233           }
       
  2234         /* Control never gets here */
       
  2235         }
       
  2236       else
       
  2237         {
       
  2238         pp = eptr;
       
  2239         for (i = min; i < max; i++)
       
  2240           {
       
  2241           if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break;
       
  2242           eptr++;
       
  2243           }
       
  2244         while (eptr >= pp)
       
  2245           {
       
  2246           RMATCH(35, rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
       
  2247           eptr--;
       
  2248           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       
  2249           }
       
  2250         RRETURN(MATCH_NOMATCH);
       
  2251         }
       
  2252       /* Control never gets here */
       
  2253       }
       
  2254 
       
  2255     /* Caseful comparisons (includes all multi-byte characters) */
       
  2256 
       
  2257     else
       
  2258       {
       
  2259       for (i = 1; i <= min; i++) if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
       
  2260       if (min == max) continue;
       
  2261       if (minimize)
       
  2262         {
       
  2263         for (fi = min;; fi++)
       
  2264           {
       
  2265           RMATCH(36, rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
       
  2266           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       
  2267           if (fi >= max || eptr >= md->end_subject || fc != *eptr++)
       
  2268             RRETURN(MATCH_NOMATCH);
       
  2269           }
       
  2270         /* Control never gets here */
       
  2271         }
       
  2272       else
       
  2273         {
       
  2274         pp = eptr;
       
  2275         for (i = min; i < max; i++)
       
  2276           {
       
  2277           if (eptr >= md->end_subject || fc != *eptr) break;
       
  2278           eptr++;
       
  2279           }
       
  2280         while (eptr >= pp)
       
  2281           {
       
  2282           RMATCH(37, rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
       
  2283           eptr--;
       
  2284           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       
  2285           }
       
  2286         RRETURN(MATCH_NOMATCH);
       
  2287         }
       
  2288       }
       
  2289     /* Control never gets here */
       
  2290 #endif
       
  2291 
       
  2292     /* Match a negated one-byte pattern character. The subject character we are
       
  2293     checking can be multibyte. */
       
  2294 
       
  2295     case OP_NOT:
       
  2296     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
       
  2297     ecode++;
       
  2298     GETCHARINCTEST(c, eptr);
       
  2299     if ((ims & PCRE_CASELESS) != 0)
       
  2300       {
       
  2301 #ifdef SUPPORT_UTF8
       
  2302       if (c < 256)
       
  2303 #endif
       
  2304       c = md->lcc[c];
       
  2305       if (md->lcc[*ecode++] == c) RRETURN(MATCH_NOMATCH);
       
  2306       }
       
  2307     else
       
  2308       {
       
  2309       if (*ecode++ == c) RRETURN(MATCH_NOMATCH);
       
  2310       }
       
  2311     break;
       
  2312 
       
  2313     /* Match a negated single one-byte character repeatedly. This is almost a
       
  2314     repeat of the code for a repeated single character, but I haven't found a
       
  2315     nice way of commoning these up that doesn't require a test of the
       
  2316     positive/negative option for each character match. Maybe that wouldn't add
       
  2317     very much to the time taken, but character matching *is* what this is all
       
  2318     about... */
       
  2319 
       
  2320     case OP_NOTEXACT:
       
  2321     min = max = GET2(ecode, 1);
       
  2322     ecode += 3;
       
  2323     goto REPEATNOTCHAR;
       
  2324 
       
  2325     case OP_NOTUPTO:
       
  2326     case OP_NOTMINUPTO:
       
  2327     min = 0;
       
  2328     max = GET2(ecode, 1);
       
  2329     minimize = *ecode == OP_NOTMINUPTO;
       
  2330     ecode += 3;
       
  2331     goto REPEATNOTCHAR;
       
  2332 
       
  2333     case OP_NOTSTAR:
       
  2334     case OP_NOTMINSTAR:
       
  2335     case OP_NOTPLUS:
       
  2336     case OP_NOTMINPLUS:
       
  2337     case OP_NOTQUERY:
       
  2338     case OP_NOTMINQUERY:
       
  2339     c = *ecode++ - OP_NOTSTAR;
       
  2340     minimize = (c & 1) != 0;
       
  2341     min = rep_min[c];                 /* Pick up values from tables; */
       
  2342     max = rep_max[c];                 /* zero for max => infinity */
       
  2343     if (max == 0) max = INT_MAX;
       
  2344 
       
  2345     /* Common code for all repeated single-byte matches. We can give up quickly
       
  2346     if there are fewer than the minimum number of bytes left in the
       
  2347     subject. */
       
  2348 
       
  2349     REPEATNOTCHAR:
       
  2350     if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
       
  2351     fc = *ecode++;
       
  2352 
       
  2353     /* The code is duplicated for the caseless and caseful cases, for speed,
       
  2354     since matching characters is likely to be quite common. First, ensure the
       
  2355     minimum number of matches are present. If min = max, continue at the same
       
  2356     level without recursing. Otherwise, if minimizing, keep trying the rest of
       
  2357     the expression and advancing one matching character if failing, up to the
       
  2358     maximum. Alternatively, if maximizing, find the maximum number of
       
  2359     characters and work backwards. */
       
  2360 
       
  2361 #if PCRE_UTF16
       
  2362     DPRINTF(("negative matching %c{%d,%d}\n", fc, min, max));
       
  2363 #else
       
  2364     DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,
       
  2365       max, eptr));
       
  2366 #endif
       
  2367 
       
  2368     if ((ims & PCRE_CASELESS) != 0)
       
  2369       {
       
  2370       fc = md->lcc[fc];
       
  2371 
       
  2372 #ifdef SUPPORT_UTF8
       
  2373       /* UTF-8 mode */
       
  2374       if (utf8)
       
  2375         {
       
  2376         register int d;
       
  2377         for (i = 1; i <= min; i++)
       
  2378           {
       
  2379           GETCHARINC(d, eptr);
       
  2380           if (d < 256) d = md->lcc[d];
       
  2381           if (fc == d) RRETURN(MATCH_NOMATCH);
       
  2382           }
       
  2383         }
       
  2384       else
       
  2385 #endif
       
  2386 
       
  2387       /* Not UTF-8 mode */
       
  2388         {
       
  2389         for (i = 1; i <= min; i++)
       
  2390           if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
       
  2391         }
       
  2392 
       
  2393       if (min == max) continue;
       
  2394 
       
  2395       if (minimize)
       
  2396         {
       
  2397 #ifdef SUPPORT_UTF8
       
  2398         /* UTF-8 mode */
       
  2399         if (utf8)
       
  2400           {
       
  2401           register int d;
       
  2402           for (fi = min;; fi++)
       
  2403             {
       
  2404             RMATCH(38, rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
       
  2405             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       
  2406             GETCHARINC(d, eptr);
       
  2407             if (d < 256) d = md->lcc[d];
       
  2408             if (fi >= max || eptr >= md->end_subject || fc == d)
       
  2409               RRETURN(MATCH_NOMATCH);
       
  2410             }
       
  2411           }
       
  2412         else
       
  2413 #endif
       
  2414         /* Not UTF-8 mode */
       
  2415           {
       
  2416           for (fi = min;; fi++)
       
  2417             {
       
  2418             RMATCH(39, rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
       
  2419             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       
  2420             if (fi >= max || eptr >= md->end_subject || fc == md->lcc[*eptr++])
       
  2421               RRETURN(MATCH_NOMATCH);
       
  2422             }
       
  2423           }
       
  2424         /* Control never gets here */
       
  2425         }
       
  2426 
       
  2427       /* Maximize case */
       
  2428 
       
  2429       else
       
  2430         {
       
  2431         pp = eptr;
       
  2432 
       
  2433 #ifdef SUPPORT_UTF8
       
  2434         /* UTF-8 mode */
       
  2435         if (utf8)
       
  2436           {
       
  2437           register int d;
       
  2438           for (i = min; i < max; i++)
       
  2439             {
       
  2440             int len = 1;
       
  2441             if (eptr >= md->end_subject) break;
       
  2442             GETCHARLEN(d, eptr, len);
       
  2443             if (d < 256) d = md->lcc[d];
       
  2444             if (fc == d) break;
       
  2445             eptr += len;
       
  2446             }
       
  2447           for(;;)
       
  2448             {
       
  2449             RMATCH(40, rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
       
  2450             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       
  2451             if (eptr-- == pp) break;        /* Stop if tried at original pos */
       
  2452             BACKCHAR(eptr);
       
  2453             }
       
  2454           }
       
  2455         else
       
  2456 #endif
       
  2457         /* Not UTF-8 mode */
       
  2458           {
       
  2459           for (i = min; i < max; i++)
       
  2460             {
       
  2461             if (eptr >= md->end_subject || fc == md->lcc[*eptr]) break;
       
  2462             eptr++;
       
  2463             }
       
  2464           while (eptr >= pp)
       
  2465             {
       
  2466             RMATCH(41, rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
       
  2467             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       
  2468             eptr--;
       
  2469             }
       
  2470           }
       
  2471 
       
  2472         RRETURN(MATCH_NOMATCH);
       
  2473         }
       
  2474       /* Control never gets here */
       
  2475       }
       
  2476 
       
  2477     /* Caseful comparisons */
       
  2478 
       
  2479     else
       
  2480       {
       
  2481 #ifdef SUPPORT_UTF8
       
  2482       /* UTF-8 mode */
       
  2483       if (utf8)
       
  2484         {
       
  2485         register int d;
       
  2486         for (i = 1; i <= min; i++)
       
  2487           {
       
  2488           GETCHARINC(d, eptr);
       
  2489           if (fc == d) RRETURN(MATCH_NOMATCH);
       
  2490           }
       
  2491         }
       
  2492       else
       
  2493 #endif
       
  2494       /* Not UTF-8 mode */
       
  2495         {
       
  2496         for (i = 1; i <= min; i++)
       
  2497           if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
       
  2498         }
       
  2499 
       
  2500       if (min == max) continue;
       
  2501 
       
  2502       if (minimize)
       
  2503         {
       
  2504 #ifdef SUPPORT_UTF8
       
  2505         /* UTF-8 mode */
       
  2506         if (utf8)
       
  2507           {
       
  2508           register int d;
       
  2509           for (fi = min;; fi++)
       
  2510             {
       
  2511             RMATCH(42, rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
       
  2512             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       
  2513             GETCHARINC(d, eptr);
       
  2514             if (fi >= max || eptr >= md->end_subject || fc == d)
       
  2515               RRETURN(MATCH_NOMATCH);
       
  2516             }
       
  2517           }
       
  2518         else
       
  2519 #endif
       
  2520         /* Not UTF-8 mode */
       
  2521           {
       
  2522           for (fi = min;; fi++)
       
  2523             {
       
  2524             RMATCH(43, rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
       
  2525             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       
  2526             if (fi >= max || eptr >= md->end_subject || fc == *eptr++)
       
  2527               RRETURN(MATCH_NOMATCH);
       
  2528             }
       
  2529           }
       
  2530         /* Control never gets here */
       
  2531         }
       
  2532 
       
  2533       /* Maximize case */
       
  2534 
       
  2535       else
       
  2536         {
       
  2537         pp = eptr;
       
  2538 
       
  2539 #ifdef SUPPORT_UTF8
       
  2540         /* UTF-8 mode */
       
  2541         if (utf8)
       
  2542           {
       
  2543           register int d;
       
  2544           for (i = min; i < max; i++)
       
  2545             {
       
  2546             int len = 1;
       
  2547             if (eptr >= md->end_subject) break;
       
  2548             GETCHARLEN(d, eptr, len);
       
  2549             if (fc == d) break;
       
  2550             eptr += len;
       
  2551             }
       
  2552           for(;;)
       
  2553             {
       
  2554             RMATCH(44, rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
       
  2555             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       
  2556             if (eptr-- == pp) break;        /* Stop if tried at original pos */
       
  2557             BACKCHAR(eptr);
       
  2558             }
       
  2559           }
       
  2560         else
       
  2561 #endif
       
  2562         /* Not UTF-8 mode */
       
  2563           {
       
  2564           for (i = min; i < max; i++)
       
  2565             {
       
  2566             if (eptr >= md->end_subject || fc == *eptr) break;
       
  2567             eptr++;
       
  2568             }
       
  2569           while (eptr >= pp)
       
  2570             {
       
  2571             RMATCH(45, rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
       
  2572             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       
  2573             eptr--;
       
  2574             }
       
  2575           }
       
  2576 
       
  2577         RRETURN(MATCH_NOMATCH);
       
  2578         }
       
  2579       }
       
  2580     /* Control never gets here */
       
  2581 
       
  2582     /* Match a single character type repeatedly; several different opcodes
       
  2583     share code. This is very similar to the code for single characters, but we
       
  2584     repeat it in the interests of efficiency. */
       
  2585 
       
  2586     case OP_TYPEEXACT:
       
  2587     min = max = GET2(ecode, 1);
       
  2588     minimize = TRUE;
       
  2589     ecode += 3;
       
  2590     goto REPEATTYPE;
       
  2591 
       
  2592     case OP_TYPEUPTO:
       
  2593     case OP_TYPEMINUPTO:
       
  2594     min = 0;
       
  2595     max = GET2(ecode, 1);
       
  2596     minimize = *ecode == OP_TYPEMINUPTO;
       
  2597     ecode += 3;
       
  2598     goto REPEATTYPE;
       
  2599 
       
  2600     case OP_TYPESTAR:
       
  2601     case OP_TYPEMINSTAR:
       
  2602     case OP_TYPEPLUS:
       
  2603     case OP_TYPEMINPLUS:
       
  2604     case OP_TYPEQUERY:
       
  2605     case OP_TYPEMINQUERY:
       
  2606     c = *ecode++ - OP_TYPESTAR;
       
  2607     minimize = (c & 1) != 0;
       
  2608     min = rep_min[c];                 /* Pick up values from tables; */
       
  2609     max = rep_max[c];                 /* zero for max => infinity */
       
  2610     if (max == 0) max = INT_MAX;
       
  2611 
       
  2612     /* Common code for all repeated single character type matches. Note that
       
  2613     in UTF-8 mode, '.' matches a character of any length, but for the other
       
  2614     character types, the valid characters are all one-byte long. */
       
  2615 
       
  2616     REPEATTYPE:
       
  2617     ctype = *ecode++;      /* Code for the character type */
       
  2618 
       
  2619 #ifdef SUPPORT_UCP
       
  2620     if (ctype == OP_PROP || ctype == OP_NOTPROP)
       
  2621       {
       
  2622       prop_fail_result = ctype == OP_NOTPROP;
       
  2623       prop_type = *ecode++;
       
  2624       if (prop_type >= 128)
       
  2625         {
       
  2626         prop_test_against = prop_type - 128;
       
  2627         prop_test_variable = &prop_category;
       
  2628         }
       
  2629       else
       
  2630         {
       
  2631         prop_test_against = prop_type;
       
  2632         prop_test_variable = &prop_chartype;
       
  2633         }
       
  2634       }
       
  2635     else prop_type = -1;
       
  2636 #endif
       
  2637 
       
  2638     /* First, ensure the minimum number of matches are present. Use inline
       
  2639     code for maximizing the speed, and do the type test once at the start
       
  2640     (i.e. keep it out of the loop). Also we can test that there are at least
       
  2641     the minimum number of bytes before we start. This isn't as effective in
       
  2642     UTF-8 mode, but it does no harm. Separate the UTF-8 code completely as that
       
  2643     is tidier. Also separate the UCP code, which can be the same for both UTF-8
       
  2644     and single-bytes. */
       
  2645 
       
  2646     if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
       
  2647     if (min > 0)
       
  2648       {
       
  2649 #ifdef SUPPORT_UCP
       
  2650       if (prop_type > 0)
       
  2651         {
       
  2652         for (i = 1; i <= min; i++)
       
  2653           {
       
  2654           GETCHARINC(c, eptr);
       
  2655           prop_category = _pcre_ucp_findchar(c, &prop_chartype, &prop_othercase);
       
  2656           if ((*prop_test_variable == prop_test_against) == prop_fail_result)
       
  2657             RRETURN(MATCH_NOMATCH);
       
  2658           }
       
  2659         }
       
  2660 
       
  2661       /* Match extended Unicode sequences. We will get here only if the
       
  2662       support is in the binary; otherwise a compile-time error occurs. */
       
  2663 
       
  2664       else if (ctype == OP_EXTUNI)
       
  2665         {
       
  2666         for (i = 1; i <= min; i++)
       
  2667           {
       
  2668           GETCHARINCTEST(c, eptr);
       
  2669           prop_category = _pcre_ucp_findchar(c, &prop_chartype, &prop_othercase);
       
  2670           if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
       
  2671           while (eptr < md->end_subject)
       
  2672             {
       
  2673             int len = 1;
       
  2674             if (!utf8) c = *eptr; else
       
  2675               {
       
  2676               GETCHARLEN(c, eptr, len);
       
  2677               }
       
  2678             prop_category = _pcre_ucp_findchar(c, &prop_chartype, &prop_othercase);
       
  2679             if (prop_category != ucp_M) break;
       
  2680             eptr += len;
       
  2681             }
       
  2682           }
       
  2683         }
       
  2684 
       
  2685       else
       
  2686 #endif     /* SUPPORT_UCP */
       
  2687 
       
  2688 /* Handle all other cases when the coding is UTF-8 */
       
  2689 
       
  2690 #ifdef SUPPORT_UTF8
       
  2691       if (utf8) switch(ctype)
       
  2692         {
       
  2693         case OP_ANY:
       
  2694         for (i = 1; i <= min; i++)
       
  2695           {
       
  2696           if (eptr >= md->end_subject ||
       
  2697              (*eptr++ == NEWLINE && (ims & PCRE_DOTALL) == 0))
       
  2698             RRETURN(MATCH_NOMATCH);
       
  2699           while (eptr < md->end_subject && ISMIDCHAR(*eptr)) eptr++;
       
  2700           }
       
  2701         break;
       
  2702 
       
  2703         case OP_ANYBYTE:
       
  2704         eptr += min;
       
  2705         break;
       
  2706 
       
  2707         case OP_NOT_DIGIT:
       
  2708         for (i = 1; i <= min; i++)
       
  2709           {
       
  2710           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
       
  2711           GETCHARINC(c, eptr);
       
  2712           if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
       
  2713             RRETURN(MATCH_NOMATCH);
       
  2714           }
       
  2715         break;
       
  2716 
       
  2717         case OP_DIGIT:
       
  2718         for (i = 1; i <= min; i++)
       
  2719           {
       
  2720           if (eptr >= md->end_subject ||
       
  2721              *eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)
       
  2722             RRETURN(MATCH_NOMATCH);
       
  2723           /* No need to skip more bytes - we know it's a 1-byte character */
       
  2724           }
       
  2725         break;
       
  2726 
       
  2727         case OP_NOT_WHITESPACE:
       
  2728         for (i = 1; i <= min; i++)
       
  2729           {
       
  2730           if (eptr >= md->end_subject ||
       
  2731              (*eptr < 128 && (md->ctypes[*eptr++] & ctype_space) != 0))
       
  2732             RRETURN(MATCH_NOMATCH);
       
  2733           while (eptr < md->end_subject && ISMIDCHAR(*eptr)) eptr++;
       
  2734           }
       
  2735         break;
       
  2736 
       
  2737         case OP_WHITESPACE:
       
  2738         for (i = 1; i <= min; i++)
       
  2739           {
       
  2740           if (eptr >= md->end_subject ||
       
  2741              *eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)
       
  2742             RRETURN(MATCH_NOMATCH);
       
  2743           /* No need to skip more bytes - we know it's a 1-byte character */
       
  2744           }
       
  2745         break;
       
  2746 
       
  2747         case OP_NOT_WORDCHAR:
       
  2748         for (i = 1; i <= min; i++)
       
  2749           {
       
  2750           if (eptr >= md->end_subject ||
       
  2751              (*eptr < 128 && (md->ctypes[*eptr++] & ctype_word) != 0))
       
  2752             RRETURN(MATCH_NOMATCH);
       
  2753           while (eptr < md->end_subject && ISMIDCHAR(*eptr)) eptr++;
       
  2754           }
       
  2755         break;
       
  2756 
       
  2757         case OP_WORDCHAR:
       
  2758         for (i = 1; i <= min; i++)
       
  2759           {
       
  2760           if (eptr >= md->end_subject ||
       
  2761              *eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)
       
  2762             RRETURN(MATCH_NOMATCH);
       
  2763           /* No need to skip more bytes - we know it's a 1-byte character */
       
  2764           }
       
  2765         break;
       
  2766 
       
  2767         default:
       
  2768         RRETURN(PCRE_ERROR_INTERNAL);
       
  2769         }  /* End switch(ctype) */
       
  2770 
       
  2771       else
       
  2772 #endif     /* SUPPORT_UTF8 */
       
  2773 
       
  2774       /* Code for the non-UTF-8 case for minimum matching of operators other
       
  2775       than OP_PROP and OP_NOTPROP. */
       
  2776 
       
  2777       switch(ctype)
       
  2778         {
       
  2779         case OP_ANY:
       
  2780         if ((ims & PCRE_DOTALL) == 0)
       
  2781           {
       
  2782           for (i = 1; i <= min; i++)
       
  2783             if (*eptr++ == NEWLINE) RRETURN(MATCH_NOMATCH);
       
  2784           }
       
  2785         else eptr += min;
       
  2786         break;
       
  2787 
       
  2788         case OP_ANYBYTE:
       
  2789         eptr += min;
       
  2790         break;
       
  2791 
       
  2792         case OP_NOT_DIGIT:
       
  2793         for (i = 1; i <= min; i++)
       
  2794           if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
       
  2795         break;
       
  2796 
       
  2797         case OP_DIGIT:
       
  2798         for (i = 1; i <= min; i++)
       
  2799           if ((md->ctypes[*eptr++] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
       
  2800         break;
       
  2801 
       
  2802         case OP_NOT_WHITESPACE:
       
  2803         for (i = 1; i <= min; i++)
       
  2804           if ((md->ctypes[*eptr++] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
       
  2805         break;
       
  2806 
       
  2807         case OP_WHITESPACE:
       
  2808         for (i = 1; i <= min; i++)
       
  2809           if ((md->ctypes[*eptr++] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
       
  2810         break;
       
  2811 
       
  2812         case OP_NOT_WORDCHAR:
       
  2813         for (i = 1; i <= min; i++)
       
  2814           if ((md->ctypes[*eptr++] & ctype_word) != 0)
       
  2815             RRETURN(MATCH_NOMATCH);
       
  2816         break;
       
  2817 
       
  2818         case OP_WORDCHAR:
       
  2819         for (i = 1; i <= min; i++)
       
  2820           if ((md->ctypes[*eptr++] & ctype_word) == 0)
       
  2821             RRETURN(MATCH_NOMATCH);
       
  2822         break;
       
  2823 
       
  2824         default:
       
  2825         RRETURN(PCRE_ERROR_INTERNAL);
       
  2826         }
       
  2827       }
       
  2828 
       
  2829     /* If min = max, continue at the same level without recursing */
       
  2830 
       
  2831     if (min == max) continue;
       
  2832 
       
  2833     /* If minimizing, we have to test the rest of the pattern before each
       
  2834     subsequent match. Again, separate the UTF-8 case for speed, and also
       
  2835     separate the UCP cases. */
       
  2836 
       
  2837     if (minimize)
       
  2838       {
       
  2839 #ifdef SUPPORT_UCP
       
  2840       if (prop_type > 0)
       
  2841         {
       
  2842         for (fi = min;; fi++)
       
  2843           {
       
  2844           RMATCH(46, rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
       
  2845           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       
  2846           if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
       
  2847           GETCHARINC(c, eptr);
       
  2848           prop_category = _pcre_ucp_findchar(c, &prop_chartype, &prop_othercase);
       
  2849           if ((*prop_test_variable == prop_test_against) == prop_fail_result)
       
  2850             RRETURN(MATCH_NOMATCH);
       
  2851           }
       
  2852         }
       
  2853 
       
  2854       /* Match extended Unicode sequences. We will get here only if the
       
  2855       support is in the binary; otherwise a compile-time error occurs. */
       
  2856 
       
  2857       else if (ctype == OP_EXTUNI)
       
  2858         {
       
  2859         for (fi = min;; fi++)
       
  2860           {
       
  2861           RMATCH(47, rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
       
  2862           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       
  2863           if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
       
  2864           GETCHARINCTEST(c, eptr);
       
  2865           prop_category = _pcre_ucp_findchar(c, &prop_chartype, &prop_othercase);
       
  2866           if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
       
  2867           while (eptr < md->end_subject)
       
  2868             {
       
  2869             int len = 1;
       
  2870             if (!utf8) c = *eptr; else
       
  2871               {
       
  2872               GETCHARLEN(c, eptr, len);
       
  2873               }
       
  2874             prop_category = _pcre_ucp_findchar(c, &prop_chartype, &prop_othercase);
       
  2875             if (prop_category != ucp_M) break;
       
  2876             eptr += len;
       
  2877             }
       
  2878           }
       
  2879         }
       
  2880 
       
  2881       else
       
  2882 #endif     /* SUPPORT_UCP */
       
  2883 
       
  2884 #ifdef SUPPORT_UTF8
       
  2885       /* UTF-8 mode */
       
  2886       if (utf8)
       
  2887         {
       
  2888         for (fi = min;; fi++)
       
  2889           {
       
  2890           RMATCH(48, rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
       
  2891           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       
  2892           if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
       
  2893 
       
  2894           GETCHARINC(c, eptr);
       
  2895           switch(ctype)
       
  2896             {
       
  2897             case OP_ANY:
       
  2898             if ((ims & PCRE_DOTALL) == 0 && c == NEWLINE) RRETURN(MATCH_NOMATCH);
       
  2899             break;
       
  2900 
       
  2901             case OP_ANYBYTE:
       
  2902             break;
       
  2903 
       
  2904             case OP_NOT_DIGIT:
       
  2905             if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)
       
  2906               RRETURN(MATCH_NOMATCH);
       
  2907             break;
       
  2908 
       
  2909             case OP_DIGIT:
       
  2910             if (c >= 256 || (md->ctypes[c] & ctype_digit) == 0)
       
  2911               RRETURN(MATCH_NOMATCH);
       
  2912             break;
       
  2913 
       
  2914             case OP_NOT_WHITESPACE:
       
  2915             if (c < 256 && (md->ctypes[c] & ctype_space) != 0)
       
  2916               RRETURN(MATCH_NOMATCH);
       
  2917             break;
       
  2918 
       
  2919             case OP_WHITESPACE:
       
  2920             if  (c >= 256 || (md->ctypes[c] & ctype_space) == 0)
       
  2921               RRETURN(MATCH_NOMATCH);
       
  2922             break;
       
  2923 
       
  2924             case OP_NOT_WORDCHAR:
       
  2925             if (c < 256 && (md->ctypes[c] & ctype_word) != 0)
       
  2926               RRETURN(MATCH_NOMATCH);
       
  2927             break;
       
  2928 
       
  2929             case OP_WORDCHAR:
       
  2930             if (c >= 256 || (md->ctypes[c] & ctype_word) == 0)
       
  2931               RRETURN(MATCH_NOMATCH);
       
  2932             break;
       
  2933 
       
  2934             default:
       
  2935             RRETURN(PCRE_ERROR_INTERNAL);
       
  2936             }
       
  2937           }
       
  2938         }
       
  2939       else
       
  2940 #endif
       
  2941       /* Not UTF-8 mode */
       
  2942         {
       
  2943         for (fi = min;; fi++)
       
  2944           {
       
  2945           RMATCH(49, rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
       
  2946           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       
  2947           if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
       
  2948           c = *eptr++;
       
  2949           switch(ctype)
       
  2950             {
       
  2951             case OP_ANY:
       
  2952             if ((ims & PCRE_DOTALL) == 0 && c == NEWLINE) RRETURN(MATCH_NOMATCH);
       
  2953             break;
       
  2954 
       
  2955             case OP_ANYBYTE:
       
  2956             break;
       
  2957 
       
  2958             case OP_NOT_DIGIT:
       
  2959             if ((md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
       
  2960             break;
       
  2961 
       
  2962             case OP_DIGIT:
       
  2963             if ((md->ctypes[c] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
       
  2964             break;
       
  2965 
       
  2966             case OP_NOT_WHITESPACE:
       
  2967             if ((md->ctypes[c] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
       
  2968             break;
       
  2969 
       
  2970             case OP_WHITESPACE:
       
  2971             if  ((md->ctypes[c] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
       
  2972             break;
       
  2973 
       
  2974             case OP_NOT_WORDCHAR:
       
  2975             if ((md->ctypes[c] & ctype_word) != 0) RRETURN(MATCH_NOMATCH);
       
  2976             break;
       
  2977 
       
  2978             case OP_WORDCHAR:
       
  2979             if ((md->ctypes[c] & ctype_word) == 0) RRETURN(MATCH_NOMATCH);
       
  2980             break;
       
  2981 
       
  2982             default:
       
  2983             RRETURN(PCRE_ERROR_INTERNAL);
       
  2984             }
       
  2985           }
       
  2986         }
       
  2987       /* Control never gets here */
       
  2988       }
       
  2989 
       
  2990     /* If maximizing it is worth using inline code for speed, doing the type
       
  2991     test once at the start (i.e. keep it out of the loop). Again, keep the
       
  2992     UTF-8 and UCP stuff separate. */
       
  2993 
       
  2994     else
       
  2995       {
       
  2996       pp = eptr;  /* Remember where we started */
       
  2997 
       
  2998 #ifdef SUPPORT_UCP
       
  2999       if (prop_type > 0)
       
  3000         {
       
  3001         for (i = min; i < max; i++)
       
  3002           {
       
  3003           int len = 1;
       
  3004           if (eptr >= md->end_subject) break;
       
  3005           GETCHARLEN(c, eptr, len);
       
  3006           prop_category = _pcre_ucp_findchar(c, &prop_chartype, &prop_othercase);
       
  3007           if ((*prop_test_variable == prop_test_against) == prop_fail_result)
       
  3008             break;
       
  3009           eptr+= len;
       
  3010           }
       
  3011 
       
  3012         /* eptr is now past the end of the maximum run */
       
  3013 
       
  3014         for(;;)
       
  3015           {
       
  3016           RMATCH(50, rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
       
  3017           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       
  3018           if (eptr-- == pp) break;        /* Stop if tried at original pos */
       
  3019           BACKCHAR(eptr);
       
  3020           }
       
  3021         }
       
  3022 
       
  3023       /* Match extended Unicode sequences. We will get here only if the
       
  3024       support is in the binary; otherwise a compile-time error occurs. */
       
  3025 
       
  3026       else if (ctype == OP_EXTUNI)
       
  3027         {
       
  3028         for (i = min; i < max; i++)
       
  3029           {
       
  3030           if (eptr >= md->end_subject) break;
       
  3031           GETCHARINCTEST(c, eptr);
       
  3032           prop_category = _pcre_ucp_findchar(c, &prop_chartype, &prop_othercase);
       
  3033           if (prop_category == ucp_M) break;
       
  3034           while (eptr < md->end_subject)
       
  3035             {
       
  3036             int len = 1;
       
  3037             if (!utf8) c = *eptr; else
       
  3038               {
       
  3039               GETCHARLEN(c, eptr, len);
       
  3040               }
       
  3041             prop_category = _pcre_ucp_findchar(c, &prop_chartype, &prop_othercase);
       
  3042             if (prop_category != ucp_M) break;
       
  3043             eptr += len;
       
  3044             }
       
  3045           }
       
  3046 
       
  3047         /* eptr is now past the end of the maximum run */
       
  3048 
       
  3049         for(;;)
       
  3050           {
       
  3051           RMATCH(51, rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
       
  3052           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       
  3053           if (eptr-- == pp) break;        /* Stop if tried at original pos */
       
  3054           for (;;)                        /* Move back over one extended */
       
  3055             {
       
  3056             int len = 1;
       
  3057             BACKCHAR(eptr);
       
  3058             if (!utf8) c = *eptr; else
       
  3059               {
       
  3060               GETCHARLEN(c, eptr, len);
       
  3061               }
       
  3062             prop_category = _pcre_ucp_findchar(c, &prop_chartype, &prop_othercase);
       
  3063             if (prop_category != ucp_M) break;
       
  3064             eptr--;
       
  3065             }
       
  3066           }
       
  3067         }
       
  3068 
       
  3069       else
       
  3070 #endif   /* SUPPORT_UCP */
       
  3071 
       
  3072 #ifdef SUPPORT_UTF8
       
  3073       /* UTF-8 mode */
       
  3074 
       
  3075       if (utf8)
       
  3076         {
       
  3077         switch(ctype)
       
  3078           {
       
  3079           case OP_ANY:
       
  3080 
       
  3081           /* Special code is required for UTF8, but when the maximum is unlimited
       
  3082           we don't need it, so we repeat the non-UTF8 code. This is probably
       
  3083           worth it, because .* is quite a common idiom. */
       
  3084 
       
  3085           if (max < INT_MAX)
       
  3086             {
       
  3087             if ((ims & PCRE_DOTALL) == 0)
       
  3088               {
       
  3089               for (i = min; i < max; i++)
       
  3090                 {
       
  3091                 if (eptr >= md->end_subject || *eptr == NEWLINE) break;
       
  3092                 eptr++;
       
  3093                 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
       
  3094                 }
       
  3095               }
       
  3096             else
       
  3097               {
       
  3098               for (i = min; i < max; i++)
       
  3099                 {
       
  3100                 eptr++;
       
  3101                 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
       
  3102                 }
       
  3103               }
       
  3104             }
       
  3105 
       
  3106           /* Handle unlimited UTF-8 repeat */
       
  3107 
       
  3108           else
       
  3109             {
       
  3110             if ((ims & PCRE_DOTALL) == 0)
       
  3111               {
       
  3112               for (i = min; i < max; i++)
       
  3113                 {
       
  3114                 if (eptr >= md->end_subject || *eptr == NEWLINE) break;
       
  3115                 eptr++;
       
  3116                 }
       
  3117               break;
       
  3118               }
       
  3119             else
       
  3120               {
       
  3121               c = max - min;
       
  3122               if (c > md->end_subject - eptr) c = INT_CAST(md->end_subject - eptr);
       
  3123               eptr += c;
       
  3124               }
       
  3125             }
       
  3126           break;
       
  3127 
       
  3128           /* The byte case is the same as non-UTF8 */
       
  3129 
       
  3130           case OP_ANYBYTE:
       
  3131           c = max - min;
       
  3132           if (c > md->end_subject - eptr) c = INT_CAST(md->end_subject - eptr);
       
  3133           eptr += c;
       
  3134           break;
       
  3135 
       
  3136           case OP_NOT_DIGIT:
       
  3137           for (i = min; i < max; i++)
       
  3138             {
       
  3139             int len = 1;
       
  3140             if (eptr >= md->end_subject) break;
       
  3141             GETCHARLEN(c, eptr, len);
       
  3142             if (c < 256 && (md->ctypes[c] & ctype_digit) != 0) break;
       
  3143             eptr+= len;
       
  3144             }
       
  3145           break;
       
  3146 
       
  3147           case OP_DIGIT:
       
  3148           for (i = min; i < max; i++)
       
  3149             {
       
  3150             int len = 1;
       
  3151             if (eptr >= md->end_subject) break;
       
  3152             GETCHARLEN(c, eptr, len);
       
  3153             if (c >= 256 ||(md->ctypes[c] & ctype_digit) == 0) break;
       
  3154             eptr+= len;
       
  3155             }
       
  3156           break;
       
  3157 
       
  3158           case OP_NOT_WHITESPACE:
       
  3159           for (i = min; i < max; i++)
       
  3160             {
       
  3161             int len = 1;
       
  3162             if (eptr >= md->end_subject) break;
       
  3163             GETCHARLEN(c, eptr, len);
       
  3164             if (c < 256 && (md->ctypes[c] & ctype_space) != 0) break;
       
  3165             eptr+= len;
       
  3166             }
       
  3167           break;
       
  3168 
       
  3169           case OP_WHITESPACE:
       
  3170           for (i = min; i < max; i++)
       
  3171             {
       
  3172             int len = 1;
       
  3173             if (eptr >= md->end_subject) break;
       
  3174             GETCHARLEN(c, eptr, len);
       
  3175             if (c >= 256 ||(md->ctypes[c] & ctype_space) == 0) break;
       
  3176             eptr+= len;
       
  3177             }
       
  3178           break;
       
  3179 
       
  3180           case OP_NOT_WORDCHAR:
       
  3181           for (i = min; i < max; i++)
       
  3182             {
       
  3183             int len = 1;
       
  3184             if (eptr >= md->end_subject) break;
       
  3185             GETCHARLEN(c, eptr, len);
       
  3186             if (c < 256 && (md->ctypes[c] & ctype_word) != 0) break;
       
  3187             eptr+= len;
       
  3188             }
       
  3189           break;
       
  3190 
       
  3191           case OP_WORDCHAR:
       
  3192           for (i = min; i < max; i++)
       
  3193             {
       
  3194             int len = 1;
       
  3195             if (eptr >= md->end_subject) break;
       
  3196             GETCHARLEN(c, eptr, len);
       
  3197             if (c >= 256 || (md->ctypes[c] & ctype_word) == 0) break;
       
  3198             eptr+= len;
       
  3199             }
       
  3200           break;
       
  3201 
       
  3202           default:
       
  3203           RRETURN(PCRE_ERROR_INTERNAL);
       
  3204           }
       
  3205 
       
  3206         /* eptr is now past the end of the maximum run */
       
  3207 
       
  3208         for(;;)
       
  3209           {
       
  3210           RMATCH(52, rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
       
  3211           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       
  3212           if (eptr-- == pp) break;        /* Stop if tried at original pos */
       
  3213           BACKCHAR(eptr);
       
  3214           }
       
  3215         }
       
  3216       else
       
  3217 #endif
       
  3218 
       
  3219       /* Not UTF-8 mode */
       
  3220         {
       
  3221         switch(ctype)
       
  3222           {
       
  3223           case OP_ANY:
       
  3224           if ((ims & PCRE_DOTALL) == 0)
       
  3225             {
       
  3226             for (i = min; i < max; i++)
       
  3227               {
       
  3228               if (eptr >= md->end_subject || *eptr == NEWLINE) break;
       
  3229               eptr++;
       
  3230               }
       
  3231             break;
       
  3232             }
       
  3233           /* For DOTALL case, fall through and treat as \C */
       
  3234 
       
  3235           case OP_ANYBYTE:
       
  3236           c = max - min;
       
  3237           if (c > md->end_subject - eptr) c = INT_CAST(md->end_subject - eptr);
       
  3238           eptr += c;
       
  3239           break;
       
  3240 
       
  3241           case OP_NOT_DIGIT:
       
  3242           for (i = min; i < max; i++)
       
  3243             {
       
  3244             if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) != 0)
       
  3245               break;
       
  3246             eptr++;
       
  3247             }
       
  3248           break;
       
  3249 
       
  3250           case OP_DIGIT:
       
  3251           for (i = min; i < max; i++)
       
  3252             {
       
  3253             if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) == 0)
       
  3254               break;
       
  3255             eptr++;
       
  3256             }
       
  3257           break;
       
  3258 
       
  3259           case OP_NOT_WHITESPACE:
       
  3260           for (i = min; i < max; i++)
       
  3261             {
       
  3262             if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) != 0)
       
  3263               break;
       
  3264             eptr++;
       
  3265             }
       
  3266           break;
       
  3267 
       
  3268           case OP_WHITESPACE:
       
  3269           for (i = min; i < max; i++)
       
  3270             {
       
  3271             if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) == 0)
       
  3272               break;
       
  3273             eptr++;
       
  3274             }
       
  3275           break;
       
  3276 
       
  3277           case OP_NOT_WORDCHAR:
       
  3278           for (i = min; i < max; i++)
       
  3279             {
       
  3280             if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) != 0)
       
  3281               break;
       
  3282             eptr++;
       
  3283             }
       
  3284           break;
       
  3285 
       
  3286           case OP_WORDCHAR:
       
  3287           for (i = min; i < max; i++)
       
  3288             {
       
  3289             if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) == 0)
       
  3290               break;
       
  3291             eptr++;
       
  3292             }
       
  3293           break;
       
  3294 
       
  3295           default:
       
  3296           RRETURN(PCRE_ERROR_INTERNAL);
       
  3297           }
       
  3298 
       
  3299         /* eptr is now past the end of the maximum run */
       
  3300 
       
  3301         while (eptr >= pp)
       
  3302           {
       
  3303           RMATCH(53, rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
       
  3304           eptr--;
       
  3305           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       
  3306           }
       
  3307         }
       
  3308 
       
  3309       /* Get here if we can't make it match with any permitted repetitions */
       
  3310 
       
  3311       RRETURN(MATCH_NOMATCH);
       
  3312       }
       
  3313     /* Control never gets here */
       
  3314 
       
  3315     /* There's been some horrible disaster. Since all codes > OP_BRA are
       
  3316     for capturing brackets, and there shouldn't be any gaps between 0 and
       
  3317     OP_BRA, arrival here can only mean there is something seriously wrong
       
  3318     in the code above or the OP_xxx definitions. */
       
  3319 
       
  3320     default:
       
  3321     DPRINTF(("Unknown opcode %d\n", *ecode));
       
  3322     RRETURN(PCRE_ERROR_UNKNOWN_NODE);
       
  3323     }
       
  3324 
       
  3325   /* Do not stick any code in here without much thought; it is assumed
       
  3326   that "continue" in the code above comes out to here to repeat the main
       
  3327   loop. */
       
  3328 
       
  3329   }             /* End of main loop */
       
  3330 /* Control never reaches here */
       
  3331 
       
  3332 #ifdef NO_RECURSE
       
  3333 #ifndef __GNUC__
       
  3334 
       
  3335 RRETURN_SWITCH:
       
  3336 switch (frame->Xwhere)
       
  3337   {
       
  3338   case 1: goto RRETURN_1;
       
  3339   case 2: goto RRETURN_2;
       
  3340   case 3: goto RRETURN_3;
       
  3341   case 4: goto RRETURN_4;
       
  3342   case 5: goto RRETURN_5;
       
  3343   case 6: goto RRETURN_6;
       
  3344   case 7: goto RRETURN_7;
       
  3345   case 8: goto RRETURN_8;
       
  3346   case 9: goto RRETURN_9;
       
  3347   case 10: goto RRETURN_10;
       
  3348   case 11: goto RRETURN_11;
       
  3349   case 12: goto RRETURN_12;
       
  3350   case 13: goto RRETURN_13;
       
  3351   case 14: goto RRETURN_14;
       
  3352   case 15: goto RRETURN_15;
       
  3353   case 16: goto RRETURN_16;
       
  3354   case 17: goto RRETURN_17;
       
  3355   case 18: goto RRETURN_18;
       
  3356   case 19: goto RRETURN_19;
       
  3357   case 20: goto RRETURN_20;
       
  3358   case 21: goto RRETURN_21;
       
  3359   case 22: goto RRETURN_22;
       
  3360   case 23: goto RRETURN_23;
       
  3361   case 24: goto RRETURN_24;
       
  3362   case 25: goto RRETURN_25;
       
  3363   case 26: goto RRETURN_26;
       
  3364   case 27: goto RRETURN_27;
       
  3365   case 28: goto RRETURN_28;
       
  3366   case 29: goto RRETURN_29;
       
  3367   case 30: goto RRETURN_30;
       
  3368   case 31: goto RRETURN_31;
       
  3369   case 32: goto RRETURN_32;
       
  3370   case 33: goto RRETURN_33;
       
  3371   case 34: goto RRETURN_34;
       
  3372   case 35: goto RRETURN_35;
       
  3373   case 36: goto RRETURN_36;
       
  3374   case 37: goto RRETURN_37;
       
  3375   case 38: goto RRETURN_38;
       
  3376   case 39: goto RRETURN_39;
       
  3377   case 40: goto RRETURN_40;
       
  3378   case 41: goto RRETURN_41;
       
  3379   case 42: goto RRETURN_42;
       
  3380   case 43: goto RRETURN_43;
       
  3381   case 44: goto RRETURN_44;
       
  3382   case 45: goto RRETURN_45;
       
  3383   case 46: goto RRETURN_46;
       
  3384   case 47: goto RRETURN_47;
       
  3385   case 48: goto RRETURN_48;
       
  3386   case 49: goto RRETURN_49;
       
  3387   case 50: goto RRETURN_50;
       
  3388   case 51: goto RRETURN_51;
       
  3389   case 52: goto RRETURN_52;
       
  3390   case 53: goto RRETURN_53;
       
  3391   }
       
  3392 
       
  3393 #if PCRE_UTF16
       
  3394 /* It's safer to have the extra symbols here than to try to ifdef the switch statement above,
       
  3395 because we'll get warnings or errors if we have multiply defined symbols but a runtime failure
       
  3396 if we leave something out of the switch statement. */
       
  3397 RRETURN_32:
       
  3398 RRETURN_33:
       
  3399 RRETURN_34:
       
  3400 RRETURN_35:
       
  3401 RRETURN_36:
       
  3402 RRETURN_37:
       
  3403 #endif
       
  3404 
       
  3405 abort();
       
  3406 return 0;
       
  3407 
       
  3408 #endif
       
  3409 #endif
       
  3410 
       
  3411 }
       
  3412 
       
  3413 
       
  3414 /***************************************************************************
       
  3415 ****************************************************************************
       
  3416                    RECURSION IN THE match() FUNCTION
       
  3417 
       
  3418 Undefine all the macros that were defined above to handle this. */
       
  3419 
       
  3420 #ifdef NO_RECURSE   /* the names in this group are macros only in the NO_RECURSE build */
       
  3421 #undef eptr         /* eptr, ecode, offset_top, ims and eptrb are also passed as RMATCH() arguments in match() */
       
  3422 #undef ecode
       
  3423 #undef offset_top
       
  3424 #undef ims          /* pcre_exec() below declares a local with this name */
       
  3425 #undef eptrb
       
  3426 #undef flags        /* pcre_exec() below declares a local with this name */
       
  3427 
       
  3428 #undef callpat
       
  3429 #undef charptr
       
  3430 #undef data
       
  3431 #undef next
       
  3432 #undef pp
       
  3433 #undef prev
       
  3434 #undef saved_eptr
       
  3435 
       
  3436 #undef new_recursive
       
  3437 
       
  3438 #undef cur_is_word
       
  3439 #undef condition
       
  3440 #undef minimize
       
  3441 #undef prev_is_word
       
  3442 
       
  3443 #undef original_ims
       
  3444 
       
  3445 #undef ctype
       
  3446 #undef length       /* pcre_exec() below declares a parameter with this name */
       
  3447 #undef max
       
  3448 #undef min
       
  3449 #undef number
       
  3450 #undef offset
       
  3451 #undef op
       
  3452 #undef save_capture_last
       
  3453 #undef save_offset1
       
  3454 #undef save_offset2
       
  3455 #undef save_offset3
       
  3456 #undef stacksave
       
  3457 
       
  3458 #undef newptrb
       
  3459 
       
  3460 #endif              /* NO_RECURSE */
       
  3461 
       
  3462 /* fc and fi are defined as macros in both cases (with and without
          NO_RECURSE), so they are undefined unconditionally, outside the #ifdef */
       
  3463 
       
  3464 #undef fc
       
  3465 #undef fi
       
  3466 
       
  3467 /***************************************************************************
       
  3468 ***************************************************************************/
       
  3469 
       
  3470 
       
  3471 
       
  3472 /*************************************************
       
  3473 *         Execute a Regular Expression           *
       
  3474 *************************************************/
       
  3475 
       
  3476 /* This function applies a compiled re to a subject string and picks out
       
  3477 portions of the string if it matches. Two elements in the vector are set for
       
  3478 each substring: the offsets to the start and end of the substring.
       
  3479 
       
  3480 Arguments:
       
  3481   argument_re     points to the compiled expression
       
  3482   extra_data      points to extra data or is NULL
       
  3483   subject         points to the subject string
       
  3484   length          length of subject string (may contain binary zeros)
       
  3485   start_offset    where to start in the subject string
       
  3486   options         option bits
       
  3487   offsets         points to a vector of ints to be filled in with offsets
       
  3488   offsetcount     the number of elements in the vector
       
  3489 
       
  3490 Returns:          > 0 => success; value is the number of elements filled in
       
  3491                   = 0 => success, but offsets is not big enough
       
  3492                    -1 => failed to match
       
  3493                  < -1 => some kind of unexpected problem
       
  3494 */
       
  3495 
       
  3496 PCRE_EXPORT int
       
  3497 pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
       
  3498   const pcre_char *subject, int length, int start_offset, int options, int *offsets,
       
  3499   int offsetcount)
       
  3500 {
       
  3501 int rc, resetcount, ocount;
       
  3502 int first_byte = -1;
       
  3503 int req_byte = -1;
       
  3504 int req_byte2 = -1;
       
  3505 unsigned long int ims = 0;
       
  3506 BOOL using_temporary_offsets = FALSE;
       
  3507 BOOL anchored;
       
  3508 BOOL startline;
       
  3509 BOOL firstline;
       
  3510 BOOL first_byte_caseless = FALSE;
       
  3511 BOOL req_byte_caseless = FALSE;
       
  3512 match_data match_block;
       
  3513 const uschar *tables;
       
  3514 const uschar *start_bits = NULL;
       
  3515 const pcre_uchar *start_match = (const pcre_uchar *)subject + start_offset;
       
  3516 const pcre_uchar *end_subject;
       
  3517 const pcre_uchar *req_byte_ptr = start_match - 1;
       
  3518 
       
  3519 pcre_study_data internal_study;
       
  3520 const pcre_study_data *study;
       
  3521 
       
  3522 real_pcre internal_re;
       
  3523 const real_pcre *external_re = (const real_pcre *)argument_re;
       
  3524 const real_pcre *re = external_re;
       
  3525 
       
  3526 /* Plausibility checks */
       
  3527 
       
  3528 if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
       
  3529 if (re == NULL || subject == NULL ||
       
  3530    (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
       
  3531 if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
       
  3532 
       
  3533 /* Fish out the optional data from the extra_data structure, first setting
       
  3534 the default values. */
       
  3535 
       
  3536 study = NULL;
       
  3537 match_block.match_limit = MATCH_LIMIT;
       
  3538 match_block.callout_data = NULL;
       
  3539 
       
  3540 /* The table pointer is always in native byte order. */
       
  3541 
       
  3542 tables = external_re->tables;
       
  3543 
       
  3544 if (extra_data != NULL)
       
  3545   {
       
  3546   register unsigned long flags = extra_data->flags;
       
  3547   if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
       
  3548     study = (const pcre_study_data *)extra_data->study_data;
       
  3549   if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)
       
  3550     match_block.match_limit = extra_data->match_limit;
       
  3551   if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
       
  3552     match_block.callout_data = extra_data->callout_data;
       
  3553   if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;
       
  3554   }
       
  3555 
       
  3556 /* If the exec call supplied NULL for tables, use the inbuilt ones. This
       
  3557 is a feature that makes it possible to save compiled regexes and re-use them
       
  3558 in other programs later. */
       
  3559 
       
  3560 if (tables == NULL) tables = _pcre_default_tables;
       
  3561 
       
  3562 /* Check that the first field in the block is the magic number. If it is not,
       
  3563 test for a regex that was compiled on a host of opposite endianness. If this is
       
  3564 the case, flipped values are put in internal_re and internal_study if there was
       
  3565 study data too. */
       
  3566 
       
  3567 if (re->magic_number != MAGIC_NUMBER)
       
  3568   {
       
  3569   re = _pcre_try_flipped(re, &internal_re, study, &internal_study);
       
  3570   if (re == NULL) return PCRE_ERROR_BADMAGIC;
       
  3571   if (study != NULL) study = &internal_study;
       
  3572   }
       
  3573 
       
  3574 /* Set up other data */
       
  3575 
       
  3576 anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
       
  3577 startline = (re->options & PCRE_STARTLINE) != 0;
       
  3578 firstline = (re->options & PCRE_FIRSTLINE) != 0;
       
  3579 
       
  3580 /* The code starts after the real_pcre block and the capture name table. */
       
  3581 
       
  3582 match_block.start_code = (const uschar *)external_re + re->name_table_offset +
       
  3583   re->name_count * re->name_entry_size;
       
  3584 
       
  3585 match_block.start_subject = (const pcre_uchar *)subject;
       
  3586 match_block.start_offset = start_offset;
       
  3587 match_block.end_subject = match_block.start_subject + length;
       
  3588 end_subject = match_block.end_subject;
       
  3589 
       
  3590 match_block.endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
       
  3591 match_block.utf8 = (re->options & PCRE_UTF8) != 0;
       
  3592 
       
  3593 match_block.notbol = (options & PCRE_NOTBOL) != 0;
       
  3594 match_block.noteol = (options & PCRE_NOTEOL) != 0;
       
  3595 match_block.notempty = (options & PCRE_NOTEMPTY) != 0;
       
  3596 match_block.partial = (options & PCRE_PARTIAL) != 0;
       
  3597 match_block.hitend = FALSE;
       
  3598 
       
  3599 match_block.recursive = NULL;                   /* No recursion at top level */
       
  3600 
       
  3601 match_block.lcc = tables + lcc_offset;
       
  3602 match_block.ctypes = tables + ctypes_offset;
       
  3603 
       
  3604 /* Partial matching is supported only for a restricted set of regexes at the
       
  3605 moment. */
       
  3606 
       
  3607 if (match_block.partial && (re->options & PCRE_NOPARTIAL) != 0)
       
  3608   return PCRE_ERROR_BADPARTIAL;
       
  3609 
       
  3610 /* Check a UTF-8 string if required. Unfortunately there's no way of passing
       
  3611 back the character offset. */
       
  3612 
       
  3613 #if !PCRE_UTF16
       
  3614 #ifdef SUPPORT_UTF8
       
  3615 if (match_block.utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
       
  3616   {
       
  3617   if (_pcre_valid_utf8((pcre_uchar *)subject, length) >= 0)
       
  3618     return PCRE_ERROR_BADUTF8;
       
  3619   if (start_offset > 0 && start_offset < length)
       
  3620     {
       
  3621     int tb = ((pcre_uchar *)subject)[start_offset];
       
  3622     if (tb > 127)
       
  3623       {
       
  3624       tb &= 0xc0;
       
  3625       if (tb != 0 && tb != 0xc0) return PCRE_ERROR_BADUTF8_OFFSET;
       
  3626       }
       
  3627     }
       
  3628   }
       
  3629 #endif
       
  3630 #endif
       
  3631 
       
  3632 /* The ims options can vary during the matching as a result of the presence
       
  3633 of (?ims) items in the pattern. They are kept in a local variable so that
       
  3634 restoring at the exit of a group is easy. */
       
  3635 
       
  3636 ims = re->options & (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL);
       
  3637 
       
  3638 /* If the expression has got more back references than the offsets supplied can
       
  3639 hold, we get a temporary chunk of working store to use during the matching.
       
  3640 Otherwise, we can use the vector supplied, rounding down its size to a multiple
       
  3641 of 3. */
       
  3642 
       
  3643 ocount = offsetcount - (offsetcount % 3);
       
  3644 
       
  3645 if (re->top_backref > 0 && re->top_backref >= ocount/3)
       
  3646   {
       
  3647   ocount = re->top_backref * 3 + 3;
       
  3648   match_block.offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int));
       
  3649   if (match_block.offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
       
  3650   using_temporary_offsets = TRUE;
       
  3651   DPRINTF(("Got memory to hold back references\n"));
       
  3652   }
       
  3653 else match_block.offset_vector = offsets;
       
  3654 
       
  3655 match_block.offset_end = ocount;
       
  3656 match_block.offset_max = (2*ocount)/3;
       
  3657 match_block.offset_overflow = FALSE;
       
  3658 match_block.capture_last = -1;
       
  3659 
       
  3660 /* Compute the minimum number of offsets that we need to reset each time. Doing
       
  3661 this makes a huge difference to execution time when there aren't many brackets
       
  3662 in the pattern. */
       
  3663 
       
  3664 resetcount = 2 + re->top_bracket * 2;
       
  3665 if (resetcount > offsetcount) resetcount = ocount;
       
  3666 
       
  3667 /* Reset the working variable associated with each extraction. These should
       
  3668 never be used unless previously set, but they get saved and restored, and so we
       
  3669 initialize them to avoid reading uninitialized locations. */
       
  3670 
       
  3671 if (match_block.offset_vector != NULL)
       
  3672   {
       
  3673   register int *iptr = match_block.offset_vector + ocount;
       
  3674   register int *iend = iptr - resetcount/2 + 1;
       
  3675   while (--iptr >= iend) *iptr = -1;
       
  3676   }
       
  3677 
       
  3678 /* Set up the first character to match, if available. The first_byte value is
       
  3679 never set for an anchored regular expression, but the anchoring may be forced
       
  3680 at run time, so we have to test for anchoring. The first char may be unset for
       
  3681 an unanchored pattern, of course. If there's no first char and the pattern was
       
  3682 studied, there may be a bitmap of possible first characters. */
       
  3683 
       
  3684 if (!anchored)
       
  3685   {
       
  3686   if ((re->options & PCRE_FIRSTSET) != 0)
       
  3687     {
       
  3688     first_byte = re->first_byte & 255;
       
  3689     if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)
       
  3690       first_byte = match_block.lcc[first_byte];
       
  3691     }
       
  3692   else
       
  3693     if (!startline && study != NULL &&
       
  3694       (study->options & PCRE_STUDY_MAPPED) != 0)
       
  3695         start_bits = study->start_bits;
       
  3696   }
       
  3697 
       
  3698 /* For anchored or unanchored matches, there may be a "last known required
       
  3699 character" set. */
       
  3700 
       
  3701 if ((re->options & PCRE_REQCHSET) != 0)
       
  3702   {
       
  3703   req_byte = re->req_byte & 255;
       
  3704   req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;
       
  3705   req_byte2 = (tables + fcc_offset)[req_byte];  /* case flipped */
       
  3706   }
       
  3707 
       
  3708 /* Loop for handling unanchored repeated matching attempts; for anchored regexes
       
  3709 the loop runs just once. */
       
  3710 
       
  3711 do
       
  3712   {
       
  3713   const pcre_uchar *save_end_subject = end_subject;
       
  3714 
       
  3715   /* Reset the maximum number of extractions we might see. */
       
  3716 
       
  3717   if (match_block.offset_vector != NULL)
       
  3718     {
       
  3719     register int *iptr = match_block.offset_vector;
       
  3720     register int *iend = iptr + resetcount;
       
  3721     while (iptr < iend) *iptr++ = -1;
       
  3722     }
       
  3723 
       
  3724   /* Advance to a unique first char if possible. If firstline is TRUE, the
       
  3725   start of the match is constrained to the first line of a multiline string.
       
  3726   Implement this by temporarily adjusting end_subject so that we stop scanning
       
  3727   at a newline. If the match fails at the newline, later code breaks this loop.
       
  3728   */
       
  3729 
       
  3730   if (firstline)
       
  3731     {
       
  3732     const pcre_uchar *t = start_match;
       
  3733     while (t < save_end_subject && *t != '\n') t++;
       
  3734     end_subject = t;
       
  3735     }
       
  3736 
       
  3737   /* Now test for a unique first byte */
       
  3738 
       
  3739   if (first_byte >= 0)
       
  3740     {
       
  3741     pcre_uchar first_char = first_byte;
       
  3742     if (first_byte_caseless)
       
  3743       while (start_match < end_subject)
       
  3744         {
       
  3745         int sm = *start_match;
       
  3746 #if PCRE_UTF16
       
  3747         if (sm > 127)
       
  3748           break;
       
  3749 #endif
       
  3750         if (match_block.lcc[sm] == first_char)
       
  3751           break;
       
  3752         start_match++;
       
  3753         }
       
  3754     else
       
  3755       while (start_match < end_subject && *start_match != first_char)
       
  3756         start_match++;
       
  3757     }
       
  3758 
       
  3759   /* Or to just after \n for a multiline match if possible */
       
  3760 
       
  3761   else if (startline)
       
  3762     {
       
  3763     if (start_match > match_block.start_subject + start_offset)
       
  3764       {
       
  3765       while (start_match < end_subject && start_match[-1] != NEWLINE)
       
  3766         start_match++;
       
  3767       }
       
  3768     }
       
  3769 
       
  3770   /* Or to a non-unique first char after study */
       
  3771 
       
  3772   else if (start_bits != NULL)
       
  3773     {
       
  3774     while (start_match < end_subject)
       
  3775       {
       
  3776       register unsigned int c = *start_match;
       
  3777       if ((start_bits[c/8] & (1 << (c&7))) == 0) start_match++; else break;
       
  3778       }
       
  3779     }
       
  3780 
       
  3781   /* Restore fudged end_subject */
       
  3782 
       
  3783   end_subject = save_end_subject;
       
  3784 
       
  3785 #ifdef DEBUG  /* Sigh. Some compilers never learn. */
       
  3786   printf(">>>> Match against: ");
       
  3787   pchars(start_match, end_subject - start_match, TRUE, &match_block);
       
  3788   printf("\n");
       
  3789 #endif
       
  3790 
       
  3791   /* If req_byte is set, we know that that character must appear in the subject
       
  3792   for the match to succeed. If the first character is set, req_byte must be
       
  3793   later in the subject; otherwise the test starts at the match point. This
       
  3794   optimization can save a huge amount of backtracking in patterns with nested
       
  3795   unlimited repeats that aren't going to match. Writing separate code for
       
  3796   cased/caseless versions makes it go faster, as does using an autoincrement
       
  3797   and backing off on a match.
       
  3798 
       
  3799   HOWEVER: when the subject string is very, very long, searching to its end can
       
  3800   take a long time, and give bad performance on quite ordinary patterns. This
       
  3801   showed up when somebody was matching /^C/ on a 32-megabyte string... so we
       
  3802   don't do this when the string is sufficiently long.
       
  3803 
       
  3804   ALSO: this processing is disabled when partial matching is requested.
       
  3805   */
       
  3806 
       
  3807   if (req_byte >= 0 &&
       
  3808       end_subject - start_match < REQ_BYTE_MAX &&
       
  3809       !match_block.partial)
       
  3810     {
       
  3811     register const pcre_uchar *p = start_match + ((first_byte >= 0)? 1 : 0);
       
  3812 
       
  3813     /* We don't need to repeat the search if we haven't yet reached the
       
  3814     place we found it at last time. */
       
  3815 
       
  3816     if (p > req_byte_ptr)
       
  3817       {
       
  3818       if (req_byte_caseless)
       
  3819         {
       
  3820         while (p < end_subject)
       
  3821           {
       
  3822           register int pp = *p++;
       
  3823           if (pp == req_byte || pp == req_byte2) { p--; break; }
       
  3824           }
       
  3825         }
       
  3826       else
       
  3827         {
       
  3828         while (p < end_subject)
       
  3829           {
       
  3830           if (*p++ == req_byte) { p--; break; }
       
  3831           }
       
  3832         }
       
  3833 
       
  3834       /* If we can't find the required character, break the matching loop */
       
  3835 
       
  3836       if (p >= end_subject) break;
       
  3837 
       
  3838       /* If we have found the required character, save the point where we
       
  3839       found it, so that we don't search again next time round the loop if
       
  3840       the start hasn't passed this character yet. */
       
  3841 
       
  3842       req_byte_ptr = p;
       
  3843       }
       
  3844     }
       
  3845 
       
  3846   /* When a match occurs, substrings will be set for all internal extractions;
       
  3847   we just need to set up the whole thing as substring 0 before returning. If
       
  3848   there were too many extractions, set the return code to zero. In the case
       
  3849   where we had to get some local store to hold offsets for backreferences, copy
       
  3850   those back references that we can. In this case there need not be overflow
       
  3851   if certain parts of the pattern were not used. */
       
  3852 
       
  3853   match_block.start_match = start_match;
       
  3854   match_block.match_call_count = 0;
       
  3855 
       
  3856   rc = match(start_match, match_block.start_code, 2, &match_block, ims, NULL,
       
  3857     match_isgroup);
       
  3858 
       
  3859   /* When the result is no match, if the subject's first character was a
       
  3860   newline and the PCRE_FIRSTLINE option is set, break (which will return
       
  3861   PCRE_ERROR_NOMATCH). The option requests that a match occur before the first
       
  3862   newline in the subject. Otherwise, advance the pointer to the next character
       
  3863   and continue - but the continuation will actually happen only when the
       
  3864   pattern is not anchored. */
       
  3865 
       
  3866   if (rc == MATCH_NOMATCH)
       
  3867     {
       
  3868     if (firstline && *start_match == NEWLINE) break;
       
  3869     start_match++;
       
  3870 #ifdef SUPPORT_UTF8
       
  3871     if (match_block.utf8)
       
  3872       while(start_match < end_subject && ISMIDCHAR(*start_match))
       
  3873         start_match++;
       
  3874 #endif
       
  3875     continue;
       
  3876     }
       
  3877 
       
  3878   if (rc != MATCH_MATCH)
       
  3879     {
       
  3880     DPRINTF((">>>> error: returning %d\n", rc));
       
  3881     return rc;
       
  3882     }
       
  3883 
       
  3884   /* We have a match! Copy the offset information from temporary store if
       
  3885   necessary */
       
  3886 
       
  3887   if (using_temporary_offsets)
       
  3888     {
       
  3889     if (offsetcount >= 4)
       
  3890       {
       
  3891       memcpy(offsets + 2, match_block.offset_vector + 2,
       
  3892         (offsetcount - 2) * sizeof(int));
       
  3893       DPRINTF(("Copied offsets from temporary memory\n"));
       
  3894       }
       
  3895     if (match_block.end_offset_top > offsetcount)
       
  3896       match_block.offset_overflow = TRUE;
       
  3897 
       
  3898     DPRINTF(("Freeing temporary memory\n"));
       
  3899     (pcre_free)(match_block.offset_vector);
       
  3900     }
       
  3901 
       
  3902   rc = match_block.offset_overflow? 0 : match_block.end_offset_top/2;
       
  3903 
       
  3904   if (offsetcount < 2) rc = 0; else
       
  3905     {
       
  3906     offsets[0] = INT_CAST(start_match - match_block.start_subject);
       
  3907     offsets[1] = INT_CAST(match_block.end_match_ptr - match_block.start_subject);
       
  3908     }
       
  3909 
       
  3910   DPRINTF((">>>> returning %d\n", rc));
       
  3911   return rc;
       
  3912   }
       
  3913 
       
  3914 /* This "while" is the end of the "do" above */
       
  3915 
       
  3916 while (!anchored && start_match <= end_subject);
       
  3917 
       
  3918 if (using_temporary_offsets)
       
  3919   {
       
  3920   DPRINTF(("Freeing temporary memory\n"));
       
  3921   (pcre_free)(match_block.offset_vector);
       
  3922   }
       
  3923 
       
  3924 if (match_block.partial && match_block.hitend)
       
  3925   {
       
  3926   DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));
       
  3927   return PCRE_ERROR_PARTIAL;
       
  3928   }
       
  3929 else
       
  3930   {
       
  3931   DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n"));
       
  3932   return PCRE_ERROR_NOMATCH;
       
  3933   }
       
  3934 }
       
  3935 
       
  3936 /* End of pcre_exec.c */