libraries/spcre/libpcre/pcre/pcretest.c
changeset 0 7f656887cf89
equal deleted inserted replaced
-1:000000000000 0:7f656887cf89
       
     1 /*************************************************
       
     2 *             PCRE testing program               *
       
     3 *************************************************/
       
     4 
       
     5 /* This program was hacked up as a tester for PCRE. I really should have
       
     6 written it more tidily in the first place. Will I ever learn? It has grown and
       
     7 been extended and consequently is now rather, er, *very* untidy in places.
       
     8 
       
     9 -----------------------------------------------------------------------------
       
    10 Redistribution and use in source and binary forms, with or without
       
    11 modification, are permitted provided that the following conditions are met:
       
    12 
       
    13     * Redistributions of source code must retain the above copyright notice,
       
    14       this list of conditions and the following disclaimer.
       
    15 
       
    16     * Redistributions in binary form must reproduce the above copyright
       
    17       notice, this list of conditions and the following disclaimer in the
       
    18       documentation and/or other materials provided with the distribution.
       
    19 
       
    20     * Neither the name of the University of Cambridge nor the names of its
       
    21       contributors may be used to endorse or promote products derived from
       
    22       this software without specific prior written permission.
       
    23 
       
    24 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
       
    25 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
       
    26 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
       
    27 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
       
    28 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
       
    29 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
       
    30 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
       
    31 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
       
    32 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
       
    33 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
       
    34 POSSIBILITY OF SUCH DAMAGE.
       
    35 -----------------------------------------------------------------------------
       
    36 */
       
    37 
       
    38 
       
    39 #ifdef HAVE_CONFIG_H
       
    40 #include "config.h"
       
    41 #endif
       
    42 
       
#include <ctype.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <time.h>
#include <locale.h>
#include <errno.h>
#include <limits.h>
       
    50 
       
    51 #ifdef SUPPORT_LIBREADLINE
       
    52 #ifdef HAVE_UNISTD_H
       
    53 #include <unistd.h>
       
    54 #endif
       
    55 #include <readline/readline.h>
       
    56 #include <readline/history.h>
       
    57 #endif
       
    58 
       
    59 
       
    60 /* A number of things vary for Windows builds. Originally, pcretest opened its
       
    61 input and output without "b"; then I was told that "b" was needed in some
       
    62 environments, so it was added for release 5.0 to both the input and output. (It
       
    63 makes no difference on Unix-like systems.) Later I was told that it is wrong
       
    64 for the input on Windows. I've now abstracted the modes into two macros that
       
    65 are set here, to make it easier to fiddle with them, and removed "b" from the
       
    66 input mode under Windows. */
       
    67 
       
    68 #if defined(_WIN32) || defined(WIN32)
       
    69 #include <io.h>                /* For _setmode() */
       
    70 #include <fcntl.h>             /* For _O_BINARY */
       
    71 #define INPUT_MODE   "r"
       
    72 #define OUTPUT_MODE  "wb"
       
    73 
       
    74 #define isatty _isatty         /* This is what Windows calls them, I'm told */
       
    75 #define fileno _fileno
       
    76 
       
    77 #else
       
    78 #include <sys/time.h>          /* These two includes are needed */
       
    79 #include <sys/resource.h>      /* for setrlimit(). */
       
    80 #define INPUT_MODE   "rb"
       
    81 #define OUTPUT_MODE  "wb"
       
    82 #endif
       
    83 
       
    84 
       
    85 /* We have to include pcre_internal.h because we need the internal info for
       
    86 displaying the results of pcre_study() and we also need to know about the
       
    87 internal macros, structures, and other internal data values; pcretest has
       
    88 "inside information" compared to a program that strictly follows the PCRE API.
       
    89 
       
    90 Although pcre_internal.h does itself include pcre.h, we explicitly include it
       
    91 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
       
    92 appropriately for an application, not for building PCRE. */
       
    93 
       
    94 #include "pcre.h"
       
    95 #include "pcre_internal.h"
       
    96 
       
    97 /* We need access to some of the data tables that PCRE uses. So as not to have
       
    98 to keep two copies, we include the source file here, changing the names of the
       
    99 external symbols to prevent clashes. */
       
   100 
       
   101 #define _pcre_ucp_gentype      ucp_gentype
       
   102 #define _pcre_utf8_table1      utf8_table1
       
   103 #define _pcre_utf8_table1_size utf8_table1_size
       
   104 #define _pcre_utf8_table2      utf8_table2
       
   105 #define _pcre_utf8_table3      utf8_table3
       
   106 #define _pcre_utf8_table4      utf8_table4
       
   107 #define _pcre_utt              utt
       
   108 #define _pcre_utt_size         utt_size
       
   109 #define _pcre_utt_names        utt_names
       
   110 #define _pcre_OP_lengths       OP_lengths
       
   111 
       
   112 #include "pcre_tables.c"
       
   113 
       
   114 /* We also need the pcre_printint() function for printing out compiled
       
   115 patterns. This function is in a separate file so that it can be included in
       
   116 pcre_compile.c when that module is compiled with debugging enabled.
       
   117 
       
   118 The definition of the macro PRINTABLE, which determines whether to print an
       
   119 output character as-is or as a hex value when showing compiled patterns, is
       
   120 contained in this file. We use it here also, in cases when the locale has not
       
   121 been explicitly changed, so as to get consistent output from systems that
       
   122 differ in their output from isprint() even in the "C" locale. */
       
   123 
       
   124 #include "pcre_printint.src"
       
   125 
       
   126 #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
       
   127 
       
   128 
       
   129 /* It is possible to compile this test program without including support for
       
   130 testing the POSIX interface, though this is not available via the standard
       
   131 Makefile. */
       
   132 
       
   133 #if !defined NOPOSIX
       
   134 #include "pcreposix.h"
       
   135 #endif
       
   136 
       
   137 /* It is also possible, for the benefit of the version currently imported into
       
   138 Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
       
   139 interface to the DFA matcher (NODFA), and without the doublecheck of the old
       
   140 "info" function (define NOINFOCHECK). In fact, we automatically cut out the
       
   141 UTF8 support if PCRE is built without it. */
       
   142 
       
   143 #ifndef SUPPORT_UTF8
       
   144 #ifndef NOUTF8
       
   145 #define NOUTF8
       
   146 #endif
       
   147 #endif
       
   148 
       
   149 
       
   150 /* Other parameters */
       
   151 
       
   152 #ifndef CLOCKS_PER_SEC
       
   153 #ifdef CLK_TCK
       
   154 #define CLOCKS_PER_SEC CLK_TCK
       
   155 #else
       
   156 #define CLOCKS_PER_SEC 100
       
   157 #endif
       
   158 #endif
       
   159 
       
   160 /* This is the default loop count for timing. */
       
   161 
       
   162 #define LOOPREPEAT 500000
       
   163 
       
   164 /* Static variables */

   165 

   166 static FILE *outfile;            /* Stream that callout(), the malloc wrappers, new_info() and check_match_limit() write to */

   167 static int log_store = 0;        /* NOTE(review): not referenced in this part of the file - purpose to be confirmed */

   168 static int callout_count;        /* Incremented by callout() each time the callout number equals callout_fail_id */

   169 static int callout_extra;        /* Non-zero makes callout() print the capture details and always show the subject */

   170 static int callout_fail_count;   /* callout() returns 1 once callout_count reaches this value */

   171 static int callout_fail_id;      /* The callout number whose occurrences are counted in callout_count */

   172 static int debug_lengths;        /* NOTE(review): not referenced in this part of the file - purpose to be confirmed */

   173 static int first_callout;        /* Non-zero makes callout() print the subject; callout() resets it to zero */

   174 static int locale_set = 0;       /* Selects isprint() rather than PRINTABLE() in the PRINTHEX macro */

   175 static int show_malloc;          /* Non-zero makes the malloc/free wrappers log each call to outfile */

   176 static int use_utf8;             /* Non-zero makes pchars() decode its input as UTF-8 */

   177 static size_t gotten_store;      /* Size passed to the most recent new_malloc() call */

   178 

   179 /* The buffers grow automatically if very long input lines are encountered. */

   180 

   181 static int buffer_size = 50000;  /* Current size of each of the three buffers; doubled by extend_inputline() */

   182 static uschar *buffer = NULL;    /* Input lines are read into this buffer */

   183 static uschar *dbuffer = NULL;   /* Resized together with buffer; its contents are not kept across a resize */

   184 static uschar *pbuffer = NULL;   /* Copy of the input for callouts; contents are kept across a resize */
       
   185 
       
   186 
       
   187 
       
   188 /*************************************************
       
   189 *        Read or extend an input line            *
       
   190 *************************************************/
       
   191 
       
   192 /* Input lines are read into buffer, but both patterns and data lines can be
       
   193 continued over multiple input lines. In addition, if the buffer fills up, we
       
   194 want to automatically expand it so as to be able to handle extremely large
       
   195 lines that are needed for certain stress tests. When the input buffer is
       
   196 expanded, the other two buffers must also be expanded likewise, and the
       
   197 contents of pbuffer, which are a copy of the input for callouts, must be
       
   198 preserved (for when expansion happens for a data line). This is not the most
       
   199 optimal way of handling this, but hey, this is just a test program!
       
   200 
       
   201 Arguments:
       
   202   f            the file to read
       
   203   start        where in buffer to start (this *must* be within buffer)
       
   204   prompt       for stdin or readline()
       
   205 
       
   206 Returns:       pointer to the start of new data
       
   207                could be a copy of start, or could be moved
       
   208                NULL if no data read and EOF reached
       
   209 */
       
   210 
       
   211 static uschar *
       
   212 extend_inputline(FILE *f, uschar *start, const char *prompt)
       
   213 {
       
   214 uschar *here = start;
       
   215 
       
   216 for (;;)
       
   217   {
       
   218   int rlen = buffer_size - (here - buffer);
       
   219 
       
   220   if (rlen > 1000)
       
   221     {
       
   222     int dlen;
       
   223 
       
   224     /* If libreadline support is required, use readline() to read a line if the
       
   225     input is a terminal. Note that readline() removes the trailing newline, so
       
   226     we must put it back again, to be compatible with fgets(). */
       
   227 
       
   228 #ifdef SUPPORT_LIBREADLINE
       
   229     if (isatty(fileno(f)))
       
   230       {
       
   231       size_t len;
       
   232       char *s = readline(prompt);
       
   233       if (s == NULL) return (here == start)? NULL : start;
       
   234       len = strlen(s);
       
   235       if (len > 0) add_history(s);
       
   236       if (len > rlen - 1) len = rlen - 1;
       
   237       memcpy(here, s, len);
       
   238       here[len] = '\n';
       
   239       here[len+1] = 0;
       
   240       free(s);
       
   241       }
       
   242     else
       
   243 #endif
       
   244 
       
   245     /* Read the next line by normal means, prompting if the file is stdin. */
       
   246 
       
   247       {
       
   248       if (f == stdin) printf(prompt);
       
   249       if (fgets((char *)here, rlen,  f) == NULL)
       
   250         return (here == start)? NULL : start;
       
   251       }
       
   252 
       
   253     dlen = (int)strlen((char *)here);
       
   254     if (dlen > 0 && here[dlen - 1] == '\n') return start;
       
   255     here += dlen;
       
   256     }
       
   257 
       
   258   else
       
   259     {
       
   260     int new_buffer_size = 2*buffer_size;
       
   261     uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
       
   262     uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
       
   263     uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
       
   264 
       
   265     if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
       
   266       {
       
   267       fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
       
   268       exit(1);
       
   269       }
       
   270 
       
   271     memcpy(new_buffer, buffer, buffer_size);
       
   272     memcpy(new_pbuffer, pbuffer, buffer_size);
       
   273 
       
   274     buffer_size = new_buffer_size;
       
   275 
       
   276     start = new_buffer + (start - buffer);
       
   277     here = new_buffer + (here - buffer);
       
   278 
       
   279     free(buffer);
       
   280     free(dbuffer);
       
   281     free(pbuffer);
       
   282 
       
   283     buffer = new_buffer;
       
   284     dbuffer = new_dbuffer;
       
   285     pbuffer = new_pbuffer;
       
   286     }
       
   287   }
       
   288 
       
   289 return NULL;  /* Control never gets here */
       
   290 }
       
   291 
       
   292 
       
   293 
       
   294 
       
   295 
       
   296 
       
   297 
       
   298 /*************************************************
       
   299 *          Read number from string               *
       
   300 *************************************************/
       
   301 
       
   302 /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
       
   303 around with conditional compilation, just do the job by hand. It is only used
       
   304 for unpicking arguments, so just keep it simple.
       
   305 
       
   306 Arguments:
       
   307   str           string to be converted
       
   308   endptr        where to put the end pointer
       
   309 
       
   310 Returns:        the unsigned long
       
   311 */
       
   312 
       
   313 static int
       
   314 get_value(unsigned char *str, unsigned char **endptr)
       
   315 {
       
   316 int result = 0;
       
   317 while(*str != 0 && isspace(*str)) str++;
       
   318 while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
       
   319 *endptr = str;
       
   320 return(result);
       
   321 }
       
   322 
       
   323 
       
   324 
       
   325 
       
   326 /*************************************************
       
   327 *            Convert UTF-8 string to value       *
       
   328 *************************************************/
       
   329 
       
   330 /* This function takes one or more bytes that represents a UTF-8 character,
       
   331 and returns the value of the character.
       
   332 
       
   333 Argument:
       
   334   utf8bytes   a pointer to the byte vector
       
   335   vptr        a pointer to an int to receive the value
       
   336 
       
   337 Returns:      >  0 => the number of bytes consumed
       
   338               -6 to 0 => malformed UTF-8 character at offset = (-return)
       
   339 */
       
   340 
       
   341 #if !defined NOUTF8
       
   342 
       
   343 static int
       
   344 utf82ord(unsigned char *utf8bytes, int *vptr)
       
   345 {
       
   346 int c = *utf8bytes++;
       
   347 int d = c;
       
   348 int i, j, s;
       
   349 
       
   350 for (i = -1; i < 6; i++)               /* i is number of additional bytes */
       
   351   {
       
   352   if ((d & 0x80) == 0) break;
       
   353   d <<= 1;
       
   354   }
       
   355 
       
   356 if (i == -1) { *vptr = c; return 1; }  /* ascii character */
       
   357 if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */
       
   358 
       
   359 /* i now has a value in the range 1-5 */
       
   360 
       
   361 s = 6*i;
       
   362 d = (c & utf8_table3[i]) << s;
       
   363 
       
   364 for (j = 0; j < i; j++)
       
   365   {
       
   366   c = *utf8bytes++;
       
   367   if ((c & 0xc0) != 0x80) return -(j+1);
       
   368   s -= 6;
       
   369   d |= (c & 0x3f) << s;
       
   370   }
       
   371 
       
   372 /* Check that encoding was the correct unique one */
       
   373 
       
   374 for (j = 0; j < utf8_table1_size; j++)
       
   375   if (d <= utf8_table1[j]) break;
       
   376 if (j != i) return -(i+1);
       
   377 
       
   378 /* Valid value */
       
   379 
       
   380 *vptr = d;
       
   381 return i+1;
       
   382 }
       
   383 
       
   384 #endif
       
   385 
       
   386 
       
   387 
       
   388 /*************************************************
       
   389 *       Convert character value to UTF-8         *
       
   390 *************************************************/
       
   391 
       
   392 /* This function takes an integer value in the range 0 - 0x7fffffff
       
   393 and encodes it as a UTF-8 character in 0 to 6 bytes.
       
   394 
       
   395 Arguments:
       
   396   cvalue     the character value
       
   397   utf8bytes  pointer to buffer for result - at least 6 bytes long
       
   398 
       
   399 Returns:     number of characters placed in the buffer
       
   400 */
       
   401 
       
   402 #if !defined NOUTF8
       
   403 
       
   404 static int
       
   405 ord2utf8(int cvalue, uschar *utf8bytes)
       
   406 {
       
   407 register int i, j;
       
   408 for (i = 0; i < utf8_table1_size; i++)
       
   409   if (cvalue <= utf8_table1[i]) break;
       
   410 utf8bytes += i;
       
   411 for (j = i; j > 0; j--)
       
   412  {
       
   413  *utf8bytes-- = 0x80 | (cvalue & 0x3f);
       
   414  cvalue >>= 6;
       
   415  }
       
   416 *utf8bytes = utf8_table2[i] | cvalue;
       
   417 return i + 1;
       
   418 }
       
   419 
       
   420 #endif
       
   421 
       
   422 
       
   423 
       
   424 /*************************************************
       
   425 *             Print character string             *
       
   426 *************************************************/
       
   427 
       
   428 /* Character string printing function. Must handle UTF-8 strings in utf8
       
   429 mode. Yields number of characters printed. If handed a NULL file, just counts
       
   430 chars without printing. */
       
   431 
       
   432 static int pchars(unsigned char *p, int length, FILE *f)
       
   433 {
       
   434 int c = 0;
       
   435 int yield = 0;
       
   436 
       
   437 while (length-- > 0)
       
   438   {
       
   439 #if !defined NOUTF8
       
   440   if (use_utf8)
       
   441     {
       
   442     int rc = utf82ord(p, &c);
       
   443 
       
   444     if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */
       
   445       {
       
   446       length -= rc - 1;
       
   447       p += rc;
       
   448       if (PRINTHEX(c))
       
   449         {
       
   450         if (f != NULL) fprintf(f, "%c", c);
       
   451         yield++;
       
   452         }
       
   453       else
       
   454         {
       
   455         int n = 4;
       
   456         if (f != NULL) fprintf(f, "\\x{%02x}", c);
       
   457         yield += (n <= 0x000000ff)? 2 :
       
   458                  (n <= 0x00000fff)? 3 :
       
   459                  (n <= 0x0000ffff)? 4 :
       
   460                  (n <= 0x000fffff)? 5 : 6;
       
   461         }
       
   462       continue;
       
   463       }
       
   464     }
       
   465 #endif
       
   466 
       
   467    /* Not UTF-8, or malformed UTF-8  */
       
   468 
       
   469   c = *p++;
       
   470   if (PRINTHEX(c))
       
   471     {
       
   472     if (f != NULL) fprintf(f, "%c", c);
       
   473     yield++;
       
   474     }
       
   475   else
       
   476     {
       
   477     if (f != NULL) fprintf(f, "\\x%02x", c);
       
   478     yield += 4;
       
   479     }
       
   480   }
       
   481 
       
   482 return yield;
       
   483 }
       
   484 
       
   485 
       
   486 
       
   487 /*************************************************
       
   488 *              Callout function                  *
       
   489 *************************************************/
       
   490 
       
   491 /* Called from PCRE as a result of the (?C) item. We print out where we are in
       
   492 the match. Yield zero unless more callouts than the fail count, or the callout
       
   493 data is not zero. */
       
   494 
       
   495 static int callout(pcre_callout_block *cb)
       
   496 {
       
   497 FILE *f = (first_callout | callout_extra)? outfile : NULL;
       
   498 int i, pre_start, post_start, subject_length;
       
   499 
       
   500 if (callout_extra)
       
   501   {
       
   502   fprintf(f, "Callout %d: last capture = %d\n",
       
   503     cb->callout_number, cb->capture_last);
       
   504 
       
   505   for (i = 0; i < cb->capture_top * 2; i += 2)
       
   506     {
       
   507     if (cb->offset_vector[i] < 0)
       
   508       fprintf(f, "%2d: <unset>\n", i/2);
       
   509     else
       
   510       {
       
   511       fprintf(f, "%2d: ", i/2);
       
   512       (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
       
   513         cb->offset_vector[i+1] - cb->offset_vector[i], f);
       
   514       fprintf(f, "\n");
       
   515       }
       
   516     }
       
   517   }
       
   518 
       
   519 /* Re-print the subject in canonical form, the first time or if giving full
       
   520 datails. On subsequent calls in the same match, we use pchars just to find the
       
   521 printed lengths of the substrings. */
       
   522 
       
   523 if (f != NULL) fprintf(f, "--->");
       
   524 
       
   525 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
       
   526 post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
       
   527   cb->current_position - cb->start_match, f);
       
   528 
       
   529 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
       
   530 
       
   531 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
       
   532   cb->subject_length - cb->current_position, f);
       
   533 
       
   534 if (f != NULL) fprintf(f, "\n");
       
   535 
       
   536 /* Always print appropriate indicators, with callout number if not already
       
   537 shown. For automatic callouts, show the pattern offset. */
       
   538 
       
   539 if (cb->callout_number == 255)
       
   540   {
       
   541   fprintf(outfile, "%+3d ", cb->pattern_position);
       
   542   if (cb->pattern_position > 99) fprintf(outfile, "\n    ");
       
   543   }
       
   544 else
       
   545   {
       
   546   if (callout_extra) fprintf(outfile, "    ");
       
   547     else fprintf(outfile, "%3d ", cb->callout_number);
       
   548   }
       
   549 
       
   550 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
       
   551 fprintf(outfile, "^");
       
   552 
       
   553 if (post_start > 0)
       
   554   {
       
   555   for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
       
   556   fprintf(outfile, "^");
       
   557   }
       
   558 
       
   559 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
       
   560   fprintf(outfile, " ");
       
   561 
       
   562 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
       
   563   pbuffer + cb->pattern_position);
       
   564 
       
   565 fprintf(outfile, "\n");
       
   566 first_callout = 0;
       
   567 
       
   568 if (cb->callout_data != NULL)
       
   569   {
       
   570   int callout_data = *((int *)(cb->callout_data));
       
   571   if (callout_data != 0)
       
   572     {
       
   573     fprintf(outfile, "Callout data = %d\n", callout_data);
       
   574     return callout_data;
       
   575     }
       
   576   }
       
   577 
       
   578 return (cb->callout_number != callout_fail_id)? 0 :
       
   579        (++callout_count >= callout_fail_count)? 1 : 0;
       
   580 }
       
   581 
       
   582 
       
   583 /*************************************************
       
   584 *            Local malloc functions              *
       
   585 *************************************************/
       
   586 
       
   587 /* Alternative malloc function, to test functionality and show the size of the
       
   588 compiled re. */
       
   589 
       
   590 static void *new_malloc(size_t size)
       
   591 {
       
   592 void *block = malloc(size);
       
   593 gotten_store = size;
       
   594 if (show_malloc)
       
   595   fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
       
   596 return block;
       
   597 }
       
   598 
       
   599 static void new_free(void *block)
       
   600 {
       
   601 if (show_malloc)
       
   602   fprintf(outfile, "free             %p\n", block);
       
   603 free(block);
       
   604 }
       
   605 
       
   606 
       
   607 /* For recursion malloc/free, to test stacking calls */
       
   608 
       
   609 static void *stack_malloc(size_t size)
       
   610 {
       
   611 void *block = malloc(size);
       
   612 if (show_malloc)
       
   613   fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
       
   614 return block;
       
   615 }
       
   616 
       
   617 static void stack_free(void *block)
       
   618 {
       
   619 if (show_malloc)
       
   620   fprintf(outfile, "stack_free       %p\n", block);
       
   621 free(block);
       
   622 }
       
   623 
       
   624 
       
   625 /*************************************************
       
   626 *          Call pcre_fullinfo()                  *
       
   627 *************************************************/
       
   628 
       
   629 /* Get one piece of information from the pcre_fullinfo() function */
       
   630 
       
   631 static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
       
   632 {
       
   633 int rc;
       
   634 if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
       
   635   fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
       
   636 }
       
   637 
       
   638 
       
   639 
       
   640 /*************************************************
       
   641 *         Byte flipping function                 *
       
   642 *************************************************/
       
   643 
       
   644 static unsigned long int
       
   645 byteflip(unsigned long int value, int n)
       
   646 {
       
   647 if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);
       
   648 return ((value & 0x000000ff) << 24) |
       
   649        ((value & 0x0000ff00) <<  8) |
       
   650        ((value & 0x00ff0000) >>  8) |
       
   651        ((value & 0xff000000) >> 24);
       
   652 }
       
   653 
       
   654 
       
   655 
       
   656 
       
   657 /*************************************************
       
   658 *        Check match or recursion limit          *
       
   659 *************************************************/
       
   660 
       
   661 static int
       
   662 check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
       
   663   int start_offset, int options, int *use_offsets, int use_size_offsets,
       
   664   int flag, unsigned long int *limit, int errnumber, const char *msg)
       
   665 {
       
   666 int count;
       
   667 int min = 0;
       
   668 int mid = 64;
       
   669 int max = -1;
       
   670 
       
   671 extra->flags |= flag;
       
   672 
       
   673 for (;;)
       
   674   {
       
   675   *limit = mid;
       
   676 
       
   677   count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
       
   678     use_offsets, use_size_offsets);
       
   679 
       
   680   if (count == errnumber)
       
   681     {
       
   682     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
       
   683     min = mid;
       
   684     mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
       
   685     }
       
   686 
       
   687   else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
       
   688                          count == PCRE_ERROR_PARTIAL)
       
   689     {
       
   690     if (mid == min + 1)
       
   691       {
       
   692       fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
       
   693       break;
       
   694       }
       
   695     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
       
   696     max = mid;
       
   697     mid = (min + mid)/2;
       
   698     }
       
   699   else break;    /* Some other error */
       
   700   }
       
   701 
       
   702 extra->flags &= ~flag;
       
   703 return count;
       
   704 }
       
   705 
       
   706 
       
   707 
       
   708 /*************************************************
       
   709 *         Case-independent strncmp() function    *
       
   710 *************************************************/
       
   711 
       
   712 /*
       
   713 Arguments:
       
   714   s         first string
       
   715   t         second string
       
   716   n         number of characters to compare
       
   717 
       
   718 Returns:    < 0, = 0, or > 0, according to the comparison
       
   719 */
       
   720 
       
   721 static int
       
   722 strncmpic(uschar *s, uschar *t, int n)
       
   723 {
       
   724 while (n--)
       
   725   {
       
   726   int c = tolower(*s++) - tolower(*t++);
       
   727   if (c) return c;
       
   728   }
       
   729 return 0;
       
   730 }
       
   731 
       
   732 
       
   733 
       
   734 /*************************************************
       
   735 *         Check newline indicator                *
       
   736 *************************************************/
       
   737 
       
   738 /* This is used both at compile and run-time to check for <xxx> escapes, where
       
   739 xxx is LF, CR, CRLF, ANYCRLF, or ANY. Print a message and return 0 if there is
       
   740 no match.
       
   741 
       
   742 Arguments:
       
   743   p           points after the leading '<'
       
   744   f           file for error message
       
   745 
       
   746 Returns:      appropriate PCRE_NEWLINE_xxx flags, or 0
       
   747 */
       
   748 
       
   749 static int
       
   750 check_newline(uschar *p, FILE *f)
       
   751 {
       
   752 if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
       
   753 if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
       
   754 if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
       
   755 if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
       
   756 if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
       
   757 if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
       
   758 if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
       
   759 fprintf(f, "Unknown newline type at: <%s\n", p);
       
   760 return 0;
       
   761 }
       
   762 
       
   763 
       
   764 
       
   /*************************************************
   *             Usage function                     *
   *************************************************/

   /* Writes the command-line help text to stdout. Which lines appear depends on
   the build: the readline note is selected by SUPPORT_LIBREADLINE, and the -dfa
   and -p entries are left out when NODFA or NOPOSIX, respectively, is defined.
   None of the text contains a format directive, so it is written with fputs(). */

   static void
   usage(void)
   {
   fputs("Usage:     pcretest [options] [<input file> [<output file>]]\n\n"
         "Input and output default to stdin and stdout.\n", stdout);

   #ifdef SUPPORT_LIBREADLINE
   fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
   #else
   fputs("This version of pcretest is not linked with readline().\n", stdout);
   #endif

   fputs("\nOptions:\n"
         "  -b       show compiled code (bytecode)\n"
         "  -C       show PCRE compile-time options and exit\n"
         "  -d       debug: show compiled code and information (-b and -i)\n",
         stdout);

   #if !defined NODFA
   fputs("  -dfa     force DFA matching for all subjects\n", stdout);
   #endif

   fputs("  -help    show usage information\n"
         "  -i       show information about compiled patterns\n"
         "  -m       output memory used information\n"
         "  -o <n>   set size of offsets vector to <n>\n", stdout);

   #if !defined NOPOSIX
   fputs("  -p       use POSIX interface\n", stdout);
   #endif

   fputs("  -q       quiet: do not output PCRE version number at start\n"
         "  -S <n>   set stack size to <n> megabytes\n"
         "  -s       output store (memory) used information\n"
         "  -t       time compilation and execution\n"
         "  -t <n>   time compilation and execution, repeating <n> times\n"
         "  -tm      time execution (matching) only\n"
         "  -tm <n>  time execution (matching) only, repeating <n> times\n",
         stdout);
   }
       
   801 
       
   802 
       
   803 
       
   804 /*************************************************
       
   805 *                Main Program                    *
       
   806 *************************************************/
       
   807 
       
   808 /* Read lines from named file or stdin and write to named file or stdout; lines
       
   809 consist of a regular expression, in delimiters and optionally followed by
       
   810 options, followed by a set of test data, terminated by an empty line. */
       
   811 
       
   812 int main(int argc, char **argv)
       
   813 {
       
   814 FILE *infile = stdin;
       
   815 int options = 0;
       
   816 int study_options = 0;
       
   817 int op = 1;
       
   818 int timeit = 0;
       
   819 int timeitm = 0;
       
   820 int showinfo = 0;
       
   821 int showstore = 0;
       
   822 int quiet = 0;
       
   823 int size_offsets = 45;
       
   824 int size_offsets_max;
       
   825 int *offsets = NULL;
       
   826 #if !defined NOPOSIX
       
   827 int posix = 0;
       
   828 #endif
       
   829 int debug = 0;
       
   830 int done = 0;
       
   831 int all_use_dfa = 0;
       
   832 int yield = 0;
       
   833 int stack_size;
       
   834 
       
   835 /* These vectors store, end-to-end, a list of captured substring names. Assume
       
   836 that 1024 is plenty long enough for the few names we'll be testing. */
       
   837 
       
   838 uschar copynames[1024];
       
   839 uschar getnames[1024];
       
   840 
       
   841 uschar *copynamesptr;
       
   842 uschar *getnamesptr;
       
   843 
       
   844 /* Get buffers from malloc() so that Electric Fence will check their misuse
       
   845 when I am debugging. They grow automatically when very long lines are read. */
       
   846 
       
   847 buffer = (unsigned char *)malloc(buffer_size);
       
   848 dbuffer = (unsigned char *)malloc(buffer_size);
       
   849 pbuffer = (unsigned char *)malloc(buffer_size);
       
   850 
       
   851 /* The outfile variable is static so that new_malloc can use it. */
       
   852 
       
   853 outfile = stdout;
       
   854 
       
   855 /* The following  _setmode() stuff is some Windows magic that tells its runtime
       
   856 library to translate CRLF into a single LF character. At least, that's what
       
   857 I've been told: never having used Windows I take this all on trust. Originally
       
   858 it set 0x8000, but then I was advised that _O_BINARY was better. */
       
   859 
       
   860 #if defined(_WIN32) || defined(WIN32)
       
   861 _setmode( _fileno( stdout ), _O_BINARY );
       
   862 #endif
       
   863 
       
   864 /* Scan options */
       
   865 
       
   866 while (argc > 1 && argv[op][0] == '-')
       
   867   {
       
   868   unsigned char *endptr;
       
   869 
       
   870   if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
       
   871     showstore = 1;
       
   872   else if (strcmp(argv[op], "-q") == 0) quiet = 1;
       
   873   else if (strcmp(argv[op], "-b") == 0) debug = 1;
       
   874   else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
       
   875   else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
       
   876 #if !defined NODFA
       
   877   else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
       
   878 #endif
       
   879   else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
       
   880       ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
       
   881         *endptr == 0))
       
   882     {
       
   883     op++;
       
   884     argc--;
       
   885     }
       
   886   else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
       
   887     {
       
   888     int both = argv[op][2] == 0;
       
   889     int temp;
       
   890     if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
       
   891                      *endptr == 0))
       
   892       {
       
   893       timeitm = temp;
       
   894       op++;
       
   895       argc--;
       
   896       }
       
   897     else timeitm = LOOPREPEAT;
       
   898     if (both) timeit = timeitm;
       
   899     }
       
   900   else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
       
   901       ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
       
   902         *endptr == 0))
       
   903     {
       
   904 #if defined(_WIN32) || defined(WIN32)
       
   905     printf("PCRE: -S not supported on this OS\n");
       
   906     exit(1);
       
   907 #else
       
   908     int rc;
       
   909     struct rlimit rlim;
       
   910     getrlimit(RLIMIT_STACK, &rlim);
       
   911     rlim.rlim_cur = stack_size * 1024 * 1024;
       
   912     rc = setrlimit(RLIMIT_STACK, &rlim);
       
   913     if (rc != 0)
       
   914       {
       
   915     printf("PCRE: setrlimit() failed with error %d\n", rc);
       
   916     exit(1);
       
   917       }
       
   918     op++;
       
   919     argc--;
       
   920 #endif
       
   921     }
       
   922 #if !defined NOPOSIX
       
   923   else if (strcmp(argv[op], "-p") == 0) posix = 1;
       
   924 #endif
       
   925   else if (strcmp(argv[op], "-C") == 0)
       
   926     {
       
   927     int rc;
       
   928     printf("PCRE version %s\n", pcre_version());
       
   929     printf("Compiled with\n");
       
   930     (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
       
   931     printf("  %sUTF-8 support\n", rc? "" : "No ");
       
   932     (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
       
   933     printf("  %sUnicode properties support\n", rc? "" : "No ");
       
   934     (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
       
   935     printf("  Newline sequence is %s\n", (rc == '\r')? "CR" :
       
   936       (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :
       
   937       (rc == -2)? "ANYCRLF" :
       
   938       (rc == -1)? "ANY" : "???");
       
   939     (void)pcre_config(PCRE_CONFIG_BSR, &rc);
       
   940     printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :
       
   941                                      "all Unicode newlines");
       
   942     (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
       
   943     printf("  Internal link size = %d\n", rc);
       
   944     (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
       
   945     printf("  POSIX malloc threshold = %d\n", rc);
       
   946     (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
       
   947     printf("  Default match limit = %d\n", rc);
       
   948     (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &rc);
       
   949     printf("  Default recursion depth limit = %d\n", rc);
       
   950     (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
       
   951     printf("  Match recursion uses %s\n", rc? "stack" : "heap");
       
   952     goto EXIT;
       
   953     }
       
   954   else if (strcmp(argv[op], "-help") == 0 ||
       
   955            strcmp(argv[op], "--help") == 0)
       
   956     {
       
   957     usage();
       
   958     goto EXIT;
       
   959     }
       
   960   else
       
   961     {
       
   962     printf("** Unknown or malformed option %s\n", argv[op]);
       
   963     usage();
       
   964     yield = 1;
       
   965     goto EXIT;
       
   966     }
       
   967   op++;
       
   968   argc--;
       
   969   }
       
   970 
       
   971 /* Get the store for the offsets vector, and remember what it was */
       
   972 
       
   973 size_offsets_max = size_offsets;
       
   974 offsets = (int *)malloc(size_offsets_max * sizeof(int));
       
   975 if (offsets == NULL)
       
   976   {
       
   977   printf("** Failed to get %d bytes of memory for offsets vector\n",
       
   978     (int)(size_offsets_max * sizeof(int)));
       
   979   yield = 1;
       
   980   goto EXIT;
       
   981   }
       
   982 
       
   983 /* Sort out the input and output files */
       
   984 
       
   985 if (argc > 1)
       
   986   {
       
   987   infile = fopen(argv[op], INPUT_MODE);
       
   988   if (infile == NULL)
       
   989     {
       
   990     printf("** Failed to open %s\n", argv[op]);
       
   991     yield = 1;
       
   992     goto EXIT;
       
   993     }
       
   994   }
       
   995 
       
   996 if (argc > 2)
       
   997   {
       
   998   outfile = fopen(argv[op+1], OUTPUT_MODE);
       
   999   if (outfile == NULL)
       
  1000     {
       
  1001     printf("** Failed to open %s\n", argv[op+1]);
       
  1002     yield = 1;
       
  1003     goto EXIT;
       
  1004     }
       
  1005   }
       
  1006 
       
  1007 /* Set alternative malloc function */
       
  1008 
       
  1009 pcre_malloc = new_malloc;
       
  1010 pcre_free = new_free;
       
  1011 pcre_stack_malloc = stack_malloc;
       
  1012 pcre_stack_free = stack_free;
       
  1013 
       
  1014 /* Heading line unless quiet, then prompt for first regex if stdin */
       
  1015 
       
  1016 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
       
  1017 
       
  1018 /* Main loop */
       
  1019 
       
  1020 while (!done)
       
  1021   {
       
  1022   pcre *re = NULL;
       
  1023   pcre_extra *extra = NULL;
       
  1024 
       
  1025 #if !defined NOPOSIX  /* There are still compilers that require no indent */
       
  1026   regex_t preg;
       
  1027   int do_posix = 0;
       
  1028 #endif
       
  1029 
       
  1030   const char *error;
       
  1031   unsigned char *p, *pp, *ppp;
       
  1032   unsigned char *to_file = NULL;
       
  1033   const unsigned char *tables = NULL;
       
  1034   unsigned long int true_size, true_study_size = 0;
       
  1035   size_t size, regex_gotten_store;
       
  1036   int do_study = 0;
       
  1037   int do_debug = debug;
       
  1038   int do_G = 0;
       
  1039   int do_g = 0;
       
  1040   int do_showinfo = showinfo;
       
  1041   int do_showrest = 0;
       
  1042   int do_flip = 0;
       
  1043   int erroroffset, len, delimiter, poffset;
       
  1044 
       
  1045   use_utf8 = 0;
       
  1046   debug_lengths = 1;
       
  1047 
       
  1048   if (extend_inputline(infile, buffer, "  re> ") == NULL) break;
       
  1049   if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
       
  1050   fflush(outfile);
       
  1051 
       
  1052   p = buffer;
       
  1053   while (isspace(*p)) p++;
       
  1054   if (*p == 0) continue;
       
  1055 
       
  1056   /* See if the pattern is to be loaded pre-compiled from a file. */
       
  1057 
       
  1058   if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
       
  1059     {
       
  1060     unsigned long int magic, get_options;
       
  1061     uschar sbuf[8];
       
  1062     FILE *f;
       
  1063 
       
  1064     p++;
       
  1065     pp = p + (int)strlen((char *)p);
       
  1066     while (isspace(pp[-1])) pp--;
       
  1067     *pp = 0;
       
  1068 
       
  1069     f = fopen((char *)p, "rb");
       
  1070     if (f == NULL)
       
  1071       {
       
  1072       fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
       
  1073       continue;
       
  1074       }
       
  1075 
       
  1076     if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
       
  1077 
       
  1078     true_size =
       
  1079       (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
       
  1080     true_study_size =
       
  1081       (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
       
  1082 
       
  1083     re = (real_pcre *)new_malloc(true_size);
       
  1084     regex_gotten_store = gotten_store;
       
  1085 
       
  1086     if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
       
  1087 
       
  1088     magic = ((real_pcre *)re)->magic_number;
       
  1089     if (magic != MAGIC_NUMBER)
       
  1090       {
       
  1091       if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
       
  1092         {
       
  1093         do_flip = 1;
       
  1094         }
       
  1095       else
       
  1096         {
       
  1097         fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
       
  1098         fclose(f);
       
  1099         continue;
       
  1100         }
       
  1101       }
       
  1102 
       
  1103     fprintf(outfile, "Compiled regex%s loaded from %s\n",
       
  1104       do_flip? " (byte-inverted)" : "", p);
       
  1105 
       
  1106     /* Need to know if UTF-8 for printing data strings */
       
  1107 
       
  1108     new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
       
  1109     use_utf8 = (get_options & PCRE_UTF8) != 0;
       
  1110 
       
  1111     /* Now see if there is any following study data */
       
  1112 
       
  1113     if (true_study_size != 0)
       
  1114       {
       
  1115       pcre_study_data *psd;
       
  1116 
       
  1117       extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
       
  1118       extra->flags = PCRE_EXTRA_STUDY_DATA;
       
  1119 
       
  1120       psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
       
  1121       extra->study_data = psd;
       
  1122 
       
  1123       if (fread(psd, 1, true_study_size, f) != true_study_size)
       
  1124         {
       
  1125         FAIL_READ:
       
  1126         fprintf(outfile, "Failed to read data from %s\n", p);
       
  1127         if (extra != NULL) new_free(extra);
       
  1128         if (re != NULL) new_free(re);
       
  1129         fclose(f);
       
  1130         continue;
       
  1131         }
       
  1132       fprintf(outfile, "Study data loaded from %s\n", p);
       
  1133       do_study = 1;     /* To get the data output if requested */
       
  1134       }
       
  1135     else fprintf(outfile, "No study data\n");
       
  1136 
       
  1137     fclose(f);
       
  1138     goto SHOW_INFO;
       
  1139     }
       
  1140 
       
  1141   /* In-line pattern (the usual case). Get the delimiter and seek the end of
       
  1142   the pattern; if it isn't complete, read more. */
       
  1143 
       
  1144   delimiter = *p++;
       
  1145 
       
  1146   if (isalnum(delimiter) || delimiter == '\\')
       
  1147     {
       
  1148     fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
       
  1149     goto SKIP_DATA;
       
  1150     }
       
  1151 
       
  1152   pp = p;
       
  1153   poffset = p - buffer;
       
  1154 
       
  1155   for(;;)
       
  1156     {
       
  1157     while (*pp != 0)
       
  1158       {
       
  1159       if (*pp == '\\' && pp[1] != 0) pp++;
       
  1160         else if (*pp == delimiter) break;
       
  1161       pp++;
       
  1162       }
       
  1163     if (*pp != 0) break;
       
  1164     if ((pp = extend_inputline(infile, pp, "    > ")) == NULL)
       
  1165       {
       
  1166       fprintf(outfile, "** Unexpected EOF\n");
       
  1167       done = 1;
       
  1168       goto CONTINUE;
       
  1169       }
       
  1170     if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
       
  1171     }
       
  1172 
       
  1173   /* The buffer may have moved while being extended; reset the start of data
       
  1174   pointer to the correct relative point in the buffer. */
       
  1175 
       
  1176   p = buffer + poffset;
       
  1177 
       
  1178   /* If the first character after the delimiter is backslash, make
       
  1179   the pattern end with backslash. This is purely to provide a way
       
  1180   of testing for the error message when a pattern ends with backslash. */
       
  1181 
       
  1182   if (pp[1] == '\\') *pp++ = '\\';
       
  1183 
       
  1184   /* Terminate the pattern at the delimiter, and save a copy of the pattern
       
  1185   for callouts. */
       
  1186 
       
  1187   *pp++ = 0;
       
  1188   strcpy((char *)pbuffer, (char *)p);
       
  1189 
       
  1190   /* Look for options after final delimiter */
       
  1191 
       
  1192   options = 0;
       
  1193   study_options = 0;
       
  1194   log_store = showstore;  /* default from command line */
       
  1195 
       
  1196   while (*pp != 0)
       
  1197     {
       
  1198     switch (*pp++)
       
  1199       {
       
  1200       case 'f': options |= PCRE_FIRSTLINE; break;
       
  1201       case 'g': do_g = 1; break;
       
  1202       case 'i': options |= PCRE_CASELESS; break;
       
  1203       case 'm': options |= PCRE_MULTILINE; break;
       
  1204       case 's': options |= PCRE_DOTALL; break;
       
  1205       case 'x': options |= PCRE_EXTENDED; break;
       
  1206 
       
  1207       case '+': do_showrest = 1; break;
       
  1208       case 'A': options |= PCRE_ANCHORED; break;
       
  1209       case 'B': do_debug = 1; break;
       
  1210       case 'C': options |= PCRE_AUTO_CALLOUT; break;
       
  1211       case 'D': do_debug = do_showinfo = 1; break;
       
  1212       case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
       
  1213       case 'F': do_flip = 1; break;
       
  1214       case 'G': do_G = 1; break;
       
  1215       case 'I': do_showinfo = 1; break;
       
  1216       case 'J': options |= PCRE_DUPNAMES; break;
       
  1217       case 'M': log_store = 1; break;
       
  1218       case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
       
  1219 
       
  1220 #if !defined NOPOSIX
       
  1221       case 'P': do_posix = 1; break;
       
  1222 #endif
       
  1223 
       
  1224       case 'S': do_study = 1; break;
       
  1225       case 'U': options |= PCRE_UNGREEDY; break;
       
  1226       case 'X': options |= PCRE_EXTRA; break;
       
  1227       case 'Z': debug_lengths = 0; break;
       
  1228       case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
       
  1229       case '?': options |= PCRE_NO_UTF8_CHECK; break;
       
  1230 
       
  1231       case 'L':
       
  1232       ppp = pp;
       
  1233       /* The '\r' test here is so that it works on Windows. */
       
  1234       /* The '0' test is just in case this is an unterminated line. */
       
  1235       while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
       
  1236       *ppp = 0;
       
  1237       if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
       
  1238         {
       
  1239         fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
       
  1240         goto SKIP_DATA;
       
  1241         }
       
  1242       locale_set = 1;
       
  1243       tables = pcre_maketables();
       
  1244       pp = ppp;
       
  1245       break;
       
  1246 
       
  1247       case '>':
       
  1248       to_file = pp;
       
  1249       while (*pp != 0) pp++;
       
  1250       while (isspace(pp[-1])) pp--;
       
  1251       *pp = 0;
       
  1252       break;
       
  1253 
       
  1254       case '<':
       
  1255         {
       
  1256         if (strncmp((char *)pp, "JS>", 3) == 0)
       
  1257           {
       
  1258           options |= PCRE_JAVASCRIPT_COMPAT;
       
  1259           pp += 3;
       
  1260           }
       
  1261         else
       
  1262           {
       
  1263           int x = check_newline(pp, outfile);
       
  1264           if (x == 0) goto SKIP_DATA;
       
  1265           options |= x;
       
  1266           while (*pp++ != '>');
       
  1267           }
       
  1268         }
       
  1269       break;
       
  1270 
       
  1271       case '\r':                      /* So that it works in Windows */
       
  1272       case '\n':
       
  1273       case ' ':
       
  1274       break;
       
  1275 
       
  1276       default:
       
  1277       fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
       
  1278       goto SKIP_DATA;
       
  1279       }
       
  1280     }
       
  1281 
       
  1282   /* Handle compiling via the POSIX interface, which doesn't support the
       
  1283   timing, showing, or debugging options, nor the ability to pass over
       
  1284   local character tables. */
       
  1285 
       
  1286 #if !defined NOPOSIX
       
  1287   if (posix || do_posix)
       
  1288     {
       
  1289     int rc;
       
  1290     int cflags = 0;
       
  1291 
       
  1292     if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
       
  1293     if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
       
  1294     if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
       
  1295     if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
       
  1296     if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
       
  1297 
       
  1298     rc = regcomp(&preg, (char *)p, cflags);
       
  1299 
       
  1300     /* Compilation failed; go back for another re, skipping to blank line
       
  1301     if non-interactive. */
       
  1302 
       
  1303     if (rc != 0)
       
  1304       {
       
  1305       (void)regerror(rc, &preg, (char *)buffer, buffer_size);
       
  1306       fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
       
  1307       goto SKIP_DATA;
       
  1308       }
       
  1309     }
       
  1310 
       
  1311   /* Handle compiling via the native interface */
       
  1312 
       
  1313   else
       
  1314 #endif  /* !defined NOPOSIX */
       
  1315 
       
  1316     {
       
  1317     if (timeit > 0)
       
  1318       {
       
  1319       register int i;
       
  1320       clock_t time_taken;
       
  1321       clock_t start_time = clock();
       
  1322       for (i = 0; i < timeit; i++)
       
  1323         {
       
  1324         re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
       
  1325         if (re != NULL) free(re);
       
  1326         }
       
  1327       time_taken = clock() - start_time;
       
  1328       fprintf(outfile, "Compile time %.4f milliseconds\n",
       
  1329         (((double)time_taken * 1000.0) / (double)timeit) /
       
  1330           (double)CLOCKS_PER_SEC);
       
  1331       }
       
  1332 
       
  1333     re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
       
  1334 
       
  1335     /* Compilation failed; go back for another re, skipping to blank line
       
  1336     if non-interactive. */
       
  1337 
       
  1338     if (re == NULL)
       
  1339       {
       
  1340       fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
       
  1341       SKIP_DATA:
       
  1342       if (infile != stdin)
       
  1343         {
       
  1344         for (;;)
       
  1345           {
       
  1346           if (extend_inputline(infile, buffer, NULL) == NULL)
       
  1347             {
       
  1348             done = 1;
       
  1349             goto CONTINUE;
       
  1350             }
       
  1351           len = (int)strlen((char *)buffer);
       
  1352           while (len > 0 && isspace(buffer[len-1])) len--;
       
  1353           if (len == 0) break;
       
  1354           }
       
  1355         fprintf(outfile, "\n");
       
  1356         }
       
  1357       goto CONTINUE;
       
  1358       }
       
  1359 
       
  1360     /* Compilation succeeded; print data if required. There are now two
       
  1361     info-returning functions. The old one has a limited interface and
       
  1362     returns only limited data. Check that it agrees with the newer one. */
       
  1363 
       
  1364     if (log_store)
       
  1365       fprintf(outfile, "Memory allocation (code space): %d\n",
       
  1366         (int)(gotten_store -
       
  1367               sizeof(real_pcre) -
       
  1368               ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
       
  1369 
       
  1370     /* Extract the size for possible writing before possibly flipping it,
       
  1371     and remember the store that was got. */
       
  1372 
       
  1373     true_size = ((real_pcre *)re)->size;
       
  1374     regex_gotten_store = gotten_store;
       
  1375 
       
  1376     /* If /S was present, study the regexp to generate additional info to
       
  1377     help with the matching. */
       
  1378 
       
  1379     if (do_study)
       
  1380       {
       
  1381       if (timeit > 0)
       
  1382         {
       
  1383         register int i;
       
  1384         clock_t time_taken;
       
  1385         clock_t start_time = clock();
       
  1386         for (i = 0; i < timeit; i++)
       
  1387           extra = pcre_study(re, study_options, &error);
       
  1388         time_taken = clock() - start_time;
       
  1389         if (extra != NULL) free(extra);
       
  1390         fprintf(outfile, "  Study time %.4f milliseconds\n",
       
  1391           (((double)time_taken * 1000.0) / (double)timeit) /
       
  1392             (double)CLOCKS_PER_SEC);
       
  1393         }
       
  1394       extra = pcre_study(re, study_options, &error);
       
  1395       if (error != NULL)
       
  1396         fprintf(outfile, "Failed to study: %s\n", error);
       
  1397       else if (extra != NULL)
       
  1398         true_study_size = ((pcre_study_data *)(extra->study_data))->size;
       
  1399       }
       
  1400 
       
  1401     /* If the 'F' option was present, we flip the bytes of all the integer
       
  1402     fields in the regex data block and the study block. This is to make it
       
  1403     possible to test PCRE's handling of byte-flipped patterns, e.g. those
       
  1404     compiled on a different architecture. */
       
  1405 
       
  1406     if (do_flip)
       
  1407       {
       
  1408       real_pcre *rre = (real_pcre *)re;
       
  1409       rre->magic_number =
       
  1410         byteflip(rre->magic_number, sizeof(rre->magic_number));
       
  1411       rre->size = byteflip(rre->size, sizeof(rre->size));
       
  1412       rre->options = byteflip(rre->options, sizeof(rre->options));
       
  1413       rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
       
  1414       rre->top_bracket =
       
  1415         (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
       
  1416       rre->top_backref =
       
  1417         (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
       
  1418       rre->first_byte =
       
  1419         (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));
       
  1420       rre->req_byte =
       
  1421         (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));
       
  1422       rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
       
  1423         sizeof(rre->name_table_offset));
       
  1424       rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
       
  1425         sizeof(rre->name_entry_size));
       
  1426       rre->name_count = (pcre_uint16)byteflip(rre->name_count,
       
  1427         sizeof(rre->name_count));
       
  1428 
       
  1429       if (extra != NULL)
       
  1430         {
       
  1431         pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
       
  1432         rsd->size = byteflip(rsd->size, sizeof(rsd->size));
       
  1433         rsd->options = byteflip(rsd->options, sizeof(rsd->options));
       
  1434         }
       
  1435       }
       
  1436 
       
  1437     /* Extract information from the compiled data if required */
       
  1438 
       
  1439     SHOW_INFO:
       
  1440 
       
  1441     if (do_debug)
       
  1442       {
       
  1443       fprintf(outfile, "------------------------------------------------------------------\n");
       
  1444       pcre_printint(re, outfile, debug_lengths);
       
  1445       }
       
  1446 
       
  1447     if (do_showinfo)
       
  1448       {
       
  1449       unsigned long int get_options, all_options;
       
  1450 #if !defined NOINFOCHECK
       
  1451       int old_first_char, old_options, old_count;
       
  1452 #endif
       
  1453       int count, backrefmax, first_char, need_char, okpartial, jchanged,
       
  1454         hascrorlf;
       
  1455       int nameentrysize, namecount;
       
  1456       const uschar *nametable;
       
  1457 
       
  1458       new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
       
  1459       new_info(re, NULL, PCRE_INFO_SIZE, &size);
       
  1460       new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
       
  1461       new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
       
  1462       new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
       
  1463       new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
       
  1464       new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
       
  1465       new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
       
  1466       new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
       
  1467       new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
       
  1468       new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
       
  1469       new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
       
  1470 
       
  1471 #if !defined NOINFOCHECK
       
  1472       old_count = pcre_info(re, &old_options, &old_first_char);
       
  1473       if (count < 0) fprintf(outfile,
       
  1474         "Error %d from pcre_info()\n", count);
       
  1475       else
       
  1476         {
       
  1477         if (old_count != count) fprintf(outfile,
       
  1478           "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
       
  1479             old_count);
       
  1480 
       
  1481         if (old_first_char != first_char) fprintf(outfile,
       
  1482           "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
       
  1483             first_char, old_first_char);
       
  1484 
       
  1485         if (old_options != (int)get_options) fprintf(outfile,
       
  1486           "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
       
  1487             get_options, old_options);
       
  1488         }
       
  1489 #endif
       
  1490 
       
  1491       if (size != regex_gotten_store) fprintf(outfile,
       
  1492         "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
       
  1493         (int)size, (int)regex_gotten_store);
       
  1494 
       
  1495       fprintf(outfile, "Capturing subpattern count = %d\n", count);
       
  1496       if (backrefmax > 0)
       
  1497         fprintf(outfile, "Max back reference = %d\n", backrefmax);
       
  1498 
       
  1499       if (namecount > 0)
       
  1500         {
       
  1501         fprintf(outfile, "Named capturing subpatterns:\n");
       
  1502         while (namecount-- > 0)
       
  1503           {
       
  1504           fprintf(outfile, "  %s %*s%3d\n", nametable + 2,
       
  1505             nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
       
  1506             GET2(nametable, 0));
       
  1507           nametable += nameentrysize;
       
  1508           }
       
  1509         }
       
  1510 
       
  1511       if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
       
  1512       if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
       
  1513 
       
  1514       all_options = ((real_pcre *)re)->options;
       
  1515       if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
       
  1516 
       
  1517       if (get_options == 0) fprintf(outfile, "No options\n");
       
  1518         else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
       
  1519           ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
       
  1520           ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
       
  1521           ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
       
  1522           ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
       
  1523           ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
       
  1524           ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
       
  1525           ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
       
  1526           ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
       
  1527           ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
       
  1528           ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
       
  1529           ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
       
  1530           ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
       
  1531           ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
       
  1532           ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
       
  1533           ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
       
  1534 
       
  1535       if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
       
  1536 
       
  1537       switch (get_options & PCRE_NEWLINE_BITS)
       
  1538         {
       
  1539         case PCRE_NEWLINE_CR:
       
  1540         fprintf(outfile, "Forced newline sequence: CR\n");
       
  1541         break;
       
  1542 
       
  1543         case PCRE_NEWLINE_LF:
       
  1544         fprintf(outfile, "Forced newline sequence: LF\n");
       
  1545         break;
       
  1546 
       
  1547         case PCRE_NEWLINE_CRLF:
       
  1548         fprintf(outfile, "Forced newline sequence: CRLF\n");
       
  1549         break;
       
  1550 
       
  1551         case PCRE_NEWLINE_ANYCRLF:
       
  1552         fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
       
  1553         break;
       
  1554 
       
  1555         case PCRE_NEWLINE_ANY:
       
  1556         fprintf(outfile, "Forced newline sequence: ANY\n");
       
  1557         break;
       
  1558 
       
  1559         default:
       
  1560         break;
       
  1561         }
       
  1562 
       
  1563       if (first_char == -1)
       
  1564         {
       
  1565         fprintf(outfile, "First char at start or follows newline\n");
       
  1566         }
       
  1567       else if (first_char < 0)
       
  1568         {
       
  1569         fprintf(outfile, "No first char\n");
       
  1570         }
       
  1571       else
       
  1572         {
       
  1573         int ch = first_char & 255;
       
  1574         const char *caseless = ((first_char & REQ_CASELESS) == 0)?
       
  1575           "" : " (caseless)";
       
  1576         if (PRINTHEX(ch))
       
  1577           fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
       
  1578         else
       
  1579           fprintf(outfile, "First char = %d%s\n", ch, caseless);
       
  1580         }
       
  1581 
       
  1582       if (need_char < 0)
       
  1583         {
       
  1584         fprintf(outfile, "No need char\n");
       
  1585         }
       
  1586       else
       
  1587         {
       
  1588         int ch = need_char & 255;
       
  1589         const char *caseless = ((need_char & REQ_CASELESS) == 0)?
       
  1590           "" : " (caseless)";
       
  1591         if (PRINTHEX(ch))
       
  1592           fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
       
  1593         else
       
  1594           fprintf(outfile, "Need char = %d%s\n", ch, caseless);
       
  1595         }
       
  1596 
       
  1597       /* Don't output study size; at present it is in any case a fixed
       
  1598       value, but it varies, depending on the computer architecture, and
       
  1599       so messes up the test suite. (And with the /F option, it might be
       
  1600       flipped.) */
       
  1601 
       
  1602       if (do_study)
       
  1603         {
       
  1604         if (extra == NULL)
       
  1605           fprintf(outfile, "Study returned NULL\n");
       
  1606         else
       
  1607           {
       
  1608           uschar *start_bits = NULL;
       
  1609           new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
       
  1610 
       
  1611           if (start_bits == NULL)
       
  1612             fprintf(outfile, "No starting byte set\n");
       
  1613           else
       
  1614             {
       
  1615             int i;
       
  1616             int c = 24;
       
  1617             fprintf(outfile, "Starting byte set: ");
       
  1618             for (i = 0; i < 256; i++)
       
  1619               {
       
  1620               if ((start_bits[i/8] & (1<<(i&7))) != 0)
       
  1621                 {
       
  1622                 if (c > 75)
       
  1623                   {
       
  1624                   fprintf(outfile, "\n  ");
       
  1625                   c = 2;
       
  1626                   }
       
  1627                 if (PRINTHEX(i) && i != ' ')
       
  1628                   {
       
  1629                   fprintf(outfile, "%c ", i);
       
  1630                   c += 2;
       
  1631                   }
       
  1632                 else
       
  1633                   {
       
  1634                   fprintf(outfile, "\\x%02x ", i);
       
  1635                   c += 5;
       
  1636                   }
       
  1637                 }
       
  1638               }
       
  1639             fprintf(outfile, "\n");
       
  1640             }
       
  1641           }
       
  1642         }
       
  1643       }
       
  1644 
       
  1645     /* If the '>' option was present, we write out the regex to a file, and
       
  1646     that is all. The first 8 bytes of the file are the regex length and then
       
  1647     the study length, in big-endian order. */
       
  1648 
       
  1649     if (to_file != NULL)
       
  1650       {
       
  1651       FILE *f = fopen((char *)to_file, "wb");
       
  1652       if (f == NULL)
       
  1653         {
       
  1654         fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
       
  1655         }
       
  1656       else
       
  1657         {
       
  1658         uschar sbuf[8];
       
  1659         sbuf[0] = (uschar)((true_size >> 24) & 255);
       
  1660         sbuf[1] = (uschar)((true_size >> 16) & 255);
       
  1661         sbuf[2] = (uschar)((true_size >>  8) & 255);
       
  1662         sbuf[3] = (uschar)((true_size) & 255);
       
  1663 
       
  1664         sbuf[4] = (uschar)((true_study_size >> 24) & 255);
       
  1665         sbuf[5] = (uschar)((true_study_size >> 16) & 255);
       
  1666         sbuf[6] = (uschar)((true_study_size >>  8) & 255);
       
  1667         sbuf[7] = (uschar)((true_study_size) & 255);
       
  1668 
       
  1669         if (fwrite(sbuf, 1, 8, f) < 8 ||
       
  1670             fwrite(re, 1, true_size, f) < true_size)
       
  1671           {
       
  1672           fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
       
  1673           }
       
  1674         else
       
  1675           {
       
  1676           fprintf(outfile, "Compiled regex written to %s\n", to_file);
       
  1677           if (extra != NULL)
       
  1678             {
       
  1679             if (fwrite(extra->study_data, 1, true_study_size, f) <
       
  1680                 true_study_size)
       
  1681               {
       
  1682               fprintf(outfile, "Write error on %s: %s\n", to_file,
       
  1683                 strerror(errno));
       
  1684               }
       
  1685             else fprintf(outfile, "Study data written to %s\n", to_file);
       
  1686 
       
  1687             }
       
  1688           }
       
  1689         fclose(f);
       
  1690         }
       
  1691 
       
  1692       new_free(re);
       
  1693       if (extra != NULL) new_free(extra);
       
  1694       if (tables != NULL) new_free((void *)tables);
       
  1695       continue;  /* With next regex */
       
  1696       }
       
  1697     }        /* End of non-POSIX compile */
       
  1698 
       
  1699   /* Read data lines and test them */
       
  1700 
       
  1701   for (;;)
       
  1702     {
       
  1703     uschar *q;
       
  1704     uschar *bptr;
       
  1705     int *use_offsets = offsets;
       
  1706     int use_size_offsets = size_offsets;
       
  1707     int callout_data = 0;
       
  1708     int callout_data_set = 0;
       
  1709     int count, c;
       
  1710     int copystrings = 0;
       
  1711     int find_match_limit = 0;
       
  1712     int getstrings = 0;
       
  1713     int getlist = 0;
       
  1714     int gmatched = 0;
       
  1715     int start_offset = 0;
       
  1716     int g_notempty = 0;
       
  1717     int use_dfa = 0;
       
  1718 
       
  1719     options = 0;
       
  1720 
       
  1721     *copynames = 0;
       
  1722     *getnames = 0;
       
  1723 
       
  1724     copynamesptr = copynames;
       
  1725     getnamesptr = getnames;
       
  1726 
       
  1727     pcre_callout = callout;
       
  1728     first_callout = 1;
       
  1729     callout_extra = 0;
       
  1730     callout_count = 0;
       
  1731     callout_fail_count = 999999;
       
  1732     callout_fail_id = -1;
       
  1733     show_malloc = 0;
       
  1734 
       
  1735     if (extra != NULL) extra->flags &=
       
  1736       ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
       
  1737 
       
  1738     len = 0;
       
  1739     for (;;)
       
  1740       {
       
  1741       if (extend_inputline(infile, buffer + len, "data> ") == NULL)
       
  1742         {
       
  1743         if (len > 0) break;
       
  1744         done = 1;
       
  1745         goto CONTINUE;
       
  1746         }
       
  1747       if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
       
  1748       len = (int)strlen((char *)buffer);
       
  1749       if (buffer[len-1] == '\n') break;
       
  1750       }
       
  1751 
       
  1752     while (len > 0 && isspace(buffer[len-1])) len--;
       
  1753     buffer[len] = 0;
       
  1754     if (len == 0) break;
       
  1755 
       
  1756     p = buffer;
       
  1757     while (isspace(*p)) p++;
       
  1758 
       
  1759     bptr = q = dbuffer;
       
  1760     while ((c = *p++) != 0)
       
  1761       {
       
  1762       int i = 0;
       
  1763       int n = 0;
       
  1764 
       
  1765       if (c == '\\') switch ((c = *p++))
       
  1766         {
       
  1767         case 'a': c =    7; break;
       
  1768         case 'b': c = '\b'; break;
       
  1769         case 'e': c =   27; break;
       
  1770         case 'f': c = '\f'; break;
       
  1771         case 'n': c = '\n'; break;
       
  1772         case 'r': c = '\r'; break;
       
  1773         case 't': c = '\t'; break;
       
  1774         case 'v': c = '\v'; break;
       
  1775 
       
  1776         case '0': case '1': case '2': case '3':
       
  1777         case '4': case '5': case '6': case '7':
       
  1778         c -= '0';
       
  1779         while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
       
  1780           c = c * 8 + *p++ - '0';
       
  1781 
       
  1782 #if !defined NOUTF8
       
  1783         if (use_utf8 && c > 255)
       
  1784           {
       
  1785           unsigned char buff8[8];
       
  1786           int ii, utn;
       
  1787           utn = ord2utf8(c, buff8);
       
  1788           for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
       
  1789           c = buff8[ii];   /* Last byte */
       
  1790           }
       
  1791 #endif
       
  1792         break;
       
  1793 
       
  1794         case 'x':
       
  1795 
       
  1796         /* Handle \x{..} specially - new Perl thing for utf8 */
       
  1797 
       
  1798 #if !defined NOUTF8
       
  1799         if (*p == '{')
       
  1800           {
       
  1801           unsigned char *pt = p;
       
  1802           c = 0;
       
  1803           while (isxdigit(*(++pt)))
       
  1804             c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
       
  1805           if (*pt == '}')
       
  1806             {
       
  1807             unsigned char buff8[8];
       
  1808             int ii, utn;
       
  1809             if (use_utf8)
       
  1810               {
       
  1811               utn = ord2utf8(c, buff8);
       
  1812               for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
       
  1813               c = buff8[ii];   /* Last byte */
       
  1814               }
       
  1815             else
       
  1816              {
       
  1817              if (c > 255)
       
  1818                fprintf(outfile, "** Character \\x{%x} is greater than 255 and "
       
  1819                  "UTF-8 mode is not enabled.\n"
       
  1820                  "** Truncation will probably give the wrong result.\n", c);
       
  1821              }
       
  1822             p = pt + 1;
       
  1823             break;
       
  1824             }
       
  1825           /* Not correct form; fall through */
       
  1826           }
       
  1827 #endif
       
  1828 
       
  1829         /* Ordinary \x */
       
  1830 
       
  1831         c = 0;
       
  1832         while (i++ < 2 && isxdigit(*p))
       
  1833           {
       
  1834           c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
       
  1835           p++;
       
  1836           }
       
  1837         break;
       
  1838 
       
  1839         case 0:   /* \ followed by EOF allows for an empty line */
       
  1840         p--;
       
  1841         continue;
       
  1842 
       
  1843         case '>':
       
  1844         while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
       
  1845         continue;
       
  1846 
       
  1847         case 'A':  /* Option setting */
       
  1848         options |= PCRE_ANCHORED;
       
  1849         continue;
       
  1850 
       
  1851         case 'B':
       
  1852         options |= PCRE_NOTBOL;
       
  1853         continue;
       
  1854 
       
  1855         case 'C':
       
  1856         if (isdigit(*p))    /* Set copy string */
       
  1857           {
       
  1858           while(isdigit(*p)) n = n * 10 + *p++ - '0';
       
  1859           copystrings |= 1 << n;
       
  1860           }
       
  1861         else if (isalnum(*p))
       
  1862           {
       
  1863           uschar *npp = copynamesptr;
       
  1864           while (isalnum(*p)) *npp++ = *p++;
       
  1865           *npp++ = 0;
       
  1866           *npp = 0;
       
  1867           n = pcre_get_stringnumber(re, (char *)copynamesptr);
       
  1868           if (n < 0)
       
  1869             fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
       
  1870           copynamesptr = npp;
       
  1871           }
       
  1872         else if (*p == '+')
       
  1873           {
       
  1874           callout_extra = 1;
       
  1875           p++;
       
  1876           }
       
  1877         else if (*p == '-')
       
  1878           {
       
  1879           pcre_callout = NULL;
       
  1880           p++;
       
  1881           }
       
  1882         else if (*p == '!')
       
  1883           {
       
  1884           callout_fail_id = 0;
       
  1885           p++;
       
  1886           while(isdigit(*p))
       
  1887             callout_fail_id = callout_fail_id * 10 + *p++ - '0';
       
  1888           callout_fail_count = 0;
       
  1889           if (*p == '!')
       
  1890             {
       
  1891             p++;
       
  1892             while(isdigit(*p))
       
  1893               callout_fail_count = callout_fail_count * 10 + *p++ - '0';
       
  1894             }
       
  1895           }
       
  1896         else if (*p == '*')
       
  1897           {
       
  1898           int sign = 1;
       
  1899           callout_data = 0;
       
  1900           if (*(++p) == '-') { sign = -1; p++; }
       
  1901           while(isdigit(*p))
       
  1902             callout_data = callout_data * 10 + *p++ - '0';
       
  1903           callout_data *= sign;
       
  1904           callout_data_set = 1;
       
  1905           }
       
  1906         continue;
       
  1907 
       
  1908 #if !defined NODFA
       
  1909         case 'D':
       
  1910 #if !defined NOPOSIX
       
  1911         if (posix || do_posix)
       
  1912           printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
       
  1913         else
       
  1914 #endif
       
  1915           use_dfa = 1;
       
  1916         continue;
       
  1917 
       
  1918         case 'F':
       
  1919         options |= PCRE_DFA_SHORTEST;
       
  1920         continue;
       
  1921 #endif
       
  1922 
       
  1923         case 'G':
       
  1924         if (isdigit(*p))
       
  1925           {
       
  1926           while(isdigit(*p)) n = n * 10 + *p++ - '0';
       
  1927           getstrings |= 1 << n;
       
  1928           }
       
  1929         else if (isalnum(*p))
       
  1930           {
       
  1931           uschar *npp = getnamesptr;
       
  1932           while (isalnum(*p)) *npp++ = *p++;
       
  1933           *npp++ = 0;
       
  1934           *npp = 0;
       
  1935           n = pcre_get_stringnumber(re, (char *)getnamesptr);
       
  1936           if (n < 0)
       
  1937             fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
       
  1938           getnamesptr = npp;
       
  1939           }
       
  1940         continue;
       
  1941 
       
  1942         case 'L':
       
  1943         getlist = 1;
       
  1944         continue;
       
  1945 
       
  1946         case 'M':
       
  1947         find_match_limit = 1;
       
  1948         continue;
       
  1949 
       
  1950         case 'N':
       
  1951         options |= PCRE_NOTEMPTY;
       
  1952         continue;
       
  1953 
       
  1954         case 'O':
       
  1955         while(isdigit(*p)) n = n * 10 + *p++ - '0';
       
  1956         if (n > size_offsets_max)
       
  1957           {
       
  1958           size_offsets_max = n;
       
  1959           free(offsets);
       
  1960           use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
       
  1961           if (offsets == NULL)
       
  1962             {
       
  1963             printf("** Failed to get %d bytes of memory for offsets vector\n",
       
  1964               (int)(size_offsets_max * sizeof(int)));
       
  1965             yield = 1;
       
  1966             goto EXIT;
       
  1967             }
       
  1968           }
       
  1969         use_size_offsets = n;
       
  1970         if (n == 0) use_offsets = NULL;   /* Ensures it can't write to it */
       
  1971         continue;
       
  1972 
       
  1973         case 'P':
       
  1974         options |= PCRE_PARTIAL;
       
  1975         continue;
       
  1976 
       
  1977         case 'Q':
       
  1978         while(isdigit(*p)) n = n * 10 + *p++ - '0';
       
  1979         if (extra == NULL)
       
  1980           {
       
  1981           extra = (pcre_extra *)malloc(sizeof(pcre_extra));
       
  1982           extra->flags = 0;
       
  1983           }
       
  1984         extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
       
  1985         extra->match_limit_recursion = n;
       
  1986         continue;
       
  1987 
       
  1988         case 'q':
       
  1989         while(isdigit(*p)) n = n * 10 + *p++ - '0';
       
  1990         if (extra == NULL)
       
  1991           {
       
  1992           extra = (pcre_extra *)malloc(sizeof(pcre_extra));
       
  1993           extra->flags = 0;
       
  1994           }
       
  1995         extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
       
  1996         extra->match_limit = n;
       
  1997         continue;
       
  1998 
       
  1999 #if !defined NODFA
       
  2000         case 'R':
       
  2001         options |= PCRE_DFA_RESTART;
       
  2002         continue;
       
  2003 #endif
       
  2004 
       
  2005         case 'S':
       
  2006         show_malloc = 1;
       
  2007         continue;
       
  2008 
       
  2009         case 'Z':
       
  2010         options |= PCRE_NOTEOL;
       
  2011         continue;
       
  2012 
       
  2013         case '?':
       
  2014         options |= PCRE_NO_UTF8_CHECK;
       
  2015         continue;
       
  2016 
       
  2017         case '<':
       
  2018           {
       
  2019           int x = check_newline(p, outfile);
       
  2020           if (x == 0) goto NEXT_DATA;
       
  2021           options |= x;
       
  2022           while (*p++ != '>');
       
  2023           }
       
  2024         continue;
       
  2025         }
       
  2026       *q++ = c;
       
  2027       }
       
  2028     *q = 0;
       
  2029     len = q - dbuffer;
       
  2030 
       
  2031     /* Move the data to the end of the buffer so that a read over the end of
       
  2032     the buffer will be seen by valgrind, even if it doesn't cause a crash. If
       
  2033     we are using the POSIX interface, we must include the terminating zero. */
       
  2034 
       
  2035 #if !defined NOPOSIX
       
  2036     if (posix || do_posix)
       
  2037       {
       
  2038       memmove(bptr + buffer_size - len - 1, bptr, len + 1);
       
  2039       bptr += buffer_size - len - 1;
       
  2040       }
       
  2041     else
       
  2042 #endif
       
  2043       {
       
  2044       memmove(bptr + buffer_size - len, bptr, len);
       
  2045       bptr += buffer_size - len;
       
  2046       }
       
  2047 
       
  2048     if ((all_use_dfa || use_dfa) && find_match_limit)
       
  2049       {
       
  2050       printf("**Match limit not relevant for DFA matching: ignored\n");
       
  2051       find_match_limit = 0;
       
  2052       }
       
  2053 
       
  2054     /* Handle matching via the POSIX interface, which does not
       
  2055     support timing or playing with the match limit or callout data. */
       
  2056 
       
  2057 #if !defined NOPOSIX
       
  2058     if (posix || do_posix)
       
  2059       {
       
  2060       int rc;
       
  2061       int eflags = 0;
       
  2062       regmatch_t *pmatch = NULL;
       
  2063       if (use_size_offsets > 0)
       
  2064         pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
       
  2065       if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
       
  2066       if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
       
  2067 
       
  2068       rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
       
  2069 
       
  2070       if (rc != 0)
       
  2071         {
       
  2072         (void)regerror(rc, &preg, (char *)buffer, buffer_size);
       
  2073         fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
       
  2074         }
       
  2075       else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
       
  2076               != 0)
       
  2077         {
       
  2078         fprintf(outfile, "Matched with REG_NOSUB\n");
       
  2079         }
       
  2080       else
       
  2081         {
       
  2082         size_t i;
       
  2083         for (i = 0; i < (size_t)use_size_offsets; i++)
       
  2084           {
       
  2085           if (pmatch[i].rm_so >= 0)
       
  2086             {
       
  2087             fprintf(outfile, "%2d: ", (int)i);
       
  2088             (void)pchars(dbuffer + pmatch[i].rm_so,
       
  2089               pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
       
  2090             fprintf(outfile, "\n");
       
  2091             if (i == 0 && do_showrest)
       
  2092               {
       
  2093               fprintf(outfile, " 0+ ");
       
  2094               (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
       
  2095                 outfile);
       
  2096               fprintf(outfile, "\n");
       
  2097               }
       
  2098             }
       
  2099           }
       
  2100         }
       
  2101       free(pmatch);
       
  2102       }
       
  2103 
       
  2104     /* Handle matching via the native interface - repeats for /g and /G */
       
  2105 
       
  2106     else
       
  2107 #endif  /* !defined NOPOSIX */
       
  2108 
       
  2109     for (;; gmatched++)    /* Loop for /g or /G */
       
  2110       {
       
  2111       if (timeitm > 0)
       
  2112         {
       
  2113         register int i;
       
  2114         clock_t time_taken;
       
  2115         clock_t start_time = clock();
       
  2116 
       
  2117 #if !defined NODFA
       
  2118         if (all_use_dfa || use_dfa)
       
  2119           {
       
  2120           int workspace[1000];
       
  2121           for (i = 0; i < timeitm; i++)
       
  2122             count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
       
  2123               options | g_notempty, use_offsets, use_size_offsets, workspace,
       
  2124               sizeof(workspace)/sizeof(int));
       
  2125           }
       
  2126         else
       
  2127 #endif
       
  2128 
       
  2129         for (i = 0; i < timeitm; i++)
       
  2130           count = pcre_exec(re, extra, (char *)bptr, len,
       
  2131             start_offset, options | g_notempty, use_offsets, use_size_offsets);
       
  2132 
       
  2133         time_taken = clock() - start_time;
       
  2134         fprintf(outfile, "Execute time %.4f milliseconds\n",
       
  2135           (((double)time_taken * 1000.0) / (double)timeitm) /
       
  2136             (double)CLOCKS_PER_SEC);
       
  2137         }
       
  2138 
       
  2139       /* If find_match_limit is set, we want to do repeated matches with
       
  2140       varying limits in order to find the minimum value for the match limit and
       
  2141       for the recursion limit. */
       
  2142 
       
  2143       if (find_match_limit)
       
  2144         {
       
  2145         if (extra == NULL)
       
  2146           {
       
  2147           extra = (pcre_extra *)malloc(sizeof(pcre_extra));
       
  2148           extra->flags = 0;
       
  2149           }
       
  2150 
       
  2151         (void)check_match_limit(re, extra, bptr, len, start_offset,
       
  2152           options|g_notempty, use_offsets, use_size_offsets,
       
  2153           PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
       
  2154           PCRE_ERROR_MATCHLIMIT, "match()");
       
  2155 
       
  2156         count = check_match_limit(re, extra, bptr, len, start_offset,
       
  2157           options|g_notempty, use_offsets, use_size_offsets,
       
  2158           PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
       
  2159           PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
       
  2160         }
       
  2161 
       
  2162       /* If callout_data is set, use the interface with additional data */
       
  2163 
       
  2164       else if (callout_data_set)
       
  2165         {
       
  2166         if (extra == NULL)
       
  2167           {
       
  2168           extra = (pcre_extra *)malloc(sizeof(pcre_extra));
       
  2169           extra->flags = 0;
       
  2170           }
       
  2171         extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
       
  2172         extra->callout_data = &callout_data;
       
  2173         count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
       
  2174           options | g_notempty, use_offsets, use_size_offsets);
       
  2175         extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
       
  2176         }
       
  2177 
       
  2178       /* The normal case is just to do the match once, with the default
       
  2179       value of match_limit. */
       
  2180 
       
  2181 #if !defined NODFA
       
  2182       else if (all_use_dfa || use_dfa)
       
  2183         {
       
  2184         int workspace[1000];
       
  2185         count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
       
  2186           options | g_notempty, use_offsets, use_size_offsets, workspace,
       
  2187           sizeof(workspace)/sizeof(int));
       
  2188         if (count == 0)
       
  2189           {
       
  2190           fprintf(outfile, "Matched, but too many subsidiary matches\n");
       
  2191           count = use_size_offsets/2;
       
  2192           }
       
  2193         }
       
  2194 #endif
       
  2195 
       
  2196       else
       
  2197         {
       
  2198         count = pcre_exec(re, extra, (char *)bptr, len,
       
  2199           start_offset, options | g_notempty, use_offsets, use_size_offsets);
       
  2200         if (count == 0)
       
  2201           {
       
  2202           fprintf(outfile, "Matched, but too many substrings\n");
       
  2203           count = use_size_offsets/3;
       
  2204           }
       
  2205         }
       
  2206 
       
  2207       /* Matched */
       
  2208 
       
  2209       if (count >= 0)
       
  2210         {
       
  2211         int i, maxcount;
       
  2212 
       
  2213 #if !defined NODFA
       
  2214         if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
       
  2215 #endif
       
  2216           maxcount = use_size_offsets/3;
       
  2217 
       
  2218         /* This is a check against a lunatic return value. */
       
  2219 
       
  2220         if (count > maxcount)
       
  2221           {
       
  2222           fprintf(outfile,
       
  2223             "** PCRE error: returned count %d is too big for offset size %d\n",
       
  2224             count, use_size_offsets);
       
  2225           count = use_size_offsets/3;
       
  2226           if (do_g || do_G)
       
  2227             {
       
  2228             fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
       
  2229             do_g = do_G = FALSE;        /* Break g/G loop */
       
  2230             }
       
  2231           }
       
  2232 
       
  2233         for (i = 0; i < count * 2; i += 2)
       
  2234           {
       
  2235           if (use_offsets[i] < 0)
       
  2236             fprintf(outfile, "%2d: <unset>\n", i/2);
       
  2237           else
       
  2238             {
       
  2239             fprintf(outfile, "%2d: ", i/2);
       
  2240             (void)pchars(bptr + use_offsets[i],
       
  2241               use_offsets[i+1] - use_offsets[i], outfile);
       
  2242             fprintf(outfile, "\n");
       
  2243             if (i == 0)
       
  2244               {
       
  2245               if (do_showrest)
       
  2246                 {
       
  2247                 fprintf(outfile, " 0+ ");
       
  2248                 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
       
  2249                   outfile);
       
  2250                 fprintf(outfile, "\n");
       
  2251                 }
       
  2252               }
       
  2253             }
       
  2254           }
       
  2255 
       
  2256         for (i = 0; i < 32; i++)
       
  2257           {
       
  2258           if ((copystrings & (1 << i)) != 0)
       
  2259             {
       
  2260             char copybuffer[256];
       
  2261             int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
       
  2262               i, copybuffer, sizeof(copybuffer));
       
  2263             if (rc < 0)
       
  2264               fprintf(outfile, "copy substring %d failed %d\n", i, rc);
       
  2265             else
       
  2266               fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
       
  2267             }
       
  2268           }
       
  2269 
       
  2270         for (copynamesptr = copynames;
       
  2271              *copynamesptr != 0;
       
  2272              copynamesptr += (int)strlen((char*)copynamesptr) + 1)
       
  2273           {
       
  2274           char copybuffer[256];
       
  2275           int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
       
  2276             count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
       
  2277           if (rc < 0)
       
  2278             fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
       
  2279           else
       
  2280             fprintf(outfile, "  C %s (%d) %s\n", copybuffer, rc, copynamesptr);
       
  2281           }
       
  2282 
       
  2283         for (i = 0; i < 32; i++)
       
  2284           {
       
  2285           if ((getstrings & (1 << i)) != 0)
       
  2286             {
       
  2287             const char *substring;
       
  2288             int rc = pcre_get_substring((char *)bptr, use_offsets, count,
       
  2289               i, &substring);
       
  2290             if (rc < 0)
       
  2291               fprintf(outfile, "get substring %d failed %d\n", i, rc);
       
  2292             else
       
  2293               {
       
  2294               fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
       
  2295               pcre_free_substring(substring);
       
  2296               }
       
  2297             }
       
  2298           }
       
  2299 
       
  2300         for (getnamesptr = getnames;
       
  2301              *getnamesptr != 0;
       
  2302              getnamesptr += (int)strlen((char*)getnamesptr) + 1)
       
  2303           {
       
  2304           const char *substring;
       
  2305           int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
       
  2306             count, (char *)getnamesptr, &substring);
       
  2307           if (rc < 0)
       
  2308             fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
       
  2309           else
       
  2310             {
       
  2311             fprintf(outfile, "  G %s (%d) %s\n", substring, rc, getnamesptr);
       
  2312             pcre_free_substring(substring);
       
  2313             }
       
  2314           }
       
  2315 
       
  2316         if (getlist)
       
  2317           {
       
  2318           const char **stringlist;
       
  2319           int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
       
  2320             &stringlist);
       
  2321           if (rc < 0)
       
  2322             fprintf(outfile, "get substring list failed %d\n", rc);
       
  2323           else
       
  2324             {
       
  2325             for (i = 0; i < count; i++)
       
  2326               fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
       
  2327             if (stringlist[i] != NULL)
       
  2328               fprintf(outfile, "string list not terminated by NULL\n");
       
  2329             /* free((void *)stringlist); */
       
  2330             pcre_free_substring_list(stringlist);
       
  2331             }
       
  2332           }
       
  2333         }
       
  2334 
       
  2335       /* There was a partial match */
       
  2336 
       
  2337       else if (count == PCRE_ERROR_PARTIAL)
       
  2338         {
       
  2339         fprintf(outfile, "Partial match");
       
  2340 #if !defined NODFA
       
  2341         if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
       
  2342           fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
       
  2343             bptr + use_offsets[0]);
       
  2344 #endif
       
  2345         fprintf(outfile, "\n");
       
  2346         break;  /* Out of the /g loop */
       
  2347         }
       
  2348 
       
  2349       /* Failed to match. If this is a /g or /G loop and we previously set
       
  2350       g_notempty after a null match, this is not necessarily the end. We want
       
  2351       to advance the start offset, and continue. We won't be at the end of the
       
  2352       string - that was checked before setting g_notempty.
       
  2353 
       
  2354       Complication arises in the case when the newline option is "any" or
       
  2355       "anycrlf". If the previous match was at the end of a line terminated by
       
  2356       CRLF, an advance of one character just passes the \r, whereas we should
       
  2357       prefer the longer newline sequence, as does the code in pcre_exec().
       
  2358       Fudge the offset value to achieve this.
       
  2359 
       
  2360       Otherwise, in the case of UTF-8 matching, the advance must be one
       
  2361       character, not one byte. */
       
  2362 
       
  2363       else
       
  2364         {
       
  2365         if (g_notempty != 0)
       
  2366           {
       
  2367           int onechar = 1;
       
  2368           unsigned int obits = ((real_pcre *)re)->options;
       
  2369           use_offsets[0] = start_offset;
       
  2370           if ((obits & PCRE_NEWLINE_BITS) == 0)
       
  2371             {
       
  2372             int d;
       
  2373             (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
       
  2374             obits = (d == '\r')? PCRE_NEWLINE_CR :
       
  2375                     (d == '\n')? PCRE_NEWLINE_LF :
       
  2376                     (d == ('\r'<<8 | '\n'))? PCRE_NEWLINE_CRLF :
       
  2377                     (d == -2)? PCRE_NEWLINE_ANYCRLF :
       
  2378                     (d == -1)? PCRE_NEWLINE_ANY : 0;
       
  2379             }
       
  2380           if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
       
  2381                (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
       
  2382               &&
       
  2383               start_offset < len - 1 &&
       
  2384               bptr[start_offset] == '\r' &&
       
  2385               bptr[start_offset+1] == '\n')
       
  2386             onechar++;
       
  2387           else if (use_utf8)
       
  2388             {
       
  2389             while (start_offset + onechar < len)
       
  2390               {
       
  2391               int tb = bptr[start_offset+onechar];
       
  2392               if (tb <= 127) break;
       
  2393               tb &= 0xc0;
       
  2394               if (tb != 0 && tb != 0xc0) onechar++;
       
  2395               }
       
  2396             }
       
  2397           use_offsets[1] = start_offset + onechar;
       
  2398           }
       
  2399         else
       
  2400           {
       
  2401           if (count == PCRE_ERROR_NOMATCH)
       
  2402             {
       
  2403             if (gmatched == 0) fprintf(outfile, "No match\n");
       
  2404             }
       
  2405           else fprintf(outfile, "Error %d\n", count);
       
  2406           break;  /* Out of the /g loop */
       
  2407           }
       
  2408         }
       
  2409 
       
  2410       /* If not /g or /G we are done */
       
  2411 
       
  2412       if (!do_g && !do_G) break;
       
  2413 
       
  2414       /* If we have matched an empty string, first check to see if we are at
       
  2415       the end of the subject. If so, the /g loop is over. Otherwise, mimic
       
  2416       what Perl's /g option does. This turns out to be rather cunning. First
       
  2417       we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
       
  2418       same point. If this fails (picked up above) we advance to the next
       
  2419       character. */
       
  2420 
       
  2421       g_notempty = 0;
       
  2422 
       
  2423       if (use_offsets[0] == use_offsets[1])
       
  2424         {
       
  2425         if (use_offsets[0] == len) break;
       
  2426         g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
       
  2427         }
       
  2428 
       
  2429       /* For /g, update the start offset, leaving the rest alone */
       
  2430 
       
  2431       if (do_g) start_offset = use_offsets[1];
       
  2432 
       
  2433       /* For /G, update the pointer and length */
       
  2434 
       
  2435       else
       
  2436         {
       
  2437         bptr += use_offsets[1];
       
  2438         len -= use_offsets[1];
       
  2439         }
       
  2440       }  /* End of loop for /g and /G */
       
  2441 
       
  2442     NEXT_DATA: continue;
       
  2443     }    /* End of loop for data lines */
       
  2444 
       
  2445   CONTINUE:
       
  2446 
       
  2447 #if !defined NOPOSIX
       
  2448   if (posix || do_posix) regfree(&preg);
       
  2449 #endif
       
  2450 
       
  2451   if (re != NULL) new_free(re);
       
  2452   if (extra != NULL) new_free(extra);
       
  2453   if (tables != NULL)
       
  2454     {
       
  2455     new_free((void *)tables);
       
  2456     setlocale(LC_CTYPE, "C");
       
  2457     locale_set = 0;
       
  2458     }
       
  2459   }
       
  2460 
       
  2461 if (infile == stdin) fprintf(outfile, "\n");
       
  2462 
       
  2463 EXIT:
       
  2464 
       
  2465 if (infile != NULL && infile != stdin) fclose(infile);
       
  2466 if (outfile != NULL && outfile != stdout) fclose(outfile);
       
  2467 
       
  2468 free(buffer);
       
  2469 free(dbuffer);
       
  2470 free(pbuffer);
       
  2471 free(offsets);
       
  2472 
       
  2473 return yield;
       
  2474 }
       
  2475 
       
  2476 /* End of pcretest.c */