libraries/spcre/libpcre/pcre/pcre_printint.src
changeset 0 7f656887cf89
equal deleted inserted replaced
-1:000000000000 0:7f656887cf89
       
     1 /*************************************************
       
     2 *      Perl-Compatible Regular Expressions       *
       
     3 *************************************************/
       
     4 
       
     5 /* PCRE is a library of functions to support regular expressions whose syntax
       
     6 and semantics are as close as possible to those of the Perl 5 language.
       
     7 
       
     8                        Written by Philip Hazel
       
     9            Copyright (c) 1997-2008 University of Cambridge
       
    10 
       
    11 -----------------------------------------------------------------------------
       
    12 Redistribution and use in source and binary forms, with or without
       
    13 modification, are permitted provided that the following conditions are met:
       
    14 
       
    15     * Redistributions of source code must retain the above copyright notice,
       
    16       this list of conditions and the following disclaimer.
       
    17 
       
    18     * Redistributions in binary form must reproduce the above copyright
       
    19       notice, this list of conditions and the following disclaimer in the
       
    20       documentation and/or other materials provided with the distribution.
       
    21 
       
    22     * Neither the name of the University of Cambridge nor the names of its
       
    23       contributors may be used to endorse or promote products derived from
       
    24       this software without specific prior written permission.
       
    25 
       
    26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
       
    27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
       
    28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
       
    29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
       
    30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
       
    31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
       
    32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
       
    33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
       
    34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
       
    35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
       
    36 POSSIBILITY OF SUCH DAMAGE.
       
    37 -----------------------------------------------------------------------------
       
    38 */
       
    39 
       
    40 
       
    41 /* This module contains a PCRE private debugging function for printing out the
       
    42 internal form of a compiled regular expression, along with some supporting
       
    43 local functions. This source file is used in two places:
       
    44 
       
    45 (1) It is #included by pcre_compile.c when it is compiled in debugging mode
       
    46 (DEBUG defined in pcre_internal.h). It is not included in production compiles.
       
    47 
       
    48 (2) It is always #included by pcretest.c, which can be asked to print out a
       
    49 compiled regex for debugging purposes. */
       
    50 
       
    51 
       
    52 /* Macro that decides whether a character should be output as a literal or in
       
    53 hexadecimal. We don't use isprint() because that can vary from system to system
       
    54 (even without the use of locales) and we want the output always to be the same,
       
    55 for testing purposes. This macro is used in pcretest as well as in this file. */
       
    56 
       
    57 #define PRINTABLE(c) ((c) >= 32 && (c) < 127)
       
    58 
       
    59 /* The table of operator names. */
       
    60 
       
    61 static const char *OP_names[] = { OP_NAME_LIST };
       
    62 
       
    63 
       
    64 
       
    65 /*************************************************
       
    66 *       Print single- or multi-byte character    *
       
    67 *************************************************/
       
    68 
       
    69 static int
       
    70 print_char(FILE *f, uschar *ptr, BOOL utf8)
       
    71 {
       
    72 int c = *ptr;
       
    73 
       
    74 #ifndef SUPPORT_UTF8
       
    75 utf8 = utf8;  /* Avoid compiler warning */
       
    76 if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c);
       
    77 return 0;
       
    78 
       
    79 #else
       
    80 if (!utf8 || (c & 0xc0) != 0xc0)
       
    81   {
       
    82   if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c);
       
    83   return 0;
       
    84   }
       
    85 else
       
    86   {
       
    87   int i;
       
    88   int a = _pcre_utf8_table4[c & 0x3f];  /* Number of additional bytes */
       
    89   int s = 6*a;
       
    90   c = (c & _pcre_utf8_table3[a]) << s;
       
    91   for (i = 1; i <= a; i++)
       
    92     {
       
    93     /* This is a check for malformed UTF-8; it should only occur if the sanity
       
    94     check has been turned off. Rather than swallow random bytes, just stop if
       
    95     we hit a bad one. Print it with \X instead of \x as an indication. */
       
    96 
       
    97     if ((ptr[i] & 0xc0) != 0x80)
       
    98       {
       
    99       fprintf(f, "\\X{%x}", c);
       
   100       return i - 1;
       
   101       }
       
   102 
       
   103     /* The byte is OK */
       
   104 
       
   105     s -= 6;
       
   106     c |= (ptr[i] & 0x3f) << s;
       
   107     }
       
   108   if (c < 128) fprintf(f, "\\x%02x", c); else fprintf(f, "\\x{%x}", c);
       
   109   return a;
       
   110   }
       
   111 #endif
       
   112 }
       
   113 
       
   114 
       
   115 
       
   116 /*************************************************
       
   117 *          Find Unicode property name            *
       
   118 *************************************************/
       
   119 
       
   120 static const char *
       
   121 get_ucpname(int ptype, int pvalue)
       
   122 {
       
   123 #ifdef SUPPORT_UCP
       
   124 int i;
       
   125 for (i = _pcre_utt_size - 1; i >= 0; i--)
       
   126   {
       
   127   if (ptype == _pcre_utt[i].type && pvalue == _pcre_utt[i].value) break;
       
   128   }
       
   129 return (i >= 0)? _pcre_utt_names + _pcre_utt[i].name_offset : "??";
       
   130 #else
       
   131 /* It gets harder and harder to shut off unwanted compiler warnings. */
       
   132 ptype = ptype * pvalue;
       
   133 return (ptype == pvalue)? "??" : "??";
       
   134 #endif
       
   135 }
       
   136 
       
   137 
       
   138 
       
   139 /*************************************************
       
   140 *         Print compiled regex                   *
       
   141 *************************************************/
       
   142 
       
   143 /* Make this function work for a regex with integers either byte order.
       
   144 However, we assume that what we are passed is a compiled regex. The
       
   145 print_lengths flag controls whether offsets and lengths of items are printed.
       
   146 They can be turned off from pcretest so that automatic tests on bytecode can be
       
   147 written that do not depend on the value of LINK_SIZE. */
       
   148 
       
   149 static void
       
   150 pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths)
       
   151 {
       
   152 real_pcre *re = (real_pcre *)external_re;
       
   153 uschar *codestart, *code;
       
   154 BOOL utf8;
       
   155 
       
   156 unsigned int options = re->options;
       
   157 int offset = re->name_table_offset;
       
   158 int count = re->name_count;
       
   159 int size = re->name_entry_size;
       
   160 
       
   161 if (re->magic_number != MAGIC_NUMBER)
       
   162   {
       
   163   offset = ((offset << 8) & 0xff00) | ((offset >> 8) & 0xff);
       
   164   count = ((count << 8) & 0xff00) | ((count >> 8) & 0xff);
       
   165   size = ((size << 8) & 0xff00) | ((size >> 8) & 0xff);
       
   166   options = ((options << 24) & 0xff000000) |
       
   167             ((options <<  8) & 0x00ff0000) |
       
   168             ((options >>  8) & 0x0000ff00) |
       
   169             ((options >> 24) & 0x000000ff);
       
   170   }
       
   171 
       
   172 code = codestart = (uschar *)re + offset + count * size;
       
   173 utf8 = (options & PCRE_UTF8) != 0;
       
   174 
       
   175 for(;;)
       
   176   {
       
   177   uschar *ccode;
       
   178   int c;
       
   179   int extra = 0;
       
   180 
       
   181   if (print_lengths)
       
   182     fprintf(f, "%3d ", (int)(code - codestart));
       
   183   else
       
   184     fprintf(f, "    ");
       
   185 
       
   186   switch(*code)
       
   187     {
       
   188     case OP_END:
       
   189     fprintf(f, "    %s\n", OP_names[*code]);
       
   190     fprintf(f, "------------------------------------------------------------------\n");
       
   191     return;
       
   192 
       
   193     case OP_OPT:
       
   194     fprintf(f, " %.2x %s", code[1], OP_names[*code]);
       
   195     break;
       
   196 
       
   197     case OP_CHAR:
       
   198     fprintf(f, "    ");
       
   199     do
       
   200       {
       
   201       code++;
       
   202       code += 1 + print_char(f, code, utf8);
       
   203       }
       
   204     while (*code == OP_CHAR);
       
   205     fprintf(f, "\n");
       
   206     continue;
       
   207 
       
   208     case OP_CHARNC:
       
   209     fprintf(f, " NC ");
       
   210     do
       
   211       {
       
   212       code++;
       
   213       code += 1 + print_char(f, code, utf8);
       
   214       }
       
   215     while (*code == OP_CHARNC);
       
   216     fprintf(f, "\n");
       
   217     continue;
       
   218 
       
   219     case OP_CBRA:
       
   220     case OP_SCBRA:
       
   221     if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
       
   222       else fprintf(f, "    ");
       
   223     fprintf(f, "%s %d", OP_names[*code], GET2(code, 1+LINK_SIZE));
       
   224     break;
       
   225 
       
   226     case OP_BRA:
       
   227     case OP_SBRA:
       
   228     case OP_KETRMAX:
       
   229     case OP_KETRMIN:
       
   230     case OP_ALT:
       
   231     case OP_KET:
       
   232     case OP_ASSERT:
       
   233     case OP_ASSERT_NOT:
       
   234     case OP_ASSERTBACK:
       
   235     case OP_ASSERTBACK_NOT:
       
   236     case OP_ONCE:
       
   237     case OP_COND:
       
   238     case OP_SCOND:
       
   239     case OP_REVERSE:
       
   240     if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
       
   241       else fprintf(f, "    ");
       
   242     fprintf(f, "%s", OP_names[*code]);
       
   243     break;
       
   244 
       
   245     case OP_CREF:
       
   246     fprintf(f, "%3d %s", GET2(code,1), OP_names[*code]);
       
   247     break;
       
   248 
       
   249     case OP_RREF:
       
   250     c = GET2(code, 1);
       
   251     if (c == RREF_ANY)
       
   252       fprintf(f, "    Cond recurse any");
       
   253     else
       
   254       fprintf(f, "    Cond recurse %d", c);
       
   255     break;
       
   256 
       
   257     case OP_DEF:
       
   258     fprintf(f, "    Cond def");
       
   259     break;
       
   260 
       
   261     case OP_STAR:
       
   262     case OP_MINSTAR:
       
   263     case OP_POSSTAR:
       
   264     case OP_PLUS:
       
   265     case OP_MINPLUS:
       
   266     case OP_POSPLUS:
       
   267     case OP_QUERY:
       
   268     case OP_MINQUERY:
       
   269     case OP_POSQUERY:
       
   270     case OP_TYPESTAR:
       
   271     case OP_TYPEMINSTAR:
       
   272     case OP_TYPEPOSSTAR:
       
   273     case OP_TYPEPLUS:
       
   274     case OP_TYPEMINPLUS:
       
   275     case OP_TYPEPOSPLUS:
       
   276     case OP_TYPEQUERY:
       
   277     case OP_TYPEMINQUERY:
       
   278     case OP_TYPEPOSQUERY:
       
   279     fprintf(f, "    ");
       
   280     if (*code >= OP_TYPESTAR)
       
   281       {
       
   282       fprintf(f, "%s", OP_names[code[1]]);
       
   283       if (code[1] == OP_PROP || code[1] == OP_NOTPROP)
       
   284         {
       
   285         fprintf(f, " %s ", get_ucpname(code[2], code[3]));
       
   286         extra = 2;
       
   287         }
       
   288       }
       
   289     else extra = print_char(f, code+1, utf8);
       
   290     fprintf(f, "%s", OP_names[*code]);
       
   291     break;
       
   292 
       
   293     case OP_EXACT:
       
   294     case OP_UPTO:
       
   295     case OP_MINUPTO:
       
   296     case OP_POSUPTO:
       
   297     fprintf(f, "    ");
       
   298     extra = print_char(f, code+3, utf8);
       
   299     fprintf(f, "{");
       
   300     if (*code != OP_EXACT) fprintf(f, "0,");
       
   301     fprintf(f, "%d}", GET2(code,1));
       
   302     if (*code == OP_MINUPTO) fprintf(f, "?");
       
   303       else if (*code == OP_POSUPTO) fprintf(f, "+");
       
   304     break;
       
   305 
       
   306     case OP_TYPEEXACT:
       
   307     case OP_TYPEUPTO:
       
   308     case OP_TYPEMINUPTO:
       
   309     case OP_TYPEPOSUPTO:
       
   310     fprintf(f, "    %s", OP_names[code[3]]);
       
   311     if (code[3] == OP_PROP || code[3] == OP_NOTPROP)
       
   312       {
       
   313       fprintf(f, " %s ", get_ucpname(code[4], code[5]));
       
   314       extra = 2;
       
   315       }
       
   316     fprintf(f, "{");
       
   317     if (*code != OP_TYPEEXACT) fprintf(f, "0,");
       
   318     fprintf(f, "%d}", GET2(code,1));
       
   319     if (*code == OP_TYPEMINUPTO) fprintf(f, "?");
       
   320       else if (*code == OP_TYPEPOSUPTO) fprintf(f, "+");
       
   321     break;
       
   322 
       
   323     case OP_NOT:
       
   324     c = code[1];
       
   325     if (PRINTABLE(c)) fprintf(f, "    [^%c]", c);
       
   326       else fprintf(f, "    [^\\x%02x]", c);
       
   327     break;
       
   328 
       
   329     case OP_NOTSTAR:
       
   330     case OP_NOTMINSTAR:
       
   331     case OP_NOTPOSSTAR:
       
   332     case OP_NOTPLUS:
       
   333     case OP_NOTMINPLUS:
       
   334     case OP_NOTPOSPLUS:
       
   335     case OP_NOTQUERY:
       
   336     case OP_NOTMINQUERY:
       
   337     case OP_NOTPOSQUERY:
       
   338     c = code[1];
       
   339     if (PRINTABLE(c)) fprintf(f, "    [^%c]", c);
       
   340       else fprintf(f, "    [^\\x%02x]", c);
       
   341     fprintf(f, "%s", OP_names[*code]);
       
   342     break;
       
   343 
       
   344     case OP_NOTEXACT:
       
   345     case OP_NOTUPTO:
       
   346     case OP_NOTMINUPTO:
       
   347     case OP_NOTPOSUPTO:
       
   348     c = code[3];
       
   349     if (PRINTABLE(c)) fprintf(f, "    [^%c]{", c);
       
   350       else fprintf(f, "    [^\\x%02x]{", c);
       
   351     if (*code != OP_NOTEXACT) fprintf(f, "0,");
       
   352     fprintf(f, "%d}", GET2(code,1));
       
   353     if (*code == OP_NOTMINUPTO) fprintf(f, "?");
       
   354       else if (*code == OP_NOTPOSUPTO) fprintf(f, "+");
       
   355     break;
       
   356 
       
   357     case OP_RECURSE:
       
   358     if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
       
   359       else fprintf(f, "    ");
       
   360     fprintf(f, "%s", OP_names[*code]);
       
   361     break;
       
   362 
       
   363     case OP_REF:
       
   364     fprintf(f, "    \\%d", GET2(code,1));
       
   365     ccode = code + _pcre_OP_lengths[*code];
       
   366     goto CLASS_REF_REPEAT;
       
   367 
       
   368     case OP_CALLOUT:
       
   369     fprintf(f, "    %s %d %d %d", OP_names[*code], code[1], GET(code,2),
       
   370       GET(code, 2 + LINK_SIZE));
       
   371     break;
       
   372 
       
   373     case OP_PROP:
       
   374     case OP_NOTPROP:
       
   375     fprintf(f, "    %s %s", OP_names[*code], get_ucpname(code[1], code[2]));
       
   376     break;
       
   377 
       
   378     /* OP_XCLASS can only occur in UTF-8 mode. However, there's no harm in
       
   379     having this code always here, and it makes it less messy without all those
       
   380     #ifdefs. */
       
   381 
       
   382     case OP_CLASS:
       
   383     case OP_NCLASS:
       
   384     case OP_XCLASS:
       
   385       {
       
   386       int i, min, max;
       
   387       BOOL printmap;
       
   388 
       
   389       fprintf(f, "    [");
       
   390 
       
   391       if (*code == OP_XCLASS)
       
   392         {
       
   393         extra = GET(code, 1);
       
   394         ccode = code + LINK_SIZE + 1;
       
   395         printmap = (*ccode & XCL_MAP) != 0;
       
   396         if ((*ccode++ & XCL_NOT) != 0) fprintf(f, "^");
       
   397         }
       
   398       else
       
   399         {
       
   400         printmap = TRUE;
       
   401         ccode = code + 1;
       
   402         }
       
   403 
       
   404       /* Print a bit map */
       
   405 
       
   406       if (printmap)
       
   407         {
       
   408         for (i = 0; i < 256; i++)
       
   409           {
       
   410           if ((ccode[i/8] & (1 << (i&7))) != 0)
       
   411             {
       
   412             int j;
       
   413             for (j = i+1; j < 256; j++)
       
   414               if ((ccode[j/8] & (1 << (j&7))) == 0) break;
       
   415             if (i == '-' || i == ']') fprintf(f, "\\");
       
   416             if (PRINTABLE(i)) fprintf(f, "%c", i);
       
   417               else fprintf(f, "\\x%02x", i);
       
   418             if (--j > i)
       
   419               {
       
   420               if (j != i + 1) fprintf(f, "-");
       
   421               if (j == '-' || j == ']') fprintf(f, "\\");
       
   422               if (PRINTABLE(j)) fprintf(f, "%c", j);
       
   423                 else fprintf(f, "\\x%02x", j);
       
   424               }
       
   425             i = j;
       
   426             }
       
   427           }
       
   428         ccode += 32;
       
   429         }
       
   430 
       
   431       /* For an XCLASS there is always some additional data */
       
   432 
       
   433       if (*code == OP_XCLASS)
       
   434         {
       
   435         int ch;
       
   436         while ((ch = *ccode++) != XCL_END)
       
   437           {
       
   438           if (ch == XCL_PROP)
       
   439             {
       
   440             int ptype = *ccode++;
       
   441             int pvalue = *ccode++;
       
   442             fprintf(f, "\\p{%s}", get_ucpname(ptype, pvalue));
       
   443             }
       
   444           else if (ch == XCL_NOTPROP)
       
   445             {
       
   446             int ptype = *ccode++;
       
   447             int pvalue = *ccode++;
       
   448             fprintf(f, "\\P{%s}", get_ucpname(ptype, pvalue));
       
   449             }
       
   450           else
       
   451             {
       
   452             ccode += 1 + print_char(f, ccode, TRUE);
       
   453             if (ch == XCL_RANGE)
       
   454               {
       
   455               fprintf(f, "-");
       
   456               ccode += 1 + print_char(f, ccode, TRUE);
       
   457               }
       
   458             }
       
   459           }
       
   460         }
       
   461 
       
   462       /* Indicate a non-UTF8 class which was created by negation */
       
   463 
       
   464       fprintf(f, "]%s", (*code == OP_NCLASS)? " (neg)" : "");
       
   465 
       
   466       /* Handle repeats after a class or a back reference */
       
   467 
       
   468       CLASS_REF_REPEAT:
       
   469       switch(*ccode)
       
   470         {
       
   471         case OP_CRSTAR:
       
   472         case OP_CRMINSTAR:
       
   473         case OP_CRPLUS:
       
   474         case OP_CRMINPLUS:
       
   475         case OP_CRQUERY:
       
   476         case OP_CRMINQUERY:
       
   477         fprintf(f, "%s", OP_names[*ccode]);
       
   478         extra += _pcre_OP_lengths[*ccode];
       
   479         break;
       
   480 
       
   481         case OP_CRRANGE:
       
   482         case OP_CRMINRANGE:
       
   483         min = GET2(ccode,1);
       
   484         max = GET2(ccode,3);
       
   485         if (max == 0) fprintf(f, "{%d,}", min);
       
   486         else fprintf(f, "{%d,%d}", min, max);
       
   487         if (*ccode == OP_CRMINRANGE) fprintf(f, "?");
       
   488         extra += _pcre_OP_lengths[*ccode];
       
   489         break;
       
   490 
       
   491         /* Do nothing if it's not a repeat; this code stops picky compilers
       
   492         warning about the lack of a default code path. */
       
   493 
       
   494         default:
       
   495         break;
       
   496         }
       
   497       }
       
   498     break;
       
   499 
       
   500     /* Anything else is just an item with no data*/
       
   501 
       
   502     default:
       
   503     fprintf(f, "    %s", OP_names[*code]);
       
   504     break;
       
   505     }
       
   506 
       
   507   code += _pcre_OP_lengths[*code] + extra;
       
   508   fprintf(f, "\n");
       
   509   }
       
   510 }
       
   511 
       
   512 /* End of pcre_printint.src */