libraries/spcre/libpcre/pcre/pcreposix.c
changeset 0 7f656887cf89
equal deleted inserted replaced
-1:000000000000 0:7f656887cf89
       
     1 /*************************************************
       
     2 *      Perl-Compatible Regular Expressions       *
       
     3 *************************************************/
       
     4 
       
     5 /* PCRE is a library of functions to support regular expressions whose syntax
       
     6 and semantics are as close as possible to those of the Perl 5 language.
       
     7 
       
     8                        Written by Philip Hazel
       
     9            Copyright (c) 1997-2008 University of Cambridge
       
    10 
       
    11 -----------------------------------------------------------------------------
       
    12 Redistribution and use in source and binary forms, with or without
       
    13 modification, are permitted provided that the following conditions are met:
       
    14 
       
    15     * Redistributions of source code must retain the above copyright notice,
       
    16       this list of conditions and the following disclaimer.
       
    17 
       
    18     * Redistributions in binary form must reproduce the above copyright
       
    19       notice, this list of conditions and the following disclaimer in the
       
    20       documentation and/or other materials provided with the distribution.
       
    21 
       
    22     * Neither the name of the University of Cambridge nor the names of its
       
    23       contributors may be used to endorse or promote products derived from
       
    24       this software without specific prior written permission.
       
    25 
       
    26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
       
    27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
       
    28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
       
    29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
       
    30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
       
    31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
       
    32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
       
    33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
       
    34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
       
    35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
       
    36 POSSIBILITY OF SUCH DAMAGE.
       
    37 -----------------------------------------------------------------------------
       
    38 */
       
    39 
       
    40 
       
    41 /* This module is a wrapper that provides a POSIX API to the underlying PCRE
       
    42 functions. */
       
    43 
       
    44 
       
    45 #ifdef HAVE_CONFIG_H
       
    46 #include "config.h"
       
    47 #endif
       
    48 
       
    49 
       
    50 /* Ensure that the PCREPOSIX_EXP_xxx macros are set appropriately for
       
    51 compiling these functions. This must come before including pcreposix.h, where
       
    52 they are set for an application (using these functions) if they have not
       
    53 previously been set. */
       
    54 
       
    55 #if defined(_WIN32) && !defined(PCRE_STATIC)
       
    56 #  define PCREPOSIX_EXP_DECL extern __declspec(dllexport)
       
    57 #  define PCREPOSIX_EXP_DEFN __declspec(dllexport)
       
    58 #endif
       
    59 
       
    60 #include "pcre.h"
       
    61 #include "pcre_internal.h"
       
    62 #include "pcreposix.h"
       
    63 
       
    64 
       
    65 /* Table to translate PCRE compile time error codes into POSIX error codes. */
       
    66 
       
    67 static const int eint[] = {
       
    68   0,           /* no error */
       
    69   REG_EESCAPE, /* \ at end of pattern */
       
    70   REG_EESCAPE, /* \c at end of pattern */
       
    71   REG_EESCAPE, /* unrecognized character follows \ */
       
    72   REG_BADBR,   /* numbers out of order in {} quantifier */
       
    73   REG_BADBR,   /* number too big in {} quantifier */
       
    74   REG_EBRACK,  /* missing terminating ] for character class */
       
    75   REG_ECTYPE,  /* invalid escape sequence in character class */
       
    76   REG_ERANGE,  /* range out of order in character class */
       
    77   REG_BADRPT,  /* nothing to repeat */
       
    78   REG_BADRPT,  /* operand of unlimited repeat could match the empty string */
       
    79   REG_ASSERT,  /* internal error: unexpected repeat */
       
    80   REG_BADPAT,  /* unrecognized character after (? */
       
    81   REG_BADPAT,  /* POSIX named classes are supported only within a class */
       
    82   REG_EPAREN,  /* missing ) */
       
    83   REG_ESUBREG, /* reference to non-existent subpattern */
       
    84   REG_INVARG,  /* erroffset passed as NULL */
       
    85   REG_INVARG,  /* unknown option bit(s) set */
       
    86   REG_EPAREN,  /* missing ) after comment */
       
    87   REG_ESIZE,   /* parentheses nested too deeply */
       
    88   REG_ESIZE,   /* regular expression too large */
       
    89   REG_ESPACE,  /* failed to get memory */
       
    90   REG_EPAREN,  /* unmatched brackets */
       
    91   REG_ASSERT,  /* internal error: code overflow */
       
    92   REG_BADPAT,  /* unrecognized character after (?< */
       
    93   REG_BADPAT,  /* lookbehind assertion is not fixed length */
       
    94   REG_BADPAT,  /* malformed number or name after (?( */
       
    95   REG_BADPAT,  /* conditional group contains more than two branches */
       
    96   REG_BADPAT,  /* assertion expected after (?( */
       
    97   REG_BADPAT,  /* (?R or (?[+-]digits must be followed by ) */
       
    98   REG_ECTYPE,  /* unknown POSIX class name */
       
    99   REG_BADPAT,  /* POSIX collating elements are not supported */
       
   100   REG_INVARG,  /* this version of PCRE is not compiled with PCRE_UTF8 support */
       
   101   REG_BADPAT,  /* spare error */
       
   102   REG_BADPAT,  /* character value in \x{...} sequence is too large */
       
   103   REG_BADPAT,  /* invalid condition (?(0) */
       
   104   REG_BADPAT,  /* \C not allowed in lookbehind assertion */
       
   105   REG_EESCAPE, /* PCRE does not support \L, \l, \N, \U, or \u */
       
   106   REG_BADPAT,  /* number after (?C is > 255 */
       
   107   REG_BADPAT,  /* closing ) for (?C expected */
       
   108   REG_BADPAT,  /* recursive call could loop indefinitely */
       
   109   REG_BADPAT,  /* unrecognized character after (?P */
       
   110   REG_BADPAT,  /* syntax error in subpattern name (missing terminator) */
       
   111   REG_BADPAT,  /* two named subpatterns have the same name */
       
   112   REG_BADPAT,  /* invalid UTF-8 string */
       
   113   REG_BADPAT,  /* support for \P, \p, and \X has not been compiled */
       
   114   REG_BADPAT,  /* malformed \P or \p sequence */
       
   115   REG_BADPAT,  /* unknown property name after \P or \p */
       
   116   REG_BADPAT,  /* subpattern name is too long (maximum 32 characters) */
       
   117   REG_BADPAT,  /* too many named subpatterns (maximum 10,000) */
       
   118   REG_BADPAT,  /* repeated subpattern is too long */
       
   119   REG_BADPAT,  /* octal value is greater than \377 (not in UTF-8 mode) */
       
   120   REG_BADPAT,  /* internal error: overran compiling workspace */
       
   121   REG_BADPAT,  /* internal error: previously-checked referenced subpattern not found */
       
   122   REG_BADPAT,  /* DEFINE group contains more than one branch */
       
   123   REG_BADPAT,  /* repeating a DEFINE group is not allowed */
       
   124   REG_INVARG,  /* inconsistent NEWLINE options */
       
   125   REG_BADPAT,  /* \g is not followed followed by an (optionally braced) non-zero number */
       
   126   REG_BADPAT,  /* (?+ or (?- must be followed by a non-zero number */
       
   127   REG_BADPAT,  /* number is too big */
       
   128   REG_BADPAT,  /* subpattern name expected */
       
   129   REG_BADPAT,  /* digit expected after (?+ */
       
   130   REG_BADPAT   /* ] is an invalid data character in JavaScript compatibility mode */
       
   131 };
       
   132 
       
   133 /* Table of texts corresponding to POSIX error codes */
       
   134 
       
   135 static const char *const pstring[] = {
       
   136   "",                                /* Dummy for value 0 */
       
   137   "internal error",                  /* REG_ASSERT */
       
   138   "invalid repeat counts in {}",     /* BADBR      */
       
   139   "pattern error",                   /* BADPAT     */
       
   140   "? * + invalid",                   /* BADRPT     */
       
   141   "unbalanced {}",                   /* EBRACE     */
       
   142   "unbalanced []",                   /* EBRACK     */
       
   143   "collation error - not relevant",  /* ECOLLATE   */
       
   144   "bad class",                       /* ECTYPE     */
       
   145   "bad escape sequence",             /* EESCAPE    */
       
   146   "empty expression",                /* EMPTY      */
       
   147   "unbalanced ()",                   /* EPAREN     */
       
   148   "bad range inside []",             /* ERANGE     */
       
   149   "expression too big",              /* ESIZE      */
       
   150   "failed to get memory",            /* ESPACE     */
       
   151   "bad back reference",              /* ESUBREG    */
       
   152   "bad argument",                    /* INVARG     */
       
   153   "match failed"                     /* NOMATCH    */
       
   154 };
       
   155 
       
   156 
       
   157 
       
   158 
       
   159 /*************************************************
       
   160 *          Translate error code to string        *
       
   161 *************************************************/
       
   162 
       
   163 PCREPOSIX_EXP_DEFN size_t PCRE_CALL_CONVENTION
       
   164 regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size)
       
   165 {
       
   166 const char *message, *addmessage;
       
   167 size_t length, addlength;
       
   168 
       
   169 message = (errcode >= (int)(sizeof(pstring)/sizeof(char *)))?
       
   170   "unknown error code" : pstring[errcode];
       
   171 length = strlen(message) + 1;
       
   172 
       
   173 addmessage = " at offset ";
       
   174 addlength = (preg != NULL && (int)preg->re_erroffset != -1)?
       
   175   strlen(addmessage) + 6 : 0;
       
   176 
       
   177 if (errbuf_size > 0)
       
   178   {
       
   179   if (addlength > 0 && errbuf_size >= length + addlength)
       
   180     sprintf(errbuf, "%s%s%-6d", message, addmessage, (int)preg->re_erroffset);
       
   181   else
       
   182     {
       
   183     strncpy(errbuf, message, errbuf_size - 1);
       
   184     errbuf[errbuf_size-1] = 0;
       
   185     }
       
   186   }
       
   187 
       
   188 return length + addlength;
       
   189 }
       
   190 
       
   191 
       
   192 
       
   193 
       
   194 /*************************************************
       
   195 *           Free store held by a regex           *
       
   196 *************************************************/
       
   197 
       
   198 PCREPOSIX_EXP_DEFN void PCRE_CALL_CONVENTION
       
   199 regfree(regex_t *preg)
       
   200 {
       
   201 (pcre_free)(preg->re_pcre);
       
   202 }
       
   203 
       
   204 
       
   205 
       
   206 
       
   207 /*************************************************
       
   208 *            Compile a regular expression        *
       
   209 *************************************************/
       
   210 
       
   211 /*
       
   212 Arguments:
       
   213   preg        points to a structure for recording the compiled expression
       
   214   pattern     the pattern to compile
       
   215   cflags      compilation flags
       
   216 
       
   217 Returns:      0 on success
       
   218               various non-zero codes on failure
       
   219 */
       
   220 
       
   221 PCREPOSIX_EXP_DEFN int PCRE_CALL_CONVENTION
       
   222 regcomp(regex_t *preg, const char *pattern, int cflags)
       
   223 {
       
   224 const char *errorptr;
       
   225 int erroffset;
       
   226 int errorcode;
       
   227 int options = 0;
       
   228 
       
   229 if ((cflags & REG_ICASE) != 0)   options |= PCRE_CASELESS;
       
   230 if ((cflags & REG_NEWLINE) != 0) options |= PCRE_MULTILINE;
       
   231 if ((cflags & REG_DOTALL) != 0)  options |= PCRE_DOTALL;
       
   232 if ((cflags & REG_NOSUB) != 0)   options |= PCRE_NO_AUTO_CAPTURE;
       
   233 if ((cflags & REG_UTF8) != 0)    options |= PCRE_UTF8;
       
   234 
       
   235 preg->re_pcre = pcre_compile2(pattern, options, &errorcode, &errorptr,
       
   236   &erroffset, NULL);
       
   237 preg->re_erroffset = erroffset;
       
   238 
       
   239 if (preg->re_pcre == NULL) return eint[errorcode];
       
   240 
       
   241 preg->re_nsub = pcre_info((const pcre *)preg->re_pcre, NULL, NULL);
       
   242 return 0;
       
   243 }
       
   244 
       
   245 
       
   246 
       
   247 
       
   248 /*************************************************
       
   249 *              Match a regular expression        *
       
   250 *************************************************/
       
   251 
       
   252 /* Unfortunately, PCRE requires 3 ints of working space for each captured
       
   253 substring, so we have to get and release working store instead of just using
       
   254 the POSIX structures as was done in earlier releases when PCRE needed only 2
       
   255 ints. However, if the number of possible capturing brackets is small, use a
       
   256 block of store on the stack, to reduce the use of malloc/free. The threshold is
       
   257 in a macro that can be changed at configure time.
       
   258 
       
   259 If REG_NOSUB was specified at compile time, the PCRE_NO_AUTO_CAPTURE flag will
       
   260 be set. When this is the case, the nmatch and pmatch arguments are ignored, and
       
   261 the only result is yes/no/error. */
       
   262 
       
   263 PCREPOSIX_EXP_DEFN int PCRE_CALL_CONVENTION
       
   264 regexec(const regex_t *preg, const char *string, size_t nmatch,
       
   265   regmatch_t pmatch[], int eflags)
       
   266 {
       
   267 int rc, so, eo;
       
   268 int options = 0;
       
   269 int *ovector = NULL;
       
   270 int small_ovector[POSIX_MALLOC_THRESHOLD * 3];
       
   271 BOOL allocated_ovector = FALSE;
       
   272 BOOL nosub =
       
   273   (((const pcre *)preg->re_pcre)->options & PCRE_NO_AUTO_CAPTURE) != 0;
       
   274 
       
   275 if ((eflags & REG_NOTBOL) != 0) options |= PCRE_NOTBOL;
       
   276 if ((eflags & REG_NOTEOL) != 0) options |= PCRE_NOTEOL;
       
   277 
       
   278 ((regex_t *)preg)->re_erroffset = (size_t)(-1);  /* Only has meaning after compile */
       
   279 
       
   280 /* When no string data is being returned, ensure that nmatch is zero.
       
   281 Otherwise, ensure the vector for holding the return data is large enough. */
       
   282 
       
   283 if (nosub) nmatch = 0;
       
   284 
       
   285 else if (nmatch > 0)
       
   286   {
       
   287   if (nmatch <= POSIX_MALLOC_THRESHOLD)
       
   288     {
       
   289     ovector = &(small_ovector[0]);
       
   290     }
       
   291   else
       
   292     {
       
   293     if (nmatch > INT_MAX/(sizeof(int) * 3)) return REG_ESPACE;
       
   294     ovector = (int *)malloc(sizeof(int) * nmatch * 3);
       
   295     if (ovector == NULL) return REG_ESPACE;
       
   296     allocated_ovector = TRUE;
       
   297     }
       
   298   }
       
   299 
       
   300 /* REG_STARTEND is a BSD extension, to allow for non-NUL-terminated strings.
       
   301 The man page from OS X says "REG_STARTEND affects only the location of the
       
   302 string, not how it is matched". That is why the "so" value is used to bump the
       
   303 start location rather than being passed as a PCRE "starting offset". */
       
   304 
       
   305 if ((eflags & REG_STARTEND) != 0)
       
   306   {
       
   307   so = pmatch[0].rm_so;
       
   308   eo = pmatch[0].rm_eo;
       
   309   }
       
   310 else
       
   311   {
       
   312   so = 0;
       
   313   eo = strlen(string);
       
   314   }
       
   315 
       
   316 rc = pcre_exec((const pcre *)preg->re_pcre, NULL, string + so, (eo - so),
       
   317   0, options, ovector, nmatch * 3);
       
   318 
       
   319 if (rc == 0) rc = nmatch;    /* All captured slots were filled in */
       
   320 
       
   321 if (rc >= 0)
       
   322   {
       
   323   size_t i;
       
   324   if (!nosub)
       
   325     {
       
   326     for (i = 0; i < (size_t)rc; i++)
       
   327       {
       
   328       pmatch[i].rm_so = ovector[i*2];
       
   329       pmatch[i].rm_eo = ovector[i*2+1];
       
   330       }
       
   331     if (allocated_ovector) free(ovector);
       
   332     for (; i < nmatch; i++) pmatch[i].rm_so = pmatch[i].rm_eo = -1;
       
   333     }
       
   334   return 0;
       
   335   }
       
   336 
       
   337 else
       
   338   {
       
   339   if (allocated_ovector) free(ovector);
       
   340   switch(rc)
       
   341     {
       
   342     case PCRE_ERROR_NOMATCH: return REG_NOMATCH;
       
   343     case PCRE_ERROR_NULL: return REG_INVARG;
       
   344     case PCRE_ERROR_BADOPTION: return REG_INVARG;
       
   345     case PCRE_ERROR_BADMAGIC: return REG_INVARG;
       
   346     case PCRE_ERROR_UNKNOWN_NODE: return REG_ASSERT;
       
   347     case PCRE_ERROR_NOMEMORY: return REG_ESPACE;
       
   348     case PCRE_ERROR_MATCHLIMIT: return REG_ESPACE;
       
   349     case PCRE_ERROR_BADUTF8: return REG_INVARG;
       
   350     case PCRE_ERROR_BADUTF8_OFFSET: return REG_INVARG;
       
   351     default: return REG_ASSERT;
       
   352     }
       
   353   }
       
   354 }
       
   355 
       
   356 /* End of pcreposix.c */