libraries/spcre/libpcre/pcre/pcre_get.c
changeset 0 7f656887cf89
equal deleted inserted replaced
-1:000000000000 0:7f656887cf89
       
     1 /*************************************************
       
     2 *      Perl-Compatible Regular Expressions       *
       
     3 *************************************************/
       
     4 
       
     5 /* PCRE is a library of functions to support regular expressions whose syntax
       
     6 and semantics are as close as possible to those of the Perl 5 language.
       
     7 
       
     8                        Written by Philip Hazel
       
     9            Copyright (c) 1997-2008 University of Cambridge
       
    10 
       
    11 -----------------------------------------------------------------------------
       
    12 Redistribution and use in source and binary forms, with or without
       
    13 modification, are permitted provided that the following conditions are met:
       
    14 
       
    15     * Redistributions of source code must retain the above copyright notice,
       
    16       this list of conditions and the following disclaimer.
       
    17 
       
    18     * Redistributions in binary form must reproduce the above copyright
       
    19       notice, this list of conditions and the following disclaimer in the
       
    20       documentation and/or other materials provided with the distribution.
       
    21 
       
    22     * Neither the name of the University of Cambridge nor the names of its
       
    23       contributors may be used to endorse or promote products derived from
       
    24       this software without specific prior written permission.
       
    25 
       
    26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
       
    27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
       
    28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
       
    29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
       
    30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
       
    31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
       
    32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
       
    33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
       
    34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
       
    35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
       
    36 POSSIBILITY OF SUCH DAMAGE.
       
    37 -----------------------------------------------------------------------------
       
    38 */
       
    39 
       
    40 
       
    41 /* This module contains some convenience functions for extracting substrings
       
    42 from the subject string after a regex match has succeeded. The original idea
       
    43 for these functions came from Scott Wimer. */
       
    44 
       
    45 
       
    46 #ifdef HAVE_CONFIG_H
       
    47 #include "config.h"
       
    48 #endif
       
    49 
       
    50 #include "pcre_internal.h"
       
    51 
       
    52 
       
    53 /*************************************************
       
    54 *           Find number for named string         *
       
    55 *************************************************/
       
    56 
       
    57 /* This function is used by the get_first_set() function below, as well
       
    58 as being generally available. It assumes that names are unique.
       
    59 
       
    60 Arguments:
       
    61   code        the compiled regex
       
    62   stringname  the name whose number is required
       
    63 
       
    64 Returns:      the number of the named parentheses, or a negative number
       
    65                 (PCRE_ERROR_NOSUBSTRING) if not found
       
    66 */
       
    67 
       
    68 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
       
    69 pcre_get_stringnumber(const pcre *code, const char *stringname)
       
    70 {
       
    71 int rc;
       
    72 int entrysize;
       
    73 int top, bot;
       
    74 uschar *nametable;
       
    75 
       
    76 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
       
    77   return rc;
       
    78 if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
       
    79 
       
    80 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
       
    81   return rc;
       
    82 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
       
    83   return rc;
       
    84 
       
    85 bot = 0;
       
    86 while (top > bot)
       
    87   {
       
    88   int mid = (top + bot) / 2;
       
    89   uschar *entry = nametable + entrysize*mid;
       
    90   int c = strcmp(stringname, (char *)(entry + 2));
       
    91   if (c == 0) return (entry[0] << 8) + entry[1];
       
    92   if (c > 0) bot = mid + 1; else top = mid;
       
    93   }
       
    94 
       
    95 return PCRE_ERROR_NOSUBSTRING;
       
    96 }
       
    97 
       
    98 
       
    99 
       
   100 /*************************************************
       
   101 *     Find (multiple) entries for named string   *
       
   102 *************************************************/
       
   103 
       
   104 /* This is used by the get_first_set() function below, as well as being
       
   105 generally available. It is used when duplicated names are permitted.
       
   106 
       
   107 Arguments:
       
   108   code        the compiled regex
       
   109   stringname  the name whose entries required
       
   110   firstptr    where to put the pointer to the first entry
       
   111   lastptr     where to put the pointer to the last entry
       
   112 
       
   113 Returns:      the length of each entry, or a negative number
       
   114                 (PCRE_ERROR_NOSUBSTRING) if not found
       
   115 */
       
   116 
       
   117 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
       
   118 pcre_get_stringtable_entries(const pcre *code, const char *stringname,
       
   119   char **firstptr, char **lastptr)
       
   120 {
       
   121 int rc;
       
   122 int entrysize;
       
   123 int top, bot;
       
   124 uschar *nametable, *lastentry;
       
   125 
       
   126 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
       
   127   return rc;
       
   128 if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
       
   129 
       
   130 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
       
   131   return rc;
       
   132 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
       
   133   return rc;
       
   134 
       
   135 lastentry = nametable + entrysize * (top - 1);
       
   136 bot = 0;
       
   137 while (top > bot)
       
   138   {
       
   139   int mid = (top + bot) / 2;
       
   140   uschar *entry = nametable + entrysize*mid;
       
   141   int c = strcmp(stringname, (char *)(entry + 2));
       
   142   if (c == 0)
       
   143     {
       
   144     uschar *first = entry;
       
   145     uschar *last = entry;
       
   146     while (first > nametable)
       
   147       {
       
   148       if (strcmp(stringname, (char *)(first - entrysize + 2)) != 0) break;
       
   149       first -= entrysize;
       
   150       }
       
   151     while (last < lastentry)
       
   152       {
       
   153       if (strcmp(stringname, (char *)(last + entrysize + 2)) != 0) break;
       
   154       last += entrysize;
       
   155       }
       
   156     *firstptr = (char *)first;
       
   157     *lastptr = (char *)last;
       
   158     return entrysize;
       
   159     }
       
   160   if (c > 0) bot = mid + 1; else top = mid;
       
   161   }
       
   162 
       
   163 return PCRE_ERROR_NOSUBSTRING;
       
   164 }
       
   165 
       
   166 
       
   167 
       
   168 /*************************************************
       
   169 *    Find first set of multiple named strings    *
       
   170 *************************************************/
       
   171 
       
   172 /* This function allows for duplicate names in the table of named substrings.
       
   173 It returns the number of the first one that was set in a pattern match.
       
   174 
       
   175 Arguments:
       
   176   code         the compiled regex
       
   177   stringname   the name of the capturing substring
       
   178   ovector      the vector of matched substrings
       
   179 
       
   180 Returns:       the number of the first that is set,
       
   181                or the number of the last one if none are set,
       
   182                or a negative number on error
       
   183 */
       
   184 
       
   185 static int
       
   186 get_first_set(const pcre *code, const char *stringname, int *ovector)
       
   187 {
       
   188 const real_pcre *re = (const real_pcre *)code;
       
   189 int entrysize;
       
   190 char *first, *last;
       
   191 uschar *entry;
       
   192 if ((re->options & PCRE_DUPNAMES) == 0 && (re->flags & PCRE_JCHANGED) == 0)
       
   193   return pcre_get_stringnumber(code, stringname);
       
   194 entrysize = pcre_get_stringtable_entries(code, stringname, &first, &last);
       
   195 if (entrysize <= 0) return entrysize;
       
   196 for (entry = (uschar *)first; entry <= (uschar *)last; entry += entrysize)
       
   197   {
       
   198   int n = (entry[0] << 8) + entry[1];
       
   199   if (ovector[n*2] >= 0) return n;
       
   200   }
       
   201 return (first[0] << 8) + first[1];
       
   202 }
       
   203 
       
   204 
       
   205 
       
   206 
       
   207 /*************************************************
       
   208 *      Copy captured string to given buffer      *
       
   209 *************************************************/
       
   210 
       
   211 /* This function copies a single captured substring into a given buffer.
       
   212 Note that we use memcpy() rather than strncpy() in case there are binary zeros
       
   213 in the string.
       
   214 
       
   215 Arguments:
       
   216   subject        the subject string that was matched
       
   217   ovector        pointer to the offsets table
       
   218   stringcount    the number of substrings that were captured
       
   219                    (i.e. the yield of the pcre_exec call, unless
       
   220                    that was zero, in which case it should be 1/3
       
   221                    of the offset table size)
       
   222   stringnumber   the number of the required substring
       
   223   buffer         where to put the substring
       
   224   size           the size of the buffer
       
   225 
       
   226 Returns:         if successful:
       
   227                    the length of the copied string, not including the zero
       
   228                    that is put on the end; can be zero
       
   229                  if not successful:
       
   230                    PCRE_ERROR_NOMEMORY (-6) buffer too small
       
   231                    PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
       
   232 */
       
   233 
       
   234 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
       
   235 pcre_copy_substring(const char *subject, int *ovector, int stringcount,
       
   236   int stringnumber, char *buffer, int size)
       
   237 {
       
   238 int yield;
       
   239 if (stringnumber < 0 || stringnumber >= stringcount)
       
   240   return PCRE_ERROR_NOSUBSTRING;
       
   241 stringnumber *= 2;
       
   242 yield = ovector[stringnumber+1] - ovector[stringnumber];
       
   243 if (size < yield + 1) return PCRE_ERROR_NOMEMORY;
       
   244 memcpy(buffer, subject + ovector[stringnumber], yield);
       
   245 buffer[yield] = 0;
       
   246 return yield;
       
   247 }
       
   248 
       
   249 
       
   250 
       
   251 /*************************************************
       
   252 *   Copy named captured string to given buffer   *
       
   253 *************************************************/
       
   254 
       
   255 /* This function copies a single captured substring into a given buffer,
       
   256 identifying it by name. If the regex permits duplicate names, the first
       
   257 substring that is set is chosen.
       
   258 
       
   259 Arguments:
       
   260   code           the compiled regex
       
   261   subject        the subject string that was matched
       
   262   ovector        pointer to the offsets table
       
   263   stringcount    the number of substrings that were captured
       
   264                    (i.e. the yield of the pcre_exec call, unless
       
   265                    that was zero, in which case it should be 1/3
       
   266                    of the offset table size)
       
   267   stringname     the name of the required substring
       
   268   buffer         where to put the substring
       
   269   size           the size of the buffer
       
   270 
       
   271 Returns:         if successful:
       
   272                    the length of the copied string, not including the zero
       
   273                    that is put on the end; can be zero
       
   274                  if not successful:
       
   275                    PCRE_ERROR_NOMEMORY (-6) buffer too small
       
   276                    PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
       
   277 */
       
   278 
       
   279 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
       
   280 pcre_copy_named_substring(const pcre *code, const char *subject, int *ovector,
       
   281   int stringcount, const char *stringname, char *buffer, int size)
       
   282 {
       
   283 int n = get_first_set(code, stringname, ovector);
       
   284 if (n <= 0) return n;
       
   285 return pcre_copy_substring(subject, ovector, stringcount, n, buffer, size);
       
   286 }
       
   287 
       
   288 
       
   289 
       
   290 /*************************************************
       
   291 *      Copy all captured strings to new store    *
       
   292 *************************************************/
       
   293 
       
   294 /* This function gets one chunk of store and builds a list of pointers and all
       
   295 of the captured substrings in it. A NULL pointer is put on the end of the list.
       
   296 
       
   297 Arguments:
       
   298   subject        the subject string that was matched
       
   299   ovector        pointer to the offsets table
       
   300   stringcount    the number of substrings that were captured
       
   301                    (i.e. the yield of the pcre_exec call, unless
       
   302                    that was zero, in which case it should be 1/3
       
   303                    of the offset table size)
       
   304   listptr        set to point to the list of pointers
       
   305 
       
   306 Returns:         if successful: 0
       
   307                  if not successful:
       
   308                    PCRE_ERROR_NOMEMORY (-6) failed to get store
       
   309 */
       
   310 
       
   311 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
       
   312 pcre_get_substring_list(const char *subject, int *ovector, int stringcount,
       
   313   const char ***listptr)
       
   314 {
       
   315 int i;
       
   316 int size = sizeof(char *);
       
   317 int double_count = stringcount * 2;
       
   318 char **stringlist;
       
   319 char *p;
       
   320 
       
   321 for (i = 0; i < double_count; i += 2)
       
   322   size += sizeof(char *) + ovector[i+1] - ovector[i] + 1;
       
   323 
       
   324 stringlist = (char **)(pcre_malloc)(size);
       
   325 if (stringlist == NULL) return PCRE_ERROR_NOMEMORY;
       
   326 
       
   327 *listptr = (const char **)stringlist;
       
   328 p = (char *)(stringlist + stringcount + 1);
       
   329 
       
   330 for (i = 0; i < double_count; i += 2)
       
   331   {
       
   332   int len = ovector[i+1] - ovector[i];
       
   333   memcpy(p, subject + ovector[i], len);
       
   334   *stringlist++ = p;
       
   335   p += len;
       
   336   *p++ = 0;
       
   337   }
       
   338 
       
   339 *stringlist = NULL;
       
   340 return 0;
       
   341 }
       
   342 
       
   343 
       
   344 
       
   345 /*************************************************
       
   346 *   Free store obtained by get_substring_list    *
       
   347 *************************************************/
       
   348 
       
   349 /* This function exists for the benefit of people calling PCRE from non-C
       
   350 programs that can call its functions, but not free() or (pcre_free)() directly.
       
   351 
       
   352 Argument:   the result of a previous pcre_get_substring_list()
       
   353 Returns:    nothing
       
   354 */
       
   355 
       
   356 PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
       
   357 pcre_free_substring_list(const char **pointer)
       
   358 {
       
   359 (pcre_free)((void *)pointer);
       
   360 }
       
   361 
       
   362 
       
   363 
       
   364 /*************************************************
       
   365 *      Copy captured string to new store         *
       
   366 *************************************************/
       
   367 
       
   368 /* This function copies a single captured substring into a piece of new
       
   369 store
       
   370 
       
   371 Arguments:
       
   372   subject        the subject string that was matched
       
   373   ovector        pointer to the offsets table
       
   374   stringcount    the number of substrings that were captured
       
   375                    (i.e. the yield of the pcre_exec call, unless
       
   376                    that was zero, in which case it should be 1/3
       
   377                    of the offset table size)
       
   378   stringnumber   the number of the required substring
       
   379   stringptr      where to put a pointer to the substring
       
   380 
       
   381 Returns:         if successful:
       
   382                    the length of the string, not including the zero that
       
   383                    is put on the end; can be zero
       
   384                  if not successful:
       
   385                    PCRE_ERROR_NOMEMORY (-6) failed to get store
       
   386                    PCRE_ERROR_NOSUBSTRING (-7) substring not present
       
   387 */
       
   388 
       
   389 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
       
   390 pcre_get_substring(const char *subject, int *ovector, int stringcount,
       
   391   int stringnumber, const char **stringptr)
       
   392 {
       
   393 int yield;
       
   394 char *substring;
       
   395 if (stringnumber < 0 || stringnumber >= stringcount)
       
   396   return PCRE_ERROR_NOSUBSTRING;
       
   397 stringnumber *= 2;
       
   398 yield = ovector[stringnumber+1] - ovector[stringnumber];
       
   399 substring = (char *)(pcre_malloc)(yield + 1);
       
   400 if (substring == NULL) return PCRE_ERROR_NOMEMORY;
       
   401 memcpy(substring, subject + ovector[stringnumber], yield);
       
   402 substring[yield] = 0;
       
   403 *stringptr = substring;
       
   404 return yield;
       
   405 }
       
   406 
       
   407 
       
   408 
       
   409 /*************************************************
       
   410 *   Copy named captured string to new store      *
       
   411 *************************************************/
       
   412 
       
   413 /* This function copies a single captured substring, identified by name, into
       
   414 new store. If the regex permits duplicate names, the first substring that is
       
   415 set is chosen.
       
   416 
       
   417 Arguments:
       
   418   code           the compiled regex
       
   419   subject        the subject string that was matched
       
   420   ovector        pointer to the offsets table
       
   421   stringcount    the number of substrings that were captured
       
   422                    (i.e. the yield of the pcre_exec call, unless
       
   423                    that was zero, in which case it should be 1/3
       
   424                    of the offset table size)
       
   425   stringname     the name of the required substring
       
   426   stringptr      where to put the pointer
       
   427 
       
   428 Returns:         if successful:
       
   429                    the length of the copied string, not including the zero
       
   430                    that is put on the end; can be zero
       
   431                  if not successful:
       
   432                    PCRE_ERROR_NOMEMORY (-6) couldn't get memory
       
   433                    PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
       
   434 */
       
   435 
       
   436 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
       
   437 pcre_get_named_substring(const pcre *code, const char *subject, int *ovector,
       
   438   int stringcount, const char *stringname, const char **stringptr)
       
   439 {
       
   440 int n = get_first_set(code, stringname, ovector);
       
   441 if (n <= 0) return n;
       
   442 return pcre_get_substring(subject, ovector, stringcount, n, stringptr);
       
   443 }
       
   444 
       
   445 
       
   446 
       
   447 
       
   448 /*************************************************
       
   449 *       Free store obtained by get_substring     *
       
   450 *************************************************/
       
   451 
       
   452 /* This function exists for the benefit of people calling PCRE from non-C
       
   453 programs that can call its functions, but not free() or (pcre_free)() directly.
       
   454 
       
   455 Argument:   the result of a previous pcre_get_substring()
       
   456 Returns:    nothing
       
   457 */
       
   458 
       
   459 PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
       
   460 pcre_free_substring(const char *pointer)
       
   461 {
       
   462 (pcre_free)((void *)pointer);
       
   463 }
       
   464 
       
   465 /* End of pcre_get.c */