|
1 /************************************************* |
|
2 * Perl-Compatible Regular Expressions * |
|
3 *************************************************/ |
|
4 |
|
5 /* PCRE is a library of functions to support regular expressions whose syntax |
|
6 and semantics are as close as possible to those of the Perl 5 language. |
|
7 |
|
8 Written by Philip Hazel |
|
9 Copyright (c) 1997-2008 University of Cambridge |
|
10 |
|
11 ----------------------------------------------------------------------------- |
|
12 Redistribution and use in source and binary forms, with or without |
|
13 modification, are permitted provided that the following conditions are met: |
|
14 |
|
15 * Redistributions of source code must retain the above copyright notice, |
|
16 this list of conditions and the following disclaimer. |
|
17 |
|
18 * Redistributions in binary form must reproduce the above copyright |
|
19 notice, this list of conditions and the following disclaimer in the |
|
20 documentation and/or other materials provided with the distribution. |
|
21 |
|
22 * Neither the name of the University of Cambridge nor the names of its |
|
23 contributors may be used to endorse or promote products derived from |
|
24 this software without specific prior written permission. |
|
25 |
|
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
|
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
|
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
|
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
|
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
|
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
|
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
|
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
|
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
|
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
|
36 POSSIBILITY OF SUCH DAMAGE. |
|
37 ----------------------------------------------------------------------------- |
|
38 */ |
|
39 |
|
40 |
|
41 /* This module contains some convenience functions for extracting substrings |
|
42 from the subject string after a regex match has succeeded. The original idea |
|
43 for these functions came from Scott Wimer. */ |
|
44 |
|
45 |
|
46 #ifdef HAVE_CONFIG_H |
|
47 #include "config.h" |
|
48 #endif |
|
49 |
|
50 #include "pcre_internal.h" |
|
51 |
|
52 |
|
53 /************************************************* |
|
54 * Find number for named string * |
|
55 *************************************************/ |
|
56 |
|
57 /* This function is used by the get_first_set() function below, as well |
|
58 as being generally available. It assumes that names are unique. |
|
59 |
|
60 Arguments: |
|
61 code the compiled regex |
|
62 stringname the name whose number is required |
|
63 |
|
64 Returns: the number of the named parentheses, or a negative number |
|
65 (PCRE_ERROR_NOSUBSTRING) if not found |
|
66 */ |
|
67 |
|
68 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION |
|
69 pcre_get_stringnumber(const pcre *code, const char *stringname) |
|
70 { |
|
71 int rc; |
|
72 int entrysize; |
|
73 int top, bot; |
|
74 uschar *nametable; |
|
75 |
|
76 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0) |
|
77 return rc; |
|
78 if (top <= 0) return PCRE_ERROR_NOSUBSTRING; |
|
79 |
|
80 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0) |
|
81 return rc; |
|
82 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0) |
|
83 return rc; |
|
84 |
|
85 bot = 0; |
|
86 while (top > bot) |
|
87 { |
|
88 int mid = (top + bot) / 2; |
|
89 uschar *entry = nametable + entrysize*mid; |
|
90 int c = strcmp(stringname, (char *)(entry + 2)); |
|
91 if (c == 0) return (entry[0] << 8) + entry[1]; |
|
92 if (c > 0) bot = mid + 1; else top = mid; |
|
93 } |
|
94 |
|
95 return PCRE_ERROR_NOSUBSTRING; |
|
96 } |
|
97 |
|
98 |
|
99 |
|
100 /************************************************* |
|
101 * Find (multiple) entries for named string * |
|
102 *************************************************/ |
|
103 |
|
104 /* This is used by the get_first_set() function below, as well as being |
|
105 generally available. It is used when duplicated names are permitted. |
|
106 |
|
107 Arguments: |
|
108 code the compiled regex |
|
109 stringname the name whose entries required |
|
110 firstptr where to put the pointer to the first entry |
|
111 lastptr where to put the pointer to the last entry |
|
112 |
|
113 Returns: the length of each entry, or a negative number |
|
114 (PCRE_ERROR_NOSUBSTRING) if not found |
|
115 */ |
|
116 |
|
117 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION |
|
118 pcre_get_stringtable_entries(const pcre *code, const char *stringname, |
|
119 char **firstptr, char **lastptr) |
|
120 { |
|
121 int rc; |
|
122 int entrysize; |
|
123 int top, bot; |
|
124 uschar *nametable, *lastentry; |
|
125 |
|
126 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0) |
|
127 return rc; |
|
128 if (top <= 0) return PCRE_ERROR_NOSUBSTRING; |
|
129 |
|
130 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0) |
|
131 return rc; |
|
132 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0) |
|
133 return rc; |
|
134 |
|
135 lastentry = nametable + entrysize * (top - 1); |
|
136 bot = 0; |
|
137 while (top > bot) |
|
138 { |
|
139 int mid = (top + bot) / 2; |
|
140 uschar *entry = nametable + entrysize*mid; |
|
141 int c = strcmp(stringname, (char *)(entry + 2)); |
|
142 if (c == 0) |
|
143 { |
|
144 uschar *first = entry; |
|
145 uschar *last = entry; |
|
146 while (first > nametable) |
|
147 { |
|
148 if (strcmp(stringname, (char *)(first - entrysize + 2)) != 0) break; |
|
149 first -= entrysize; |
|
150 } |
|
151 while (last < lastentry) |
|
152 { |
|
153 if (strcmp(stringname, (char *)(last + entrysize + 2)) != 0) break; |
|
154 last += entrysize; |
|
155 } |
|
156 *firstptr = (char *)first; |
|
157 *lastptr = (char *)last; |
|
158 return entrysize; |
|
159 } |
|
160 if (c > 0) bot = mid + 1; else top = mid; |
|
161 } |
|
162 |
|
163 return PCRE_ERROR_NOSUBSTRING; |
|
164 } |
|
165 |
|
166 |
|
167 |
|
168 /************************************************* |
|
169 * Find first set of multiple named strings * |
|
170 *************************************************/ |
|
171 |
|
172 /* This function allows for duplicate names in the table of named substrings. |
|
173 It returns the number of the first one that was set in a pattern match. |
|
174 |
|
175 Arguments: |
|
176 code the compiled regex |
|
177 stringname the name of the capturing substring |
|
178 ovector the vector of matched substrings |
|
179 |
|
180 Returns: the number of the first that is set, |
|
181 or the number of the last one if none are set, |
|
182 or a negative number on error |
|
183 */ |
|
184 |
|
185 static int |
|
186 get_first_set(const pcre *code, const char *stringname, int *ovector) |
|
187 { |
|
188 const real_pcre *re = (const real_pcre *)code; |
|
189 int entrysize; |
|
190 char *first, *last; |
|
191 uschar *entry; |
|
192 if ((re->options & PCRE_DUPNAMES) == 0 && (re->flags & PCRE_JCHANGED) == 0) |
|
193 return pcre_get_stringnumber(code, stringname); |
|
194 entrysize = pcre_get_stringtable_entries(code, stringname, &first, &last); |
|
195 if (entrysize <= 0) return entrysize; |
|
196 for (entry = (uschar *)first; entry <= (uschar *)last; entry += entrysize) |
|
197 { |
|
198 int n = (entry[0] << 8) + entry[1]; |
|
199 if (ovector[n*2] >= 0) return n; |
|
200 } |
|
201 return (first[0] << 8) + first[1]; |
|
202 } |
|
203 |
|
204 |
|
205 |
|
206 |
|
207 /************************************************* |
|
208 * Copy captured string to given buffer * |
|
209 *************************************************/ |
|
210 |
|
211 /* This function copies a single captured substring into a given buffer. |
|
212 Note that we use memcpy() rather than strncpy() in case there are binary zeros |
|
213 in the string. |
|
214 |
|
215 Arguments: |
|
216 subject the subject string that was matched |
|
217 ovector pointer to the offsets table |
|
218 stringcount the number of substrings that were captured |
|
219 (i.e. the yield of the pcre_exec call, unless |
|
220 that was zero, in which case it should be 1/3 |
|
221 of the offset table size) |
|
222 stringnumber the number of the required substring |
|
223 buffer where to put the substring |
|
224 size the size of the buffer |
|
225 |
|
226 Returns: if successful: |
|
227 the length of the copied string, not including the zero |
|
228 that is put on the end; can be zero |
|
229 if not successful: |
|
230 PCRE_ERROR_NOMEMORY (-6) buffer too small |
|
231 PCRE_ERROR_NOSUBSTRING (-7) no such captured substring |
|
232 */ |
|
233 |
|
234 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION |
|
235 pcre_copy_substring(const char *subject, int *ovector, int stringcount, |
|
236 int stringnumber, char *buffer, int size) |
|
237 { |
|
238 int yield; |
|
239 if (stringnumber < 0 || stringnumber >= stringcount) |
|
240 return PCRE_ERROR_NOSUBSTRING; |
|
241 stringnumber *= 2; |
|
242 yield = ovector[stringnumber+1] - ovector[stringnumber]; |
|
243 if (size < yield + 1) return PCRE_ERROR_NOMEMORY; |
|
244 memcpy(buffer, subject + ovector[stringnumber], yield); |
|
245 buffer[yield] = 0; |
|
246 return yield; |
|
247 } |
|
248 |
|
249 |
|
250 |
|
251 /************************************************* |
|
252 * Copy named captured string to given buffer * |
|
253 *************************************************/ |
|
254 |
|
255 /* This function copies a single captured substring into a given buffer, |
|
256 identifying it by name. If the regex permits duplicate names, the first |
|
257 substring that is set is chosen. |
|
258 |
|
259 Arguments: |
|
260 code the compiled regex |
|
261 subject the subject string that was matched |
|
262 ovector pointer to the offsets table |
|
263 stringcount the number of substrings that were captured |
|
264 (i.e. the yield of the pcre_exec call, unless |
|
265 that was zero, in which case it should be 1/3 |
|
266 of the offset table size) |
|
267 stringname the name of the required substring |
|
268 buffer where to put the substring |
|
269 size the size of the buffer |
|
270 |
|
271 Returns: if successful: |
|
272 the length of the copied string, not including the zero |
|
273 that is put on the end; can be zero |
|
274 if not successful: |
|
275 PCRE_ERROR_NOMEMORY (-6) buffer too small |
|
276 PCRE_ERROR_NOSUBSTRING (-7) no such captured substring |
|
277 */ |
|
278 |
|
279 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION |
|
280 pcre_copy_named_substring(const pcre *code, const char *subject, int *ovector, |
|
281 int stringcount, const char *stringname, char *buffer, int size) |
|
282 { |
|
283 int n = get_first_set(code, stringname, ovector); |
|
284 if (n <= 0) return n; |
|
285 return pcre_copy_substring(subject, ovector, stringcount, n, buffer, size); |
|
286 } |
|
287 |
|
288 |
|
289 |
|
290 /************************************************* |
|
291 * Copy all captured strings to new store * |
|
292 *************************************************/ |
|
293 |
|
294 /* This function gets one chunk of store and builds a list of pointers and all |
|
295 of the captured substrings in it. A NULL pointer is put on the end of the list. |
|
296 |
|
297 Arguments: |
|
298 subject the subject string that was matched |
|
299 ovector pointer to the offsets table |
|
300 stringcount the number of substrings that were captured |
|
301 (i.e. the yield of the pcre_exec call, unless |
|
302 that was zero, in which case it should be 1/3 |
|
303 of the offset table size) |
|
304 listptr set to point to the list of pointers |
|
305 |
|
306 Returns: if successful: 0 |
|
307 if not successful: |
|
308 PCRE_ERROR_NOMEMORY (-6) failed to get store |
|
309 */ |
|
310 |
|
311 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION |
|
312 pcre_get_substring_list(const char *subject, int *ovector, int stringcount, |
|
313 const char ***listptr) |
|
314 { |
|
315 int i; |
|
316 int size = sizeof(char *); |
|
317 int double_count = stringcount * 2; |
|
318 char **stringlist; |
|
319 char *p; |
|
320 |
|
321 for (i = 0; i < double_count; i += 2) |
|
322 size += sizeof(char *) + ovector[i+1] - ovector[i] + 1; |
|
323 |
|
324 stringlist = (char **)(pcre_malloc)(size); |
|
325 if (stringlist == NULL) return PCRE_ERROR_NOMEMORY; |
|
326 |
|
327 *listptr = (const char **)stringlist; |
|
328 p = (char *)(stringlist + stringcount + 1); |
|
329 |
|
330 for (i = 0; i < double_count; i += 2) |
|
331 { |
|
332 int len = ovector[i+1] - ovector[i]; |
|
333 memcpy(p, subject + ovector[i], len); |
|
334 *stringlist++ = p; |
|
335 p += len; |
|
336 *p++ = 0; |
|
337 } |
|
338 |
|
339 *stringlist = NULL; |
|
340 return 0; |
|
341 } |
|
342 |
|
343 |
|
344 |
|
345 /************************************************* |
|
346 * Free store obtained by get_substring_list * |
|
347 *************************************************/ |
|
348 |
|
349 /* This function exists for the benefit of people calling PCRE from non-C |
|
350 programs that can call its functions, but not free() or (pcre_free)() directly. |
|
351 |
|
352 Argument: the result of a previous pcre_get_substring_list() |
|
353 Returns: nothing |
|
354 */ |
|
355 |
|
356 PCRE_EXP_DEFN void PCRE_CALL_CONVENTION |
|
357 pcre_free_substring_list(const char **pointer) |
|
358 { |
|
359 (pcre_free)((void *)pointer); |
|
360 } |
|
361 |
|
362 |
|
363 |
|
364 /************************************************* |
|
365 * Copy captured string to new store * |
|
366 *************************************************/ |
|
367 |
|
368 /* This function copies a single captured substring into a piece of new |
|
369 store |
|
370 |
|
371 Arguments: |
|
372 subject the subject string that was matched |
|
373 ovector pointer to the offsets table |
|
374 stringcount the number of substrings that were captured |
|
375 (i.e. the yield of the pcre_exec call, unless |
|
376 that was zero, in which case it should be 1/3 |
|
377 of the offset table size) |
|
378 stringnumber the number of the required substring |
|
379 stringptr where to put a pointer to the substring |
|
380 |
|
381 Returns: if successful: |
|
382 the length of the string, not including the zero that |
|
383 is put on the end; can be zero |
|
384 if not successful: |
|
385 PCRE_ERROR_NOMEMORY (-6) failed to get store |
|
386 PCRE_ERROR_NOSUBSTRING (-7) substring not present |
|
387 */ |
|
388 |
|
389 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION |
|
390 pcre_get_substring(const char *subject, int *ovector, int stringcount, |
|
391 int stringnumber, const char **stringptr) |
|
392 { |
|
393 int yield; |
|
394 char *substring; |
|
395 if (stringnumber < 0 || stringnumber >= stringcount) |
|
396 return PCRE_ERROR_NOSUBSTRING; |
|
397 stringnumber *= 2; |
|
398 yield = ovector[stringnumber+1] - ovector[stringnumber]; |
|
399 substring = (char *)(pcre_malloc)(yield + 1); |
|
400 if (substring == NULL) return PCRE_ERROR_NOMEMORY; |
|
401 memcpy(substring, subject + ovector[stringnumber], yield); |
|
402 substring[yield] = 0; |
|
403 *stringptr = substring; |
|
404 return yield; |
|
405 } |
|
406 |
|
407 |
|
408 |
|
409 /************************************************* |
|
410 * Copy named captured string to new store * |
|
411 *************************************************/ |
|
412 |
|
413 /* This function copies a single captured substring, identified by name, into |
|
414 new store. If the regex permits duplicate names, the first substring that is |
|
415 set is chosen. |
|
416 |
|
417 Arguments: |
|
418 code the compiled regex |
|
419 subject the subject string that was matched |
|
420 ovector pointer to the offsets table |
|
421 stringcount the number of substrings that were captured |
|
422 (i.e. the yield of the pcre_exec call, unless |
|
423 that was zero, in which case it should be 1/3 |
|
424 of the offset table size) |
|
425 stringname the name of the required substring |
|
426 stringptr where to put the pointer |
|
427 |
|
428 Returns: if successful: |
|
429 the length of the copied string, not including the zero |
|
430 that is put on the end; can be zero |
|
431 if not successful: |
|
432 PCRE_ERROR_NOMEMORY (-6) couldn't get memory |
|
433 PCRE_ERROR_NOSUBSTRING (-7) no such captured substring |
|
434 */ |
|
435 |
|
436 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION |
|
437 pcre_get_named_substring(const pcre *code, const char *subject, int *ovector, |
|
438 int stringcount, const char *stringname, const char **stringptr) |
|
439 { |
|
440 int n = get_first_set(code, stringname, ovector); |
|
441 if (n <= 0) return n; |
|
442 return pcre_get_substring(subject, ovector, stringcount, n, stringptr); |
|
443 } |
|
444 |
|
445 |
|
446 |
|
447 |
|
448 /************************************************* |
|
449 * Free store obtained by get_substring * |
|
450 *************************************************/ |
|
451 |
|
452 /* This function exists for the benefit of people calling PCRE from non-C |
|
453 programs that can call its functions, but not free() or (pcre_free)() directly. |
|
454 |
|
455 Argument: the result of a previous pcre_get_substring() |
|
456 Returns: nothing |
|
457 */ |
|
458 |
|
459 PCRE_EXP_DEFN void PCRE_CALL_CONVENTION |
|
460 pcre_free_substring(const char *pointer) |
|
461 { |
|
462 (pcre_free)((void *)pointer); |
|
463 } |
|
464 |
|
465 /* End of pcre_get.c */ |