|
1 /* |
|
2 * Secret Labs' Regular Expression Engine |
|
3 * |
|
4 * regular expression matching engine |
|
5 * |
|
6 * partial history: |
|
7 * 1999-10-24 fl created (based on existing template matcher code) |
|
8 * 2000-03-06 fl first alpha, sort of |
|
9 * 2000-08-01 fl fixes for 1.6b1 |
|
10 * 2000-08-07 fl use PyOS_CheckStack() if available |
|
11 * 2000-09-20 fl added expand method |
|
12 * 2001-03-20 fl lots of fixes for 2.1b2 |
|
13 * 2001-04-15 fl export copyright as Python attribute, not global |
|
14 * 2001-04-28 fl added __copy__ methods (work in progress) |
|
15 * 2001-05-14 fl fixes for 1.5.2 compatibility |
|
16 * 2001-07-01 fl added BIGCHARSET support (from Martin von Loewis) |
|
17 * 2001-10-18 fl fixed group reset issue (from Matthew Mueller) |
|
18 * 2001-10-20 fl added split primitive; reenable unicode for 1.6/2.0/2.1 |
|
19 * 2001-10-21 fl added sub/subn primitive |
|
20 * 2001-10-24 fl added finditer primitive (for 2.2 only) |
|
21 * 2001-12-07 fl fixed memory leak in sub/subn (Guido van Rossum) |
|
22 * 2002-11-09 fl fixed empty sub/subn return type |
|
23 * 2003-04-18 mvl fully support 4-byte codes |
|
24 * 2003-10-17 gn implemented non recursive scheme |
|
25 * |
|
26 * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved. |
|
27 * |
|
28 * This version of the SRE library can be redistributed under CNRI's |
|
29 * Python 1.6 license. For any other use, please contact Secret Labs |
|
30 * AB (info@pythonware.com). |
|
31 * |
|
32 * Portions of this engine have been developed in cooperation with |
|
33 * CNRI. Hewlett-Packard provided funding for 1.6 integration and |
|
34 * other compatibility work. |
|
35 */ |
|
36 |
|
37 #ifndef SRE_RECURSIVE |
|
38 |
|
39 static char copyright[] = |
|
40 " SRE 2.2.2 Copyright (c) 1997-2002 by Secret Labs AB "; |
|
41 |
|
42 #define PY_SSIZE_T_CLEAN |
|
43 |
|
44 #include "Python.h" |
|
45 #include "structmember.h" /* offsetof */ |
|
46 |
|
47 #include "sre.h" |
|
48 |
|
49 #include <ctype.h> |
|
50 |
|
51 /* name of this module, minus the leading underscore */ |
|
52 #if !defined(SRE_MODULE) |
|
53 #define SRE_MODULE "sre" |
|
54 #endif |
|
55 |
|
56 #define SRE_PY_MODULE "re" |
|
57 |
|
58 /* defining this one enables tracing */ |
|
59 #undef VERBOSE |
|
60 |
|
61 #if PY_VERSION_HEX >= 0x01060000 |
|
62 #if PY_VERSION_HEX < 0x02020000 || defined(Py_USING_UNICODE) |
|
63 /* defining this enables unicode support (default under 1.6a1 and later) */ |
|
64 #define HAVE_UNICODE |
|
65 #endif |
|
66 #endif |
|
67 |
|
68 /* -------------------------------------------------------------------- */ |
|
69 /* optional features */ |
|
70 |
|
71 /* enables fast searching */ |
|
72 #define USE_FAST_SEARCH |
|
73 |
|
74 /* enables aggressive inlining (always on for Visual C) */ |
|
75 #undef USE_INLINE |
|
76 |
|
77 /* enables copy/deepcopy handling (work in progress) */ |
|
78 #undef USE_BUILTIN_COPY |
|
79 |
|
80 #if PY_VERSION_HEX < 0x01060000 |
|
81 #define PyObject_DEL(op) PyMem_DEL((op)) |
|
82 #endif |
|
83 |
|
84 /* -------------------------------------------------------------------- */ |
|
85 |
|
86 #if defined(_MSC_VER) |
|
87 #pragma optimize("agtw", on) /* doesn't seem to make much difference... */ |
|
88 #pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */ |
|
89 /* fastest possible local call under MSVC */ |
|
90 #define LOCAL(type) static __inline type __fastcall |
|
91 #elif defined(USE_INLINE) |
|
92 #define LOCAL(type) static inline type |
|
93 #else |
|
94 #define LOCAL(type) static type |
|
95 #endif |
|
96 |
|
97 /* error codes */ |
|
98 #define SRE_ERROR_ILLEGAL -1 /* illegal opcode */ |
|
99 #define SRE_ERROR_STATE -2 /* illegal state */ |
|
100 #define SRE_ERROR_RECURSION_LIMIT -3 /* runaway recursion */ |
|
101 #define SRE_ERROR_MEMORY -9 /* out of memory */ |
|
102 #define SRE_ERROR_INTERRUPTED -10 /* signal handler raised exception */ |
|
103 |
|
104 #if defined(VERBOSE) |
|
105 #define TRACE(v) printf v |
|
106 #else |
|
107 #define TRACE(v) |
|
108 #endif |
|
109 |
|
110 /* -------------------------------------------------------------------- */ |
|
111 /* search engine state */ |
|
112 |
|
113 /* default character predicates (run sre_chars.py to regenerate tables) */ |
|
114 |
|
115 #define SRE_DIGIT_MASK 1 |
|
116 #define SRE_SPACE_MASK 2 |
|
117 #define SRE_LINEBREAK_MASK 4 |
|
118 #define SRE_ALNUM_MASK 8 |
|
119 #define SRE_WORD_MASK 16 |
|
120 |
|
121 /* FIXME: this assumes ASCII. create tables in init_sre() instead */ |
|
122 |
|
123 static char sre_char_info[128] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 6, 2, |
|
124 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, |
|
125 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 25, 25, 25, 25, 25, 25, 25, |
|
126 25, 25, 0, 0, 0, 0, 0, 0, 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, |
|
127 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, |
|
128 0, 0, 16, 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, |
|
129 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 0, 0, 0 }; |
|
130 |
|
131 static char sre_char_lower[128] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, |
|
132 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, |
|
133 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, |
|
134 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, |
|
135 61, 62, 63, 64, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, |
|
136 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, |
|
137 122, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, |
|
138 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, |
|
139 120, 121, 122, 123, 124, 125, 126, 127 }; |
|
140 |
|
141 #define SRE_IS_DIGIT(ch)\ |
|
142 ((ch) < 128 ? (sre_char_info[(ch)] & SRE_DIGIT_MASK) : 0) |
|
143 #define SRE_IS_SPACE(ch)\ |
|
144 ((ch) < 128 ? (sre_char_info[(ch)] & SRE_SPACE_MASK) : 0) |
|
145 #define SRE_IS_LINEBREAK(ch)\ |
|
146 ((ch) < 128 ? (sre_char_info[(ch)] & SRE_LINEBREAK_MASK) : 0) |
|
147 #define SRE_IS_ALNUM(ch)\ |
|
148 ((ch) < 128 ? (sre_char_info[(ch)] & SRE_ALNUM_MASK) : 0) |
|
149 #define SRE_IS_WORD(ch)\ |
|
150 ((ch) < 128 ? (sre_char_info[(ch)] & SRE_WORD_MASK) : 0) |
|
151 |
|
152 static unsigned int sre_lower(unsigned int ch) |
|
153 { |
|
154 return ((ch) < 128 ? (unsigned int)sre_char_lower[ch] : ch); |
|
155 } |
|
156 |
|
157 /* locale-specific character predicates */ |
|
158 /* !(c & ~N) == (c < N+1) for any unsigned c, this avoids |
|
159 * warnings when c's type supports only numbers < N+1 */ |
|
160 #define SRE_LOC_IS_DIGIT(ch) (!((ch) & ~255) ? isdigit((ch)) : 0) |
|
161 #define SRE_LOC_IS_SPACE(ch) (!((ch) & ~255) ? isspace((ch)) : 0) |
|
162 #define SRE_LOC_IS_LINEBREAK(ch) ((ch) == '\n') |
|
163 #define SRE_LOC_IS_ALNUM(ch) (!((ch) & ~255) ? isalnum((ch)) : 0) |
|
164 #define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_') |
|
165 |
|
166 static unsigned int sre_lower_locale(unsigned int ch) |
|
167 { |
|
168 return ((ch) < 256 ? (unsigned int)tolower((ch)) : ch); |
|
169 } |
|
170 |
|
171 /* unicode-specific character predicates */ |
|
172 |
|
173 #if defined(HAVE_UNICODE) |
|
174 |
|
175 #define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDIGIT((Py_UNICODE)(ch)) |
|
176 #define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE((Py_UNICODE)(ch)) |
|
177 #define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK((Py_UNICODE)(ch)) |
|
178 #define SRE_UNI_IS_ALNUM(ch) Py_UNICODE_ISALNUM((Py_UNICODE)(ch)) |
|
179 #define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM((ch)) || (ch) == '_') |
|
180 |
|
181 static unsigned int sre_lower_unicode(unsigned int ch) |
|
182 { |
|
183 return (unsigned int) Py_UNICODE_TOLOWER((Py_UNICODE)(ch)); |
|
184 } |
|
185 |
|
186 #endif |
|
187 |
|
188 LOCAL(int) |
|
189 sre_category(SRE_CODE category, unsigned int ch) |
|
190 { |
|
191 switch (category) { |
|
192 |
|
193 case SRE_CATEGORY_DIGIT: |
|
194 return SRE_IS_DIGIT(ch); |
|
195 case SRE_CATEGORY_NOT_DIGIT: |
|
196 return !SRE_IS_DIGIT(ch); |
|
197 case SRE_CATEGORY_SPACE: |
|
198 return SRE_IS_SPACE(ch); |
|
199 case SRE_CATEGORY_NOT_SPACE: |
|
200 return !SRE_IS_SPACE(ch); |
|
201 case SRE_CATEGORY_WORD: |
|
202 return SRE_IS_WORD(ch); |
|
203 case SRE_CATEGORY_NOT_WORD: |
|
204 return !SRE_IS_WORD(ch); |
|
205 case SRE_CATEGORY_LINEBREAK: |
|
206 return SRE_IS_LINEBREAK(ch); |
|
207 case SRE_CATEGORY_NOT_LINEBREAK: |
|
208 return !SRE_IS_LINEBREAK(ch); |
|
209 |
|
210 case SRE_CATEGORY_LOC_WORD: |
|
211 return SRE_LOC_IS_WORD(ch); |
|
212 case SRE_CATEGORY_LOC_NOT_WORD: |
|
213 return !SRE_LOC_IS_WORD(ch); |
|
214 |
|
215 #if defined(HAVE_UNICODE) |
|
216 case SRE_CATEGORY_UNI_DIGIT: |
|
217 return SRE_UNI_IS_DIGIT(ch); |
|
218 case SRE_CATEGORY_UNI_NOT_DIGIT: |
|
219 return !SRE_UNI_IS_DIGIT(ch); |
|
220 case SRE_CATEGORY_UNI_SPACE: |
|
221 return SRE_UNI_IS_SPACE(ch); |
|
222 case SRE_CATEGORY_UNI_NOT_SPACE: |
|
223 return !SRE_UNI_IS_SPACE(ch); |
|
224 case SRE_CATEGORY_UNI_WORD: |
|
225 return SRE_UNI_IS_WORD(ch); |
|
226 case SRE_CATEGORY_UNI_NOT_WORD: |
|
227 return !SRE_UNI_IS_WORD(ch); |
|
228 case SRE_CATEGORY_UNI_LINEBREAK: |
|
229 return SRE_UNI_IS_LINEBREAK(ch); |
|
230 case SRE_CATEGORY_UNI_NOT_LINEBREAK: |
|
231 return !SRE_UNI_IS_LINEBREAK(ch); |
|
232 #else |
|
233 case SRE_CATEGORY_UNI_DIGIT: |
|
234 return SRE_IS_DIGIT(ch); |
|
235 case SRE_CATEGORY_UNI_NOT_DIGIT: |
|
236 return !SRE_IS_DIGIT(ch); |
|
237 case SRE_CATEGORY_UNI_SPACE: |
|
238 return SRE_IS_SPACE(ch); |
|
239 case SRE_CATEGORY_UNI_NOT_SPACE: |
|
240 return !SRE_IS_SPACE(ch); |
|
241 case SRE_CATEGORY_UNI_WORD: |
|
242 return SRE_LOC_IS_WORD(ch); |
|
243 case SRE_CATEGORY_UNI_NOT_WORD: |
|
244 return !SRE_LOC_IS_WORD(ch); |
|
245 case SRE_CATEGORY_UNI_LINEBREAK: |
|
246 return SRE_IS_LINEBREAK(ch); |
|
247 case SRE_CATEGORY_UNI_NOT_LINEBREAK: |
|
248 return !SRE_IS_LINEBREAK(ch); |
|
249 #endif |
|
250 } |
|
251 return 0; |
|
252 } |
|
253 |
|
254 /* helpers */ |
|
255 |
|
256 static void |
|
257 data_stack_dealloc(SRE_STATE* state) |
|
258 { |
|
259 if (state->data_stack) { |
|
260 PyMem_FREE(state->data_stack); |
|
261 state->data_stack = NULL; |
|
262 } |
|
263 state->data_stack_size = state->data_stack_base = 0; |
|
264 } |
|
265 |
|
266 static int |
|
267 data_stack_grow(SRE_STATE* state, Py_ssize_t size) |
|
268 { |
|
269 Py_ssize_t minsize, cursize; |
|
270 minsize = state->data_stack_base+size; |
|
271 cursize = state->data_stack_size; |
|
272 if (cursize < minsize) { |
|
273 void* stack; |
|
274 cursize = minsize+minsize/4+1024; |
|
275 TRACE(("allocate/grow stack %d\n", cursize)); |
|
276 stack = PyMem_REALLOC(state->data_stack, cursize); |
|
277 if (!stack) { |
|
278 data_stack_dealloc(state); |
|
279 return SRE_ERROR_MEMORY; |
|
280 } |
|
281 state->data_stack = (char *)stack; |
|
282 state->data_stack_size = cursize; |
|
283 } |
|
284 return 0; |
|
285 } |
|
286 |
|
287 /* generate 8-bit version */ |
|
288 |
|
289 #define SRE_CHAR unsigned char |
|
290 #define SRE_AT sre_at |
|
291 #define SRE_COUNT sre_count |
|
292 #define SRE_CHARSET sre_charset |
|
293 #define SRE_INFO sre_info |
|
294 #define SRE_MATCH sre_match |
|
295 #define SRE_MATCH_CONTEXT sre_match_context |
|
296 #define SRE_SEARCH sre_search |
|
297 #define SRE_LITERAL_TEMPLATE sre_literal_template |
|
298 |
|
299 #if defined(HAVE_UNICODE) |
|
300 |
|
301 #define SRE_RECURSIVE |
|
302 #include "_sre.c" |
|
303 #undef SRE_RECURSIVE |
|
304 |
|
305 #undef SRE_LITERAL_TEMPLATE |
|
306 #undef SRE_SEARCH |
|
307 #undef SRE_MATCH |
|
308 #undef SRE_MATCH_CONTEXT |
|
309 #undef SRE_INFO |
|
310 #undef SRE_CHARSET |
|
311 #undef SRE_COUNT |
|
312 #undef SRE_AT |
|
313 #undef SRE_CHAR |
|
314 |
|
315 /* generate 16-bit unicode version */ |
|
316 |
|
317 #define SRE_CHAR Py_UNICODE |
|
318 #define SRE_AT sre_uat |
|
319 #define SRE_COUNT sre_ucount |
|
320 #define SRE_CHARSET sre_ucharset |
|
321 #define SRE_INFO sre_uinfo |
|
322 #define SRE_MATCH sre_umatch |
|
323 #define SRE_MATCH_CONTEXT sre_umatch_context |
|
324 #define SRE_SEARCH sre_usearch |
|
325 #define SRE_LITERAL_TEMPLATE sre_uliteral_template |
|
326 #endif |
|
327 |
|
328 #endif /* SRE_RECURSIVE */ |
|
329 |
|
330 /* -------------------------------------------------------------------- */ |
|
331 /* String matching engine */ |
|
332 |
|
333 /* the following section is compiled twice, with different character |
|
334 settings */ |
|
335 |
|
336 LOCAL(int) |
|
337 SRE_AT(SRE_STATE* state, SRE_CHAR* ptr, SRE_CODE at) |
|
338 { |
|
339 /* check if pointer is at given position */ |
|
340 |
|
341 Py_ssize_t thisp, thatp; |
|
342 |
|
343 switch (at) { |
|
344 |
|
345 case SRE_AT_BEGINNING: |
|
346 case SRE_AT_BEGINNING_STRING: |
|
347 return ((void*) ptr == state->beginning); |
|
348 |
|
349 case SRE_AT_BEGINNING_LINE: |
|
350 return ((void*) ptr == state->beginning || |
|
351 SRE_IS_LINEBREAK((int) ptr[-1])); |
|
352 |
|
353 case SRE_AT_END: |
|
354 return (((void*) (ptr+1) == state->end && |
|
355 SRE_IS_LINEBREAK((int) ptr[0])) || |
|
356 ((void*) ptr == state->end)); |
|
357 |
|
358 case SRE_AT_END_LINE: |
|
359 return ((void*) ptr == state->end || |
|
360 SRE_IS_LINEBREAK((int) ptr[0])); |
|
361 |
|
362 case SRE_AT_END_STRING: |
|
363 return ((void*) ptr == state->end); |
|
364 |
|
365 case SRE_AT_BOUNDARY: |
|
366 if (state->beginning == state->end) |
|
367 return 0; |
|
368 thatp = ((void*) ptr > state->beginning) ? |
|
369 SRE_IS_WORD((int) ptr[-1]) : 0; |
|
370 thisp = ((void*) ptr < state->end) ? |
|
371 SRE_IS_WORD((int) ptr[0]) : 0; |
|
372 return thisp != thatp; |
|
373 |
|
374 case SRE_AT_NON_BOUNDARY: |
|
375 if (state->beginning == state->end) |
|
376 return 0; |
|
377 thatp = ((void*) ptr > state->beginning) ? |
|
378 SRE_IS_WORD((int) ptr[-1]) : 0; |
|
379 thisp = ((void*) ptr < state->end) ? |
|
380 SRE_IS_WORD((int) ptr[0]) : 0; |
|
381 return thisp == thatp; |
|
382 |
|
383 case SRE_AT_LOC_BOUNDARY: |
|
384 if (state->beginning == state->end) |
|
385 return 0; |
|
386 thatp = ((void*) ptr > state->beginning) ? |
|
387 SRE_LOC_IS_WORD((int) ptr[-1]) : 0; |
|
388 thisp = ((void*) ptr < state->end) ? |
|
389 SRE_LOC_IS_WORD((int) ptr[0]) : 0; |
|
390 return thisp != thatp; |
|
391 |
|
392 case SRE_AT_LOC_NON_BOUNDARY: |
|
393 if (state->beginning == state->end) |
|
394 return 0; |
|
395 thatp = ((void*) ptr > state->beginning) ? |
|
396 SRE_LOC_IS_WORD((int) ptr[-1]) : 0; |
|
397 thisp = ((void*) ptr < state->end) ? |
|
398 SRE_LOC_IS_WORD((int) ptr[0]) : 0; |
|
399 return thisp == thatp; |
|
400 |
|
401 #if defined(HAVE_UNICODE) |
|
402 case SRE_AT_UNI_BOUNDARY: |
|
403 if (state->beginning == state->end) |
|
404 return 0; |
|
405 thatp = ((void*) ptr > state->beginning) ? |
|
406 SRE_UNI_IS_WORD((int) ptr[-1]) : 0; |
|
407 thisp = ((void*) ptr < state->end) ? |
|
408 SRE_UNI_IS_WORD((int) ptr[0]) : 0; |
|
409 return thisp != thatp; |
|
410 |
|
411 case SRE_AT_UNI_NON_BOUNDARY: |
|
412 if (state->beginning == state->end) |
|
413 return 0; |
|
414 thatp = ((void*) ptr > state->beginning) ? |
|
415 SRE_UNI_IS_WORD((int) ptr[-1]) : 0; |
|
416 thisp = ((void*) ptr < state->end) ? |
|
417 SRE_UNI_IS_WORD((int) ptr[0]) : 0; |
|
418 return thisp == thatp; |
|
419 #endif |
|
420 |
|
421 } |
|
422 |
|
423 return 0; |
|
424 } |
|
425 |
|
426 LOCAL(int) |
|
427 SRE_CHARSET(SRE_CODE* set, SRE_CODE ch) |
|
428 { |
|
429 /* check if character is a member of the given set */ |
|
430 |
|
431 int ok = 1; |
|
432 |
|
433 for (;;) { |
|
434 switch (*set++) { |
|
435 |
|
436 case SRE_OP_FAILURE: |
|
437 return !ok; |
|
438 |
|
439 case SRE_OP_LITERAL: |
|
440 /* <LITERAL> <code> */ |
|
441 if (ch == set[0]) |
|
442 return ok; |
|
443 set++; |
|
444 break; |
|
445 |
|
446 case SRE_OP_CATEGORY: |
|
447 /* <CATEGORY> <code> */ |
|
448 if (sre_category(set[0], (int) ch)) |
|
449 return ok; |
|
450 set += 1; |
|
451 break; |
|
452 |
|
453 case SRE_OP_CHARSET: |
|
454 if (sizeof(SRE_CODE) == 2) { |
|
455 /* <CHARSET> <bitmap> (16 bits per code word) */ |
|
456 if (ch < 256 && (set[ch >> 4] & (1 << (ch & 15)))) |
|
457 return ok; |
|
458 set += 16; |
|
459 } |
|
460 else { |
|
461 /* <CHARSET> <bitmap> (32 bits per code word) */ |
|
462 if (ch < 256 && (set[ch >> 5] & (1 << (ch & 31)))) |
|
463 return ok; |
|
464 set += 8; |
|
465 } |
|
466 break; |
|
467 |
|
468 case SRE_OP_RANGE: |
|
469 /* <RANGE> <lower> <upper> */ |
|
470 if (set[0] <= ch && ch <= set[1]) |
|
471 return ok; |
|
472 set += 2; |
|
473 break; |
|
474 |
|
475 case SRE_OP_NEGATE: |
|
476 ok = !ok; |
|
477 break; |
|
478 |
|
479 case SRE_OP_BIGCHARSET: |
|
480 /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */ |
|
481 { |
|
482 Py_ssize_t count, block; |
|
483 count = *(set++); |
|
484 |
|
485 if (sizeof(SRE_CODE) == 2) { |
|
486 block = ((unsigned char*)set)[ch >> 8]; |
|
487 set += 128; |
|
488 if (set[block*16 + ((ch & 255)>>4)] & (1 << (ch & 15))) |
|
489 return ok; |
|
490 set += count*16; |
|
491 } |
|
492 else { |
|
493 /* !(c & ~N) == (c < N+1) for any unsigned c, this avoids |
|
494 * warnings when c's type supports only numbers < N+1 */ |
|
495 if (!(ch & ~65535)) |
|
496 block = ((unsigned char*)set)[ch >> 8]; |
|
497 else |
|
498 block = -1; |
|
499 set += 64; |
|
500 if (block >=0 && |
|
501 (set[block*8 + ((ch & 255)>>5)] & (1 << (ch & 31)))) |
|
502 return ok; |
|
503 set += count*8; |
|
504 } |
|
505 break; |
|
506 } |
|
507 |
|
508 default: |
|
509 /* internal error -- there's not much we can do about it |
|
510 here, so let's just pretend it didn't match... */ |
|
511 return 0; |
|
512 } |
|
513 } |
|
514 } |
|
515 |
|
516 LOCAL(Py_ssize_t) SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern); |
|
517 |
|
518 LOCAL(Py_ssize_t) |
|
519 SRE_COUNT(SRE_STATE* state, SRE_CODE* pattern, Py_ssize_t maxcount) |
|
520 { |
|
521 SRE_CODE chr; |
|
522 SRE_CHAR* ptr = (SRE_CHAR *)state->ptr; |
|
523 SRE_CHAR* end = (SRE_CHAR *)state->end; |
|
524 Py_ssize_t i; |
|
525 |
|
526 /* adjust end */ |
|
527 if (maxcount < end - ptr && maxcount != 65535) |
|
528 end = ptr + maxcount; |
|
529 |
|
530 switch (pattern[0]) { |
|
531 |
|
532 case SRE_OP_IN: |
|
533 /* repeated set */ |
|
534 TRACE(("|%p|%p|COUNT IN\n", pattern, ptr)); |
|
535 while (ptr < end && SRE_CHARSET(pattern + 2, *ptr)) |
|
536 ptr++; |
|
537 break; |
|
538 |
|
539 case SRE_OP_ANY: |
|
540 /* repeated dot wildcard. */ |
|
541 TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr)); |
|
542 while (ptr < end && !SRE_IS_LINEBREAK(*ptr)) |
|
543 ptr++; |
|
544 break; |
|
545 |
|
546 case SRE_OP_ANY_ALL: |
|
547 /* repeated dot wildcard. skip to the end of the target |
|
548 string, and backtrack from there */ |
|
549 TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr)); |
|
550 ptr = end; |
|
551 break; |
|
552 |
|
553 case SRE_OP_LITERAL: |
|
554 /* repeated literal */ |
|
555 chr = pattern[1]; |
|
556 TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr)); |
|
557 while (ptr < end && (SRE_CODE) *ptr == chr) |
|
558 ptr++; |
|
559 break; |
|
560 |
|
561 case SRE_OP_LITERAL_IGNORE: |
|
562 /* repeated literal */ |
|
563 chr = pattern[1]; |
|
564 TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr)); |
|
565 while (ptr < end && (SRE_CODE) state->lower(*ptr) == chr) |
|
566 ptr++; |
|
567 break; |
|
568 |
|
569 case SRE_OP_NOT_LITERAL: |
|
570 /* repeated non-literal */ |
|
571 chr = pattern[1]; |
|
572 TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr)); |
|
573 while (ptr < end && (SRE_CODE) *ptr != chr) |
|
574 ptr++; |
|
575 break; |
|
576 |
|
577 case SRE_OP_NOT_LITERAL_IGNORE: |
|
578 /* repeated non-literal */ |
|
579 chr = pattern[1]; |
|
580 TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr)); |
|
581 while (ptr < end && (SRE_CODE) state->lower(*ptr) != chr) |
|
582 ptr++; |
|
583 break; |
|
584 |
|
585 default: |
|
586 /* repeated single character pattern */ |
|
587 TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr)); |
|
588 while ((SRE_CHAR*) state->ptr < end) { |
|
589 i = SRE_MATCH(state, pattern); |
|
590 if (i < 0) |
|
591 return i; |
|
592 if (!i) |
|
593 break; |
|
594 } |
|
595 TRACE(("|%p|%p|COUNT %d\n", pattern, ptr, |
|
596 (SRE_CHAR*) state->ptr - ptr)); |
|
597 return (SRE_CHAR*) state->ptr - ptr; |
|
598 } |
|
599 |
|
600 TRACE(("|%p|%p|COUNT %d\n", pattern, ptr, ptr - (SRE_CHAR*) state->ptr)); |
|
601 return ptr - (SRE_CHAR*) state->ptr; |
|
602 } |
|
603 |
|
604 #if 0 /* not used in this release */ |
|
605 LOCAL(int) |
|
606 SRE_INFO(SRE_STATE* state, SRE_CODE* pattern) |
|
607 { |
|
608 /* check if an SRE_OP_INFO block matches at the current position. |
|
609 returns the number of SRE_CODE objects to skip if successful, 0 |
|
610 if no match */ |
|
611 |
|
612 SRE_CHAR* end = state->end; |
|
613 SRE_CHAR* ptr = state->ptr; |
|
614 Py_ssize_t i; |
|
615 |
|
616 /* check minimal length */ |
|
617 if (pattern[3] && (end - ptr) < pattern[3]) |
|
618 return 0; |
|
619 |
|
620 /* check known prefix */ |
|
621 if (pattern[2] & SRE_INFO_PREFIX && pattern[5] > 1) { |
|
622 /* <length> <skip> <prefix data> <overlap data> */ |
|
623 for (i = 0; i < pattern[5]; i++) |
|
624 if ((SRE_CODE) ptr[i] != pattern[7 + i]) |
|
625 return 0; |
|
626 return pattern[0] + 2 * pattern[6]; |
|
627 } |
|
628 return pattern[0]; |
|
629 } |
|
630 #endif |
|
631 |
|
632 /* The macros below should be used to protect recursive SRE_MATCH() |
|
633 * calls that *failed* and do *not* return immediately (IOW, those |
|
634 * that will backtrack). Explaining: |
|
635 * |
|
636 * - Recursive SRE_MATCH() returned true: that's usually a success |
|
637 * (besides atypical cases like ASSERT_NOT), therefore there's no |
|
638 * reason to restore lastmark; |
|
639 * |
|
640 * - Recursive SRE_MATCH() returned false but the current SRE_MATCH() |
|
641 * is returning to the caller: If the current SRE_MATCH() is the |
|
642 * top function of the recursion, returning false will be a matching |
|
643 * failure, and it doesn't matter where lastmark is pointing to. |
|
644 * If it's *not* the top function, it will be a recursive SRE_MATCH() |
|
645 * failure by itself, and the calling SRE_MATCH() will have to deal |
|
646 * with the failure by the same rules explained here (it will restore |
|
647 * lastmark by itself if necessary); |
|
648 * |
|
649 * - Recursive SRE_MATCH() returned false, and will continue the |
|
650 * outside 'for' loop: must be protected when breaking, since the next |
|
651 * OP could potentially depend on lastmark; |
|
652 * |
|
653 * - Recursive SRE_MATCH() returned false, and will be called again |
|
654 * inside a local for/while loop: must be protected between each |
|
655 * loop iteration, since the recursive SRE_MATCH() could do anything, |
|
656 * and could potentially depend on lastmark. |
|
657 * |
|
658 * For more information, check the discussion at SF patch #712900. |
|
659 */ |
|
660 #define LASTMARK_SAVE() \ |
|
661 do { \ |
|
662 ctx->lastmark = state->lastmark; \ |
|
663 ctx->lastindex = state->lastindex; \ |
|
664 } while (0) |
|
665 #define LASTMARK_RESTORE() \ |
|
666 do { \ |
|
667 state->lastmark = ctx->lastmark; \ |
|
668 state->lastindex = ctx->lastindex; \ |
|
669 } while (0) |
|
670 |
|
671 #define RETURN_ERROR(i) do { return i; } while(0) |
|
672 #define RETURN_FAILURE do { ret = 0; goto exit; } while(0) |
|
673 #define RETURN_SUCCESS do { ret = 1; goto exit; } while(0) |
|
674 |
|
675 #define RETURN_ON_ERROR(i) \ |
|
676 do { if (i < 0) RETURN_ERROR(i); } while (0) |
|
677 #define RETURN_ON_SUCCESS(i) \ |
|
678 do { RETURN_ON_ERROR(i); if (i > 0) RETURN_SUCCESS; } while (0) |
|
679 #define RETURN_ON_FAILURE(i) \ |
|
680 do { RETURN_ON_ERROR(i); if (i == 0) RETURN_FAILURE; } while (0) |
|
681 |
|
682 #define SFY(x) #x |
|
683 |
|
684 #define DATA_STACK_ALLOC(state, type, ptr) \ |
|
685 do { \ |
|
686 alloc_pos = state->data_stack_base; \ |
|
687 TRACE(("allocating %s in %d (%d)\n", \ |
|
688 SFY(type), alloc_pos, sizeof(type))); \ |
|
689 if (state->data_stack_size < alloc_pos+sizeof(type)) { \ |
|
690 int j = data_stack_grow(state, sizeof(type)); \ |
|
691 if (j < 0) return j; \ |
|
692 if (ctx_pos != -1) \ |
|
693 DATA_STACK_LOOKUP_AT(state, SRE_MATCH_CONTEXT, ctx, ctx_pos); \ |
|
694 } \ |
|
695 ptr = (type*)(state->data_stack+alloc_pos); \ |
|
696 state->data_stack_base += sizeof(type); \ |
|
697 } while (0) |
|
698 |
|
699 #define DATA_STACK_LOOKUP_AT(state, type, ptr, pos) \ |
|
700 do { \ |
|
701 TRACE(("looking up %s at %d\n", SFY(type), pos)); \ |
|
702 ptr = (type*)(state->data_stack+pos); \ |
|
703 } while (0) |
|
704 |
|
705 #define DATA_STACK_PUSH(state, data, size) \ |
|
706 do { \ |
|
707 TRACE(("copy data in %p to %d (%d)\n", \ |
|
708 data, state->data_stack_base, size)); \ |
|
709 if (state->data_stack_size < state->data_stack_base+size) { \ |
|
710 int j = data_stack_grow(state, size); \ |
|
711 if (j < 0) return j; \ |
|
712 if (ctx_pos != -1) \ |
|
713 DATA_STACK_LOOKUP_AT(state, SRE_MATCH_CONTEXT, ctx, ctx_pos); \ |
|
714 } \ |
|
715 memcpy(state->data_stack+state->data_stack_base, data, size); \ |
|
716 state->data_stack_base += size; \ |
|
717 } while (0) |
|
718 |
|
719 #define DATA_STACK_POP(state, data, size, discard) \ |
|
720 do { \ |
|
721 TRACE(("copy data to %p from %d (%d)\n", \ |
|
722 data, state->data_stack_base-size, size)); \ |
|
723 memcpy(data, state->data_stack+state->data_stack_base-size, size); \ |
|
724 if (discard) \ |
|
725 state->data_stack_base -= size; \ |
|
726 } while (0) |
|
727 |
|
728 #define DATA_STACK_POP_DISCARD(state, size) \ |
|
729 do { \ |
|
730 TRACE(("discard data from %d (%d)\n", \ |
|
731 state->data_stack_base-size, size)); \ |
|
732 state->data_stack_base -= size; \ |
|
733 } while(0) |
|
734 |
|
735 #define DATA_PUSH(x) \ |
|
736 DATA_STACK_PUSH(state, (x), sizeof(*(x))) |
|
737 #define DATA_POP(x) \ |
|
738 DATA_STACK_POP(state, (x), sizeof(*(x)), 1) |
|
739 #define DATA_POP_DISCARD(x) \ |
|
740 DATA_STACK_POP_DISCARD(state, sizeof(*(x))) |
|
741 #define DATA_ALLOC(t,p) \ |
|
742 DATA_STACK_ALLOC(state, t, p) |
|
743 #define DATA_LOOKUP_AT(t,p,pos) \ |
|
744 DATA_STACK_LOOKUP_AT(state,t,p,pos) |
|
745 |
|
746 #define MARK_PUSH(lastmark) \ |
|
747 do if (lastmark > 0) { \ |
|
748 i = lastmark; /* ctx->lastmark may change if reallocated */ \ |
|
749 DATA_STACK_PUSH(state, state->mark, (i+1)*sizeof(void*)); \ |
|
750 } while (0) |
|
751 #define MARK_POP(lastmark) \ |
|
752 do if (lastmark > 0) { \ |
|
753 DATA_STACK_POP(state, state->mark, (lastmark+1)*sizeof(void*), 1); \ |
|
754 } while (0) |
|
755 #define MARK_POP_KEEP(lastmark) \ |
|
756 do if (lastmark > 0) { \ |
|
757 DATA_STACK_POP(state, state->mark, (lastmark+1)*sizeof(void*), 0); \ |
|
758 } while (0) |
|
759 #define MARK_POP_DISCARD(lastmark) \ |
|
760 do if (lastmark > 0) { \ |
|
761 DATA_STACK_POP_DISCARD(state, (lastmark+1)*sizeof(void*)); \ |
|
762 } while (0) |
|
763 |
|
764 #define JUMP_NONE 0 |
|
765 #define JUMP_MAX_UNTIL_1 1 |
|
766 #define JUMP_MAX_UNTIL_2 2 |
|
767 #define JUMP_MAX_UNTIL_3 3 |
|
768 #define JUMP_MIN_UNTIL_1 4 |
|
769 #define JUMP_MIN_UNTIL_2 5 |
|
770 #define JUMP_MIN_UNTIL_3 6 |
|
771 #define JUMP_REPEAT 7 |
|
772 #define JUMP_REPEAT_ONE_1 8 |
|
773 #define JUMP_REPEAT_ONE_2 9 |
|
774 #define JUMP_MIN_REPEAT_ONE 10 |
|
775 #define JUMP_BRANCH 11 |
|
776 #define JUMP_ASSERT 12 |
|
777 #define JUMP_ASSERT_NOT 13 |
|
778 |
|
779 #define DO_JUMP(jumpvalue, jumplabel, nextpattern) \ |
|
780 DATA_ALLOC(SRE_MATCH_CONTEXT, nextctx); \ |
|
781 nextctx->last_ctx_pos = ctx_pos; \ |
|
782 nextctx->jump = jumpvalue; \ |
|
783 nextctx->pattern = nextpattern; \ |
|
784 ctx_pos = alloc_pos; \ |
|
785 ctx = nextctx; \ |
|
786 goto entrance; \ |
|
787 jumplabel: \ |
|
788 while (0) /* gcc doesn't like labels at end of scopes */ \ |
|
789 |
|
790 typedef struct { |
|
791 Py_ssize_t last_ctx_pos; |
|
792 Py_ssize_t jump; |
|
793 SRE_CHAR* ptr; |
|
794 SRE_CODE* pattern; |
|
795 Py_ssize_t count; |
|
796 Py_ssize_t lastmark; |
|
797 Py_ssize_t lastindex; |
|
798 union { |
|
799 SRE_CODE chr; |
|
800 SRE_REPEAT* rep; |
|
801 } u; |
|
802 } SRE_MATCH_CONTEXT; |
|
803 |
|
804 /* check if string matches the given pattern. returns <0 for |
|
805 error, 0 for failure, and 1 for success */ |
|
806 LOCAL(Py_ssize_t) |
|
807 SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern) |
|
808 { |
|
809 SRE_CHAR* end = (SRE_CHAR *)state->end; |
|
810 Py_ssize_t alloc_pos, ctx_pos = -1; |
|
811 Py_ssize_t i, ret = 0; |
|
812 Py_ssize_t jump; |
|
813 unsigned int sigcount=0; |
|
814 |
|
815 SRE_MATCH_CONTEXT* ctx; |
|
816 SRE_MATCH_CONTEXT* nextctx; |
|
817 |
|
818 TRACE(("|%p|%p|ENTER\n", pattern, state->ptr)); |
|
819 |
|
820 DATA_ALLOC(SRE_MATCH_CONTEXT, ctx); |
|
821 ctx->last_ctx_pos = -1; |
|
822 ctx->jump = JUMP_NONE; |
|
823 ctx->pattern = pattern; |
|
824 ctx_pos = alloc_pos; |
|
825 |
|
826 entrance: |
|
827 |
|
828 ctx->ptr = (SRE_CHAR *)state->ptr; |
|
829 |
|
830 if (ctx->pattern[0] == SRE_OP_INFO) { |
|
831 /* optimization info block */ |
|
832 /* <INFO> <1=skip> <2=flags> <3=min> ... */ |
|
833 if (ctx->pattern[3] && (end - ctx->ptr) < ctx->pattern[3]) { |
|
834 TRACE(("reject (got %d chars, need %d)\n", |
|
835 (end - ctx->ptr), ctx->pattern[3])); |
|
836 RETURN_FAILURE; |
|
837 } |
|
838 ctx->pattern += ctx->pattern[1] + 1; |
|
839 } |
|
840 |
|
841 for (;;) { |
|
842 ++sigcount; |
|
843 if ((0 == (sigcount & 0xfff)) && PyErr_CheckSignals()) |
|
844 RETURN_ERROR(SRE_ERROR_INTERRUPTED); |
|
845 |
|
846 switch (*ctx->pattern++) { |
|
847 |
|
848 case SRE_OP_MARK: |
|
849 /* set mark */ |
|
850 /* <MARK> <gid> */ |
|
851 TRACE(("|%p|%p|MARK %d\n", ctx->pattern, |
|
852 ctx->ptr, ctx->pattern[0])); |
|
853 i = ctx->pattern[0]; |
|
854 if (i & 1) |
|
855 state->lastindex = i/2 + 1; |
|
856 if (i > state->lastmark) { |
|
857 /* state->lastmark is the highest valid index in the |
|
858 state->mark array. If it is increased by more than 1, |
|
859 the intervening marks must be set to NULL to signal |
|
860 that these marks have not been encountered. */ |
|
861 Py_ssize_t j = state->lastmark + 1; |
|
862 while (j < i) |
|
863 state->mark[j++] = NULL; |
|
864 state->lastmark = i; |
|
865 } |
|
866 state->mark[i] = ctx->ptr; |
|
867 ctx->pattern++; |
|
868 break; |
|
869 |
|
870 case SRE_OP_LITERAL: |
|
871 /* match literal string */ |
|
872 /* <LITERAL> <code> */ |
|
873 TRACE(("|%p|%p|LITERAL %d\n", ctx->pattern, |
|
874 ctx->ptr, *ctx->pattern)); |
|
875 if (ctx->ptr >= end || (SRE_CODE) ctx->ptr[0] != ctx->pattern[0]) |
|
876 RETURN_FAILURE; |
|
877 ctx->pattern++; |
|
878 ctx->ptr++; |
|
879 break; |
|
880 |
|
881 case SRE_OP_NOT_LITERAL: |
|
882 /* match anything that is not literal character */ |
|
883 /* <NOT_LITERAL> <code> */ |
|
884 TRACE(("|%p|%p|NOT_LITERAL %d\n", ctx->pattern, |
|
885 ctx->ptr, *ctx->pattern)); |
|
886 if (ctx->ptr >= end || (SRE_CODE) ctx->ptr[0] == ctx->pattern[0]) |
|
887 RETURN_FAILURE; |
|
888 ctx->pattern++; |
|
889 ctx->ptr++; |
|
890 break; |
|
891 |
|
892 case SRE_OP_SUCCESS: |
|
893 /* end of pattern */ |
|
894 TRACE(("|%p|%p|SUCCESS\n", ctx->pattern, ctx->ptr)); |
|
895 state->ptr = ctx->ptr; |
|
896 RETURN_SUCCESS; |
|
897 |
|
898 case SRE_OP_AT: |
|
899 /* match at given position */ |
|
900 /* <AT> <code> */ |
|
901 TRACE(("|%p|%p|AT %d\n", ctx->pattern, ctx->ptr, *ctx->pattern)); |
|
902 if (!SRE_AT(state, ctx->ptr, *ctx->pattern)) |
|
903 RETURN_FAILURE; |
|
904 ctx->pattern++; |
|
905 break; |
|
906 |
|
907 case SRE_OP_CATEGORY: |
|
908 /* match at given category */ |
|
909 /* <CATEGORY> <code> */ |
|
910 TRACE(("|%p|%p|CATEGORY %d\n", ctx->pattern, |
|
911 ctx->ptr, *ctx->pattern)); |
|
912 if (ctx->ptr >= end || !sre_category(ctx->pattern[0], ctx->ptr[0])) |
|
913 RETURN_FAILURE; |
|
914 ctx->pattern++; |
|
915 ctx->ptr++; |
|
916 break; |
|
917 |
|
918 case SRE_OP_ANY: |
|
919 /* match anything (except a newline) */ |
|
920 /* <ANY> */ |
|
921 TRACE(("|%p|%p|ANY\n", ctx->pattern, ctx->ptr)); |
|
922 if (ctx->ptr >= end || SRE_IS_LINEBREAK(ctx->ptr[0])) |
|
923 RETURN_FAILURE; |
|
924 ctx->ptr++; |
|
925 break; |
|
926 |
|
927 case SRE_OP_ANY_ALL: |
|
928 /* match anything */ |
|
929 /* <ANY_ALL> */ |
|
930 TRACE(("|%p|%p|ANY_ALL\n", ctx->pattern, ctx->ptr)); |
|
931 if (ctx->ptr >= end) |
|
932 RETURN_FAILURE; |
|
933 ctx->ptr++; |
|
934 break; |
|
935 |
|
936 case SRE_OP_IN: |
|
937 /* match set member (or non_member) */ |
|
938 /* <IN> <skip> <set> */ |
|
939 TRACE(("|%p|%p|IN\n", ctx->pattern, ctx->ptr)); |
|
940 if (ctx->ptr >= end || !SRE_CHARSET(ctx->pattern + 1, *ctx->ptr)) |
|
941 RETURN_FAILURE; |
|
942 ctx->pattern += ctx->pattern[0]; |
|
943 ctx->ptr++; |
|
944 break; |
|
945 |
|
946 case SRE_OP_LITERAL_IGNORE: |
|
947 TRACE(("|%p|%p|LITERAL_IGNORE %d\n", |
|
948 ctx->pattern, ctx->ptr, ctx->pattern[0])); |
|
949 if (ctx->ptr >= end || |
|
950 state->lower(*ctx->ptr) != state->lower(*ctx->pattern)) |
|
951 RETURN_FAILURE; |
|
952 ctx->pattern++; |
|
953 ctx->ptr++; |
|
954 break; |
|
955 |
|
956 case SRE_OP_NOT_LITERAL_IGNORE: |
|
957 TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n", |
|
958 ctx->pattern, ctx->ptr, *ctx->pattern)); |
|
959 if (ctx->ptr >= end || |
|
960 state->lower(*ctx->ptr) == state->lower(*ctx->pattern)) |
|
961 RETURN_FAILURE; |
|
962 ctx->pattern++; |
|
963 ctx->ptr++; |
|
964 break; |
|
965 |
|
966 case SRE_OP_IN_IGNORE: |
|
967 TRACE(("|%p|%p|IN_IGNORE\n", ctx->pattern, ctx->ptr)); |
|
968 if (ctx->ptr >= end |
|
969 || !SRE_CHARSET(ctx->pattern+1, |
|
970 (SRE_CODE)state->lower(*ctx->ptr))) |
|
971 RETURN_FAILURE; |
|
972 ctx->pattern += ctx->pattern[0]; |
|
973 ctx->ptr++; |
|
974 break; |
|
975 |
|
976 case SRE_OP_JUMP: |
|
977 case SRE_OP_INFO: |
|
978 /* jump forward */ |
|
979 /* <JUMP> <offset> */ |
|
980 TRACE(("|%p|%p|JUMP %d\n", ctx->pattern, |
|
981 ctx->ptr, ctx->pattern[0])); |
|
982 ctx->pattern += ctx->pattern[0]; |
|
983 break; |
|
984 |
|
985 case SRE_OP_BRANCH: |
|
986 /* alternation */ |
|
987 /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */ |
|
988 TRACE(("|%p|%p|BRANCH\n", ctx->pattern, ctx->ptr)); |
|
989 LASTMARK_SAVE(); |
|
990 ctx->u.rep = state->repeat; |
|
991 if (ctx->u.rep) |
|
992 MARK_PUSH(ctx->lastmark); |
|
993 for (; ctx->pattern[0]; ctx->pattern += ctx->pattern[0]) { |
|
994 if (ctx->pattern[1] == SRE_OP_LITERAL && |
|
995 (ctx->ptr >= end || |
|
996 (SRE_CODE) *ctx->ptr != ctx->pattern[2])) |
|
997 continue; |
|
998 if (ctx->pattern[1] == SRE_OP_IN && |
|
999 (ctx->ptr >= end || |
|
1000 !SRE_CHARSET(ctx->pattern + 3, (SRE_CODE) *ctx->ptr))) |
|
1001 continue; |
|
1002 state->ptr = ctx->ptr; |
|
1003 DO_JUMP(JUMP_BRANCH, jump_branch, ctx->pattern+1); |
|
1004 if (ret) { |
|
1005 if (ctx->u.rep) |
|
1006 MARK_POP_DISCARD(ctx->lastmark); |
|
1007 RETURN_ON_ERROR(ret); |
|
1008 RETURN_SUCCESS; |
|
1009 } |
|
1010 if (ctx->u.rep) |
|
1011 MARK_POP_KEEP(ctx->lastmark); |
|
1012 LASTMARK_RESTORE(); |
|
1013 } |
|
1014 if (ctx->u.rep) |
|
1015 MARK_POP_DISCARD(ctx->lastmark); |
|
1016 RETURN_FAILURE; |
|
1017 |
|
1018 case SRE_OP_REPEAT_ONE: |
|
1019 /* match repeated sequence (maximizing regexp) */ |
|
1020 |
|
1021 /* this operator only works if the repeated item is |
|
1022 exactly one character wide, and we're not already |
|
1023 collecting backtracking points. for other cases, |
|
1024 use the MAX_REPEAT operator */ |
|
1025 |
|
1026 /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */ |
|
1027 |
|
1028 TRACE(("|%p|%p|REPEAT_ONE %d %d\n", ctx->pattern, ctx->ptr, |
|
1029 ctx->pattern[1], ctx->pattern[2])); |
|
1030 |
|
1031 if (ctx->ptr + ctx->pattern[1] > end) |
|
1032 RETURN_FAILURE; /* cannot match */ |
|
1033 |
|
1034 state->ptr = ctx->ptr; |
|
1035 |
|
1036 ret = SRE_COUNT(state, ctx->pattern+3, ctx->pattern[2]); |
|
1037 RETURN_ON_ERROR(ret); |
|
1038 DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos); |
|
1039 ctx->count = ret; |
|
1040 ctx->ptr += ctx->count; |
|
1041 |
|
1042 /* when we arrive here, count contains the number of |
|
1043 matches, and ctx->ptr points to the tail of the target |
|
1044 string. check if the rest of the pattern matches, |
|
1045 and backtrack if not. */ |
|
1046 |
|
1047 if (ctx->count < (Py_ssize_t) ctx->pattern[1]) |
|
1048 RETURN_FAILURE; |
|
1049 |
|
1050 if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS) { |
|
1051 /* tail is empty. we're finished */ |
|
1052 state->ptr = ctx->ptr; |
|
1053 RETURN_SUCCESS; |
|
1054 } |
|
1055 |
|
1056 LASTMARK_SAVE(); |
|
1057 |
|
1058 if (ctx->pattern[ctx->pattern[0]] == SRE_OP_LITERAL) { |
|
1059 /* tail starts with a literal. skip positions where |
|
1060 the rest of the pattern cannot possibly match */ |
|
1061 ctx->u.chr = ctx->pattern[ctx->pattern[0]+1]; |
|
1062 for (;;) { |
|
1063 while (ctx->count >= (Py_ssize_t) ctx->pattern[1] && |
|
1064 (ctx->ptr >= end || *ctx->ptr != ctx->u.chr)) { |
|
1065 ctx->ptr--; |
|
1066 ctx->count--; |
|
1067 } |
|
1068 if (ctx->count < (Py_ssize_t) ctx->pattern[1]) |
|
1069 break; |
|
1070 state->ptr = ctx->ptr; |
|
1071 DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1, |
|
1072 ctx->pattern+ctx->pattern[0]); |
|
1073 if (ret) { |
|
1074 RETURN_ON_ERROR(ret); |
|
1075 RETURN_SUCCESS; |
|
1076 } |
|
1077 |
|
1078 LASTMARK_RESTORE(); |
|
1079 |
|
1080 ctx->ptr--; |
|
1081 ctx->count--; |
|
1082 } |
|
1083 |
|
1084 } else { |
|
1085 /* general case */ |
|
1086 while (ctx->count >= (Py_ssize_t) ctx->pattern[1]) { |
|
1087 state->ptr = ctx->ptr; |
|
1088 DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2, |
|
1089 ctx->pattern+ctx->pattern[0]); |
|
1090 if (ret) { |
|
1091 RETURN_ON_ERROR(ret); |
|
1092 RETURN_SUCCESS; |
|
1093 } |
|
1094 ctx->ptr--; |
|
1095 ctx->count--; |
|
1096 LASTMARK_RESTORE(); |
|
1097 } |
|
1098 } |
|
1099 RETURN_FAILURE; |
|
1100 |
|
1101 case SRE_OP_MIN_REPEAT_ONE: |
|
1102 /* match repeated sequence (minimizing regexp) */ |
|
1103 |
|
1104 /* this operator only works if the repeated item is |
|
1105 exactly one character wide, and we're not already |
|
1106 collecting backtracking points. for other cases, |
|
1107 use the MIN_REPEAT operator */ |
|
1108 |
|
1109 /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */ |
|
1110 |
|
1111 TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", ctx->pattern, ctx->ptr, |
|
1112 ctx->pattern[1], ctx->pattern[2])); |
|
1113 |
|
1114 if (ctx->ptr + ctx->pattern[1] > end) |
|
1115 RETURN_FAILURE; /* cannot match */ |
|
1116 |
|
1117 state->ptr = ctx->ptr; |
|
1118 |
|
1119 if (ctx->pattern[1] == 0) |
|
1120 ctx->count = 0; |
|
1121 else { |
|
1122 /* count using pattern min as the maximum */ |
|
1123 ret = SRE_COUNT(state, ctx->pattern+3, ctx->pattern[1]); |
|
1124 RETURN_ON_ERROR(ret); |
|
1125 DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos); |
|
1126 if (ret < (Py_ssize_t) ctx->pattern[1]) |
|
1127 /* didn't match minimum number of times */ |
|
1128 RETURN_FAILURE; |
|
1129 /* advance past minimum matches of repeat */ |
|
1130 ctx->count = ret; |
|
1131 ctx->ptr += ctx->count; |
|
1132 } |
|
1133 |
|
1134 if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS) { |
|
1135 /* tail is empty. we're finished */ |
|
1136 state->ptr = ctx->ptr; |
|
1137 RETURN_SUCCESS; |
|
1138 |
|
1139 } else { |
|
1140 /* general case */ |
|
1141 LASTMARK_SAVE(); |
|
1142 while ((Py_ssize_t)ctx->pattern[2] == 65535 |
|
1143 || ctx->count <= (Py_ssize_t)ctx->pattern[2]) { |
|
1144 state->ptr = ctx->ptr; |
|
1145 DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one, |
|
1146 ctx->pattern+ctx->pattern[0]); |
|
1147 if (ret) { |
|
1148 RETURN_ON_ERROR(ret); |
|
1149 RETURN_SUCCESS; |
|
1150 } |
|
1151 state->ptr = ctx->ptr; |
|
1152 ret = SRE_COUNT(state, ctx->pattern+3, 1); |
|
1153 RETURN_ON_ERROR(ret); |
|
1154 DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos); |
|
1155 if (ret == 0) |
|
1156 break; |
|
1157 assert(ret == 1); |
|
1158 ctx->ptr++; |
|
1159 ctx->count++; |
|
1160 LASTMARK_RESTORE(); |
|
1161 } |
|
1162 } |
|
1163 RETURN_FAILURE; |
|
1164 |
|
1165 case SRE_OP_REPEAT: |
|
1166 /* create repeat context. all the hard work is done |
|
1167 by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */ |
|
1168 /* <REPEAT> <skip> <1=min> <2=max> item <UNTIL> tail */ |
|
1169 TRACE(("|%p|%p|REPEAT %d %d\n", ctx->pattern, ctx->ptr, |
|
1170 ctx->pattern[1], ctx->pattern[2])); |
|
1171 |
|
1172 /* install new repeat context */ |
|
1173 ctx->u.rep = (SRE_REPEAT*) PyObject_MALLOC(sizeof(*ctx->u.rep)); |
|
1174 if (!ctx->u.rep) { |
|
1175 PyErr_NoMemory(); |
|
1176 RETURN_FAILURE; |
|
1177 } |
|
1178 ctx->u.rep->count = -1; |
|
1179 ctx->u.rep->pattern = ctx->pattern; |
|
1180 ctx->u.rep->prev = state->repeat; |
|
1181 ctx->u.rep->last_ptr = NULL; |
|
1182 state->repeat = ctx->u.rep; |
|
1183 |
|
1184 state->ptr = ctx->ptr; |
|
1185 DO_JUMP(JUMP_REPEAT, jump_repeat, ctx->pattern+ctx->pattern[0]); |
|
1186 state->repeat = ctx->u.rep->prev; |
|
1187 PyObject_FREE(ctx->u.rep); |
|
1188 |
|
1189 if (ret) { |
|
1190 RETURN_ON_ERROR(ret); |
|
1191 RETURN_SUCCESS; |
|
1192 } |
|
1193 RETURN_FAILURE; |
|
1194 |
|
1195 case SRE_OP_MAX_UNTIL: |
|
1196 /* maximizing repeat */ |
|
1197 /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */ |
|
1198 |
|
1199 /* FIXME: we probably need to deal with zero-width |
|
1200 matches in here... */ |
|
1201 |
|
1202 ctx->u.rep = state->repeat; |
|
1203 if (!ctx->u.rep) |
|
1204 RETURN_ERROR(SRE_ERROR_STATE); |
|
1205 |
|
1206 state->ptr = ctx->ptr; |
|
1207 |
|
1208 ctx->count = ctx->u.rep->count+1; |
|
1209 |
|
1210 TRACE(("|%p|%p|MAX_UNTIL %d\n", ctx->pattern, |
|
1211 ctx->ptr, ctx->count)); |
|
1212 |
|
1213 if (ctx->count < ctx->u.rep->pattern[1]) { |
|
1214 /* not enough matches */ |
|
1215 ctx->u.rep->count = ctx->count; |
|
1216 DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1, |
|
1217 ctx->u.rep->pattern+3); |
|
1218 if (ret) { |
|
1219 RETURN_ON_ERROR(ret); |
|
1220 RETURN_SUCCESS; |
|
1221 } |
|
1222 ctx->u.rep->count = ctx->count-1; |
|
1223 state->ptr = ctx->ptr; |
|
1224 RETURN_FAILURE; |
|
1225 } |
|
1226 |
|
1227 if ((ctx->count < ctx->u.rep->pattern[2] || |
|
1228 ctx->u.rep->pattern[2] == 65535) && |
|
1229 state->ptr != ctx->u.rep->last_ptr) { |
|
1230 /* we may have enough matches, but if we can |
|
1231 match another item, do so */ |
|
1232 ctx->u.rep->count = ctx->count; |
|
1233 LASTMARK_SAVE(); |
|
1234 MARK_PUSH(ctx->lastmark); |
|
1235 /* zero-width match protection */ |
|
1236 DATA_PUSH(&ctx->u.rep->last_ptr); |
|
1237 ctx->u.rep->last_ptr = state->ptr; |
|
1238 DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2, |
|
1239 ctx->u.rep->pattern+3); |
|
1240 DATA_POP(&ctx->u.rep->last_ptr); |
|
1241 if (ret) { |
|
1242 MARK_POP_DISCARD(ctx->lastmark); |
|
1243 RETURN_ON_ERROR(ret); |
|
1244 RETURN_SUCCESS; |
|
1245 } |
|
1246 MARK_POP(ctx->lastmark); |
|
1247 LASTMARK_RESTORE(); |
|
1248 ctx->u.rep->count = ctx->count-1; |
|
1249 state->ptr = ctx->ptr; |
|
1250 } |
|
1251 |
|
1252 /* cannot match more repeated items here. make sure the |
|
1253 tail matches */ |
|
1254 state->repeat = ctx->u.rep->prev; |
|
1255 DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, ctx->pattern); |
|
1256 RETURN_ON_SUCCESS(ret); |
|
1257 state->repeat = ctx->u.rep; |
|
1258 state->ptr = ctx->ptr; |
|
1259 RETURN_FAILURE; |
|
1260 |
|
1261 case SRE_OP_MIN_UNTIL: |
|
1262 /* minimizing repeat */ |
|
1263 /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */ |
|
1264 |
|
1265 ctx->u.rep = state->repeat; |
|
1266 if (!ctx->u.rep) |
|
1267 RETURN_ERROR(SRE_ERROR_STATE); |
|
1268 |
|
1269 state->ptr = ctx->ptr; |
|
1270 |
|
1271 ctx->count = ctx->u.rep->count+1; |
|
1272 |
|
1273 TRACE(("|%p|%p|MIN_UNTIL %d %p\n", ctx->pattern, |
|
1274 ctx->ptr, ctx->count, ctx->u.rep->pattern)); |
|
1275 |
|
1276 if (ctx->count < ctx->u.rep->pattern[1]) { |
|
1277 /* not enough matches */ |
|
1278 ctx->u.rep->count = ctx->count; |
|
1279 DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1, |
|
1280 ctx->u.rep->pattern+3); |
|
1281 if (ret) { |
|
1282 RETURN_ON_ERROR(ret); |
|
1283 RETURN_SUCCESS; |
|
1284 } |
|
1285 ctx->u.rep->count = ctx->count-1; |
|
1286 state->ptr = ctx->ptr; |
|
1287 RETURN_FAILURE; |
|
1288 } |
|
1289 |
|
1290 LASTMARK_SAVE(); |
|
1291 |
|
1292 /* see if the tail matches */ |
|
1293 state->repeat = ctx->u.rep->prev; |
|
1294 DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, ctx->pattern); |
|
1295 if (ret) { |
|
1296 RETURN_ON_ERROR(ret); |
|
1297 RETURN_SUCCESS; |
|
1298 } |
|
1299 |
|
1300 state->repeat = ctx->u.rep; |
|
1301 state->ptr = ctx->ptr; |
|
1302 |
|
1303 LASTMARK_RESTORE(); |
|
1304 |
|
1305 if (ctx->count >= ctx->u.rep->pattern[2] |
|
1306 && ctx->u.rep->pattern[2] != 65535) |
|
1307 RETURN_FAILURE; |
|
1308 |
|
1309 ctx->u.rep->count = ctx->count; |
|
1310 DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3, |
|
1311 ctx->u.rep->pattern+3); |
|
1312 if (ret) { |
|
1313 RETURN_ON_ERROR(ret); |
|
1314 RETURN_SUCCESS; |
|
1315 } |
|
1316 ctx->u.rep->count = ctx->count-1; |
|
1317 state->ptr = ctx->ptr; |
|
1318 RETURN_FAILURE; |
|
1319 |
|
1320 case SRE_OP_GROUPREF: |
|
1321 /* match backreference */ |
|
1322 TRACE(("|%p|%p|GROUPREF %d\n", ctx->pattern, |
|
1323 ctx->ptr, ctx->pattern[0])); |
|
1324 i = ctx->pattern[0]; |
|
1325 { |
|
1326 Py_ssize_t groupref = i+i; |
|
1327 if (groupref >= state->lastmark) { |
|
1328 RETURN_FAILURE; |
|
1329 } else { |
|
1330 SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref]; |
|
1331 SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1]; |
|
1332 if (!p || !e || e < p) |
|
1333 RETURN_FAILURE; |
|
1334 while (p < e) { |
|
1335 if (ctx->ptr >= end || *ctx->ptr != *p) |
|
1336 RETURN_FAILURE; |
|
1337 p++; ctx->ptr++; |
|
1338 } |
|
1339 } |
|
1340 } |
|
1341 ctx->pattern++; |
|
1342 break; |
|
1343 |
|
1344 case SRE_OP_GROUPREF_IGNORE: |
|
1345 /* match backreference */ |
|
1346 TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", ctx->pattern, |
|
1347 ctx->ptr, ctx->pattern[0])); |
|
1348 i = ctx->pattern[0]; |
|
1349 { |
|
1350 Py_ssize_t groupref = i+i; |
|
1351 if (groupref >= state->lastmark) { |
|
1352 RETURN_FAILURE; |
|
1353 } else { |
|
1354 SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref]; |
|
1355 SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1]; |
|
1356 if (!p || !e || e < p) |
|
1357 RETURN_FAILURE; |
|
1358 while (p < e) { |
|
1359 if (ctx->ptr >= end || |
|
1360 state->lower(*ctx->ptr) != state->lower(*p)) |
|
1361 RETURN_FAILURE; |
|
1362 p++; ctx->ptr++; |
|
1363 } |
|
1364 } |
|
1365 } |
|
1366 ctx->pattern++; |
|
1367 break; |
|
1368 |
|
1369 case SRE_OP_GROUPREF_EXISTS: |
|
1370 TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", ctx->pattern, |
|
1371 ctx->ptr, ctx->pattern[0])); |
|
1372 /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */ |
|
1373 i = ctx->pattern[0]; |
|
1374 { |
|
1375 Py_ssize_t groupref = i+i; |
|
1376 if (groupref >= state->lastmark) { |
|
1377 ctx->pattern += ctx->pattern[1]; |
|
1378 break; |
|
1379 } else { |
|
1380 SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref]; |
|
1381 SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1]; |
|
1382 if (!p || !e || e < p) { |
|
1383 ctx->pattern += ctx->pattern[1]; |
|
1384 break; |
|
1385 } |
|
1386 } |
|
1387 } |
|
1388 ctx->pattern += 2; |
|
1389 break; |
|
1390 |
|
1391 case SRE_OP_ASSERT: |
|
1392 /* assert subpattern */ |
|
1393 /* <ASSERT> <skip> <back> <pattern> */ |
|
1394 TRACE(("|%p|%p|ASSERT %d\n", ctx->pattern, |
|
1395 ctx->ptr, ctx->pattern[1])); |
|
1396 state->ptr = ctx->ptr - ctx->pattern[1]; |
|
1397 if (state->ptr < state->beginning) |
|
1398 RETURN_FAILURE; |
|
1399 DO_JUMP(JUMP_ASSERT, jump_assert, ctx->pattern+2); |
|
1400 RETURN_ON_FAILURE(ret); |
|
1401 ctx->pattern += ctx->pattern[0]; |
|
1402 break; |
|
1403 |
|
1404 case SRE_OP_ASSERT_NOT: |
|
1405 /* assert not subpattern */ |
|
1406 /* <ASSERT_NOT> <skip> <back> <pattern> */ |
|
1407 TRACE(("|%p|%p|ASSERT_NOT %d\n", ctx->pattern, |
|
1408 ctx->ptr, ctx->pattern[1])); |
|
1409 state->ptr = ctx->ptr - ctx->pattern[1]; |
|
1410 if (state->ptr >= state->beginning) { |
|
1411 DO_JUMP(JUMP_ASSERT_NOT, jump_assert_not, ctx->pattern+2); |
|
1412 if (ret) { |
|
1413 RETURN_ON_ERROR(ret); |
|
1414 RETURN_FAILURE; |
|
1415 } |
|
1416 } |
|
1417 ctx->pattern += ctx->pattern[0]; |
|
1418 break; |
|
1419 |
|
1420 case SRE_OP_FAILURE: |
|
1421 /* immediate failure */ |
|
1422 TRACE(("|%p|%p|FAILURE\n", ctx->pattern, ctx->ptr)); |
|
1423 RETURN_FAILURE; |
|
1424 |
|
1425 default: |
|
1426 TRACE(("|%p|%p|UNKNOWN %d\n", ctx->pattern, ctx->ptr, |
|
1427 ctx->pattern[-1])); |
|
1428 RETURN_ERROR(SRE_ERROR_ILLEGAL); |
|
1429 } |
|
1430 } |
|
1431 |
|
1432 exit: |
|
1433 ctx_pos = ctx->last_ctx_pos; |
|
1434 jump = ctx->jump; |
|
1435 DATA_POP_DISCARD(ctx); |
|
1436 if (ctx_pos == -1) |
|
1437 return ret; |
|
1438 DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos); |
|
1439 |
|
1440 switch (jump) { |
|
1441 case JUMP_MAX_UNTIL_2: |
|
1442 TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", ctx->pattern, ctx->ptr)); |
|
1443 goto jump_max_until_2; |
|
1444 case JUMP_MAX_UNTIL_3: |
|
1445 TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", ctx->pattern, ctx->ptr)); |
|
1446 goto jump_max_until_3; |
|
1447 case JUMP_MIN_UNTIL_2: |
|
1448 TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", ctx->pattern, ctx->ptr)); |
|
1449 goto jump_min_until_2; |
|
1450 case JUMP_MIN_UNTIL_3: |
|
1451 TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", ctx->pattern, ctx->ptr)); |
|
1452 goto jump_min_until_3; |
|
1453 case JUMP_BRANCH: |
|
1454 TRACE(("|%p|%p|JUMP_BRANCH\n", ctx->pattern, ctx->ptr)); |
|
1455 goto jump_branch; |
|
1456 case JUMP_MAX_UNTIL_1: |
|
1457 TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", ctx->pattern, ctx->ptr)); |
|
1458 goto jump_max_until_1; |
|
1459 case JUMP_MIN_UNTIL_1: |
|
1460 TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", ctx->pattern, ctx->ptr)); |
|
1461 goto jump_min_until_1; |
|
1462 case JUMP_REPEAT: |
|
1463 TRACE(("|%p|%p|JUMP_REPEAT\n", ctx->pattern, ctx->ptr)); |
|
1464 goto jump_repeat; |
|
1465 case JUMP_REPEAT_ONE_1: |
|
1466 TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", ctx->pattern, ctx->ptr)); |
|
1467 goto jump_repeat_one_1; |
|
1468 case JUMP_REPEAT_ONE_2: |
|
1469 TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", ctx->pattern, ctx->ptr)); |
|
1470 goto jump_repeat_one_2; |
|
1471 case JUMP_MIN_REPEAT_ONE: |
|
1472 TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", ctx->pattern, ctx->ptr)); |
|
1473 goto jump_min_repeat_one; |
|
1474 case JUMP_ASSERT: |
|
1475 TRACE(("|%p|%p|JUMP_ASSERT\n", ctx->pattern, ctx->ptr)); |
|
1476 goto jump_assert; |
|
1477 case JUMP_ASSERT_NOT: |
|
1478 TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", ctx->pattern, ctx->ptr)); |
|
1479 goto jump_assert_not; |
|
1480 case JUMP_NONE: |
|
1481 TRACE(("|%p|%p|RETURN %d\n", ctx->pattern, ctx->ptr, ret)); |
|
1482 break; |
|
1483 } |
|
1484 |
|
1485 return ret; /* should never get here */ |
|
1486 } |
|
1487 |
|
1488 LOCAL(Py_ssize_t) |
|
1489 SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern) |
|
1490 { |
|
1491 SRE_CHAR* ptr = (SRE_CHAR *)state->start; |
|
1492 SRE_CHAR* end = (SRE_CHAR *)state->end; |
|
1493 Py_ssize_t status = 0; |
|
1494 Py_ssize_t prefix_len = 0; |
|
1495 Py_ssize_t prefix_skip = 0; |
|
1496 SRE_CODE* prefix = NULL; |
|
1497 SRE_CODE* charset = NULL; |
|
1498 SRE_CODE* overlap = NULL; |
|
1499 int flags = 0; |
|
1500 |
|
1501 if (pattern[0] == SRE_OP_INFO) { |
|
1502 /* optimization info block */ |
|
1503 /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info> */ |
|
1504 |
|
1505 flags = pattern[2]; |
|
1506 |
|
1507 if (pattern[3] > 1) { |
|
1508 /* adjust end point (but make sure we leave at least one |
|
1509 character in there, so literal search will work) */ |
|
1510 end -= pattern[3]-1; |
|
1511 if (end <= ptr) |
|
1512 end = ptr+1; |
|
1513 } |
|
1514 |
|
1515 if (flags & SRE_INFO_PREFIX) { |
|
1516 /* pattern starts with a known prefix */ |
|
1517 /* <length> <skip> <prefix data> <overlap data> */ |
|
1518 prefix_len = pattern[5]; |
|
1519 prefix_skip = pattern[6]; |
|
1520 prefix = pattern + 7; |
|
1521 overlap = prefix + prefix_len - 1; |
|
1522 } else if (flags & SRE_INFO_CHARSET) |
|
1523 /* pattern starts with a character from a known set */ |
|
1524 /* <charset> */ |
|
1525 charset = pattern + 5; |
|
1526 |
|
1527 pattern += 1 + pattern[1]; |
|
1528 } |
|
1529 |
|
1530 TRACE(("prefix = %p %d %d\n", prefix, prefix_len, prefix_skip)); |
|
1531 TRACE(("charset = %p\n", charset)); |
|
1532 |
|
1533 #if defined(USE_FAST_SEARCH) |
|
1534 if (prefix_len > 1) { |
|
1535 /* pattern starts with a known prefix. use the overlap |
|
1536 table to skip forward as fast as we possibly can */ |
|
1537 Py_ssize_t i = 0; |
|
1538 end = (SRE_CHAR *)state->end; |
|
1539 while (ptr < end) { |
|
1540 for (;;) { |
|
1541 if ((SRE_CODE) ptr[0] != prefix[i]) { |
|
1542 if (!i) |
|
1543 break; |
|
1544 else |
|
1545 i = overlap[i]; |
|
1546 } else { |
|
1547 if (++i == prefix_len) { |
|
1548 /* found a potential match */ |
|
1549 TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr)); |
|
1550 state->start = ptr + 1 - prefix_len; |
|
1551 state->ptr = ptr + 1 - prefix_len + prefix_skip; |
|
1552 if (flags & SRE_INFO_LITERAL) |
|
1553 return 1; /* we got all of it */ |
|
1554 status = SRE_MATCH(state, pattern + 2*prefix_skip); |
|
1555 if (status != 0) |
|
1556 return status; |
|
1557 /* close but no cigar -- try again */ |
|
1558 i = overlap[i]; |
|
1559 } |
|
1560 break; |
|
1561 } |
|
1562 } |
|
1563 ptr++; |
|
1564 } |
|
1565 return 0; |
|
1566 } |
|
1567 #endif |
|
1568 |
|
1569 if (pattern[0] == SRE_OP_LITERAL) { |
|
1570 /* pattern starts with a literal character. this is used |
|
1571 for short prefixes, and if fast search is disabled */ |
|
1572 SRE_CODE chr = pattern[1]; |
|
1573 end = (SRE_CHAR *)state->end; |
|
1574 for (;;) { |
|
1575 while (ptr < end && (SRE_CODE) ptr[0] != chr) |
|
1576 ptr++; |
|
1577 if (ptr >= end) |
|
1578 return 0; |
|
1579 TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr)); |
|
1580 state->start = ptr; |
|
1581 state->ptr = ++ptr; |
|
1582 if (flags & SRE_INFO_LITERAL) |
|
1583 return 1; /* we got all of it */ |
|
1584 status = SRE_MATCH(state, pattern + 2); |
|
1585 if (status != 0) |
|
1586 break; |
|
1587 } |
|
1588 } else if (charset) { |
|
1589 /* pattern starts with a character from a known set */ |
|
1590 end = (SRE_CHAR *)state->end; |
|
1591 for (;;) { |
|
1592 while (ptr < end && !SRE_CHARSET(charset, ptr[0])) |
|
1593 ptr++; |
|
1594 if (ptr >= end) |
|
1595 return 0; |
|
1596 TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr)); |
|
1597 state->start = ptr; |
|
1598 state->ptr = ptr; |
|
1599 status = SRE_MATCH(state, pattern); |
|
1600 if (status != 0) |
|
1601 break; |
|
1602 ptr++; |
|
1603 } |
|
1604 } else |
|
1605 /* general case */ |
|
1606 while (ptr <= end) { |
|
1607 TRACE(("|%p|%p|SEARCH\n", pattern, ptr)); |
|
1608 state->start = state->ptr = ptr++; |
|
1609 status = SRE_MATCH(state, pattern); |
|
1610 if (status != 0) |
|
1611 break; |
|
1612 } |
|
1613 |
|
1614 return status; |
|
1615 } |
|
1616 |
|
1617 LOCAL(int) |
|
1618 SRE_LITERAL_TEMPLATE(SRE_CHAR* ptr, Py_ssize_t len) |
|
1619 { |
|
1620 /* check if given string is a literal template (i.e. no escapes) */ |
|
1621 while (len-- > 0) |
|
1622 if (*ptr++ == '\\') |
|
1623 return 0; |
|
1624 return 1; |
|
1625 } |
|
1626 |
|
1627 #if !defined(SRE_RECURSIVE) |
|
1628 |
|
1629 /* -------------------------------------------------------------------- */ |
|
1630 /* factories and destructors */ |
|
1631 |
|
1632 /* see sre.h for object declarations */ |
|
1633 static PyObject*pattern_new_match(PatternObject*, SRE_STATE*, int); |
|
1634 static PyObject*pattern_scanner(PatternObject*, PyObject*); |
|
1635 |
|
1636 static PyObject * |
|
1637 sre_codesize(PyObject* self, PyObject *unused) |
|
1638 { |
|
1639 return Py_BuildValue("l", sizeof(SRE_CODE)); |
|
1640 } |
|
1641 |
|
1642 static PyObject * |
|
1643 sre_getlower(PyObject* self, PyObject* args) |
|
1644 { |
|
1645 int character, flags; |
|
1646 if (!PyArg_ParseTuple(args, "ii", &character, &flags)) |
|
1647 return NULL; |
|
1648 if (flags & SRE_FLAG_LOCALE) |
|
1649 return Py_BuildValue("i", sre_lower_locale(character)); |
|
1650 if (flags & SRE_FLAG_UNICODE) |
|
1651 #if defined(HAVE_UNICODE) |
|
1652 return Py_BuildValue("i", sre_lower_unicode(character)); |
|
1653 #else |
|
1654 return Py_BuildValue("i", sre_lower_locale(character)); |
|
1655 #endif |
|
1656 return Py_BuildValue("i", sre_lower(character)); |
|
1657 } |
|
1658 |
|
1659 LOCAL(void) |
|
1660 state_reset(SRE_STATE* state) |
|
1661 { |
|
1662 /* FIXME: dynamic! */ |
|
1663 /*memset(state->mark, 0, sizeof(*state->mark) * SRE_MARK_SIZE);*/ |
|
1664 |
|
1665 state->lastmark = -1; |
|
1666 state->lastindex = -1; |
|
1667 |
|
1668 state->repeat = NULL; |
|
1669 |
|
1670 data_stack_dealloc(state); |
|
1671 } |
|
1672 |
|
1673 static void* |
|
1674 getstring(PyObject* string, Py_ssize_t* p_length, int* p_charsize) |
|
1675 { |
|
1676 /* given a python object, return a data pointer, a length (in |
|
1677 characters), and a character size. return NULL if the object |
|
1678 is not a string (or not compatible) */ |
|
1679 |
|
1680 PyBufferProcs *buffer; |
|
1681 Py_ssize_t size, bytes; |
|
1682 int charsize; |
|
1683 void* ptr; |
|
1684 |
|
1685 #if defined(HAVE_UNICODE) |
|
1686 if (PyUnicode_Check(string)) { |
|
1687 /* unicode strings doesn't always support the buffer interface */ |
|
1688 ptr = (void*) PyUnicode_AS_DATA(string); |
|
1689 bytes = PyUnicode_GET_DATA_SIZE(string); |
|
1690 size = PyUnicode_GET_SIZE(string); |
|
1691 charsize = sizeof(Py_UNICODE); |
|
1692 |
|
1693 } else { |
|
1694 #endif |
|
1695 |
|
1696 /* get pointer to string buffer */ |
|
1697 buffer = Py_TYPE(string)->tp_as_buffer; |
|
1698 if (!buffer || !buffer->bf_getreadbuffer || !buffer->bf_getsegcount || |
|
1699 buffer->bf_getsegcount(string, NULL) != 1) { |
|
1700 PyErr_SetString(PyExc_TypeError, "expected string or buffer"); |
|
1701 return NULL; |
|
1702 } |
|
1703 |
|
1704 /* determine buffer size */ |
|
1705 bytes = buffer->bf_getreadbuffer(string, 0, &ptr); |
|
1706 if (bytes < 0) { |
|
1707 PyErr_SetString(PyExc_TypeError, "buffer has negative size"); |
|
1708 return NULL; |
|
1709 } |
|
1710 |
|
1711 /* determine character size */ |
|
1712 #if PY_VERSION_HEX >= 0x01060000 |
|
1713 size = PyObject_Size(string); |
|
1714 #else |
|
1715 size = PyObject_Length(string); |
|
1716 #endif |
|
1717 |
|
1718 if (PyString_Check(string) || bytes == size) |
|
1719 charsize = 1; |
|
1720 #if defined(HAVE_UNICODE) |
|
1721 else if (bytes == (Py_ssize_t) (size * sizeof(Py_UNICODE))) |
|
1722 charsize = sizeof(Py_UNICODE); |
|
1723 #endif |
|
1724 else { |
|
1725 PyErr_SetString(PyExc_TypeError, "buffer size mismatch"); |
|
1726 return NULL; |
|
1727 } |
|
1728 |
|
1729 #if defined(HAVE_UNICODE) |
|
1730 } |
|
1731 #endif |
|
1732 |
|
1733 *p_length = size; |
|
1734 *p_charsize = charsize; |
|
1735 |
|
1736 return ptr; |
|
1737 } |
|
1738 |
|
1739 LOCAL(PyObject*) |
|
1740 state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string, |
|
1741 Py_ssize_t start, Py_ssize_t end) |
|
1742 { |
|
1743 /* prepare state object */ |
|
1744 |
|
1745 Py_ssize_t length; |
|
1746 int charsize; |
|
1747 void* ptr; |
|
1748 |
|
1749 memset(state, 0, sizeof(SRE_STATE)); |
|
1750 |
|
1751 state->lastmark = -1; |
|
1752 state->lastindex = -1; |
|
1753 |
|
1754 ptr = getstring(string, &length, &charsize); |
|
1755 if (!ptr) |
|
1756 return NULL; |
|
1757 |
|
1758 /* adjust boundaries */ |
|
1759 if (start < 0) |
|
1760 start = 0; |
|
1761 else if (start > length) |
|
1762 start = length; |
|
1763 |
|
1764 if (end < 0) |
|
1765 end = 0; |
|
1766 else if (end > length) |
|
1767 end = length; |
|
1768 |
|
1769 state->charsize = charsize; |
|
1770 |
|
1771 state->beginning = ptr; |
|
1772 |
|
1773 state->start = (void*) ((char*) ptr + start * state->charsize); |
|
1774 state->end = (void*) ((char*) ptr + end * state->charsize); |
|
1775 |
|
1776 Py_INCREF(string); |
|
1777 state->string = string; |
|
1778 state->pos = start; |
|
1779 state->endpos = end; |
|
1780 |
|
1781 if (pattern->flags & SRE_FLAG_LOCALE) |
|
1782 state->lower = sre_lower_locale; |
|
1783 else if (pattern->flags & SRE_FLAG_UNICODE) |
|
1784 #if defined(HAVE_UNICODE) |
|
1785 state->lower = sre_lower_unicode; |
|
1786 #else |
|
1787 state->lower = sre_lower_locale; |
|
1788 #endif |
|
1789 else |
|
1790 state->lower = sre_lower; |
|
1791 |
|
1792 return string; |
|
1793 } |
|
1794 |
|
1795 LOCAL(void) |
|
1796 state_fini(SRE_STATE* state) |
|
1797 { |
|
1798 Py_XDECREF(state->string); |
|
1799 data_stack_dealloc(state); |
|
1800 } |
|
1801 |
|
1802 /* calculate offset from start of string */ |
|
1803 #define STATE_OFFSET(state, member)\ |
|
1804 (((char*)(member) - (char*)(state)->beginning) / (state)->charsize) |
|
1805 |
|
1806 LOCAL(PyObject*) |
|
1807 state_getslice(SRE_STATE* state, Py_ssize_t index, PyObject* string, int empty) |
|
1808 { |
|
1809 Py_ssize_t i, j; |
|
1810 |
|
1811 index = (index - 1) * 2; |
|
1812 |
|
1813 if (string == Py_None || index >= state->lastmark || !state->mark[index] || !state->mark[index+1]) { |
|
1814 if (empty) |
|
1815 /* want empty string */ |
|
1816 i = j = 0; |
|
1817 else { |
|
1818 Py_INCREF(Py_None); |
|
1819 return Py_None; |
|
1820 } |
|
1821 } else { |
|
1822 i = STATE_OFFSET(state, state->mark[index]); |
|
1823 j = STATE_OFFSET(state, state->mark[index+1]); |
|
1824 } |
|
1825 |
|
1826 return PySequence_GetSlice(string, i, j); |
|
1827 } |
|
1828 |
|
1829 static void |
|
1830 pattern_error(int status) |
|
1831 { |
|
1832 switch (status) { |
|
1833 case SRE_ERROR_RECURSION_LIMIT: |
|
1834 PyErr_SetString( |
|
1835 PyExc_RuntimeError, |
|
1836 "maximum recursion limit exceeded" |
|
1837 ); |
|
1838 break; |
|
1839 case SRE_ERROR_MEMORY: |
|
1840 PyErr_NoMemory(); |
|
1841 break; |
|
1842 case SRE_ERROR_INTERRUPTED: |
|
1843 /* An exception has already been raised, so let it fly */ |
|
1844 break; |
|
1845 default: |
|
1846 /* other error codes indicate compiler/engine bugs */ |
|
1847 PyErr_SetString( |
|
1848 PyExc_RuntimeError, |
|
1849 "internal error in regular expression engine" |
|
1850 ); |
|
1851 } |
|
1852 } |
|
1853 |
|
1854 static void |
|
1855 pattern_dealloc(PatternObject* self) |
|
1856 { |
|
1857 if (self->weakreflist != NULL) |
|
1858 PyObject_ClearWeakRefs((PyObject *) self); |
|
1859 Py_XDECREF(self->pattern); |
|
1860 Py_XDECREF(self->groupindex); |
|
1861 Py_XDECREF(self->indexgroup); |
|
1862 PyObject_DEL(self); |
|
1863 } |
|
1864 |
|
1865 static PyObject* |
|
1866 pattern_match(PatternObject* self, PyObject* args, PyObject* kw) |
|
1867 { |
|
1868 SRE_STATE state; |
|
1869 int status; |
|
1870 |
|
1871 PyObject* string; |
|
1872 Py_ssize_t start = 0; |
|
1873 Py_ssize_t end = PY_SSIZE_T_MAX; |
|
1874 static char* kwlist[] = { "pattern", "pos", "endpos", NULL }; |
|
1875 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|nn:match", kwlist, |
|
1876 &string, &start, &end)) |
|
1877 return NULL; |
|
1878 |
|
1879 string = state_init(&state, self, string, start, end); |
|
1880 if (!string) |
|
1881 return NULL; |
|
1882 |
|
1883 state.ptr = state.start; |
|
1884 |
|
1885 TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr)); |
|
1886 |
|
1887 if (state.charsize == 1) { |
|
1888 status = sre_match(&state, PatternObject_GetCode(self)); |
|
1889 } else { |
|
1890 #if defined(HAVE_UNICODE) |
|
1891 status = sre_umatch(&state, PatternObject_GetCode(self)); |
|
1892 #endif |
|
1893 } |
|
1894 |
|
1895 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr)); |
|
1896 if (PyErr_Occurred()) |
|
1897 return NULL; |
|
1898 |
|
1899 state_fini(&state); |
|
1900 |
|
1901 return pattern_new_match(self, &state, status); |
|
1902 } |
|
1903 |
|
1904 static PyObject* |
|
1905 pattern_search(PatternObject* self, PyObject* args, PyObject* kw) |
|
1906 { |
|
1907 SRE_STATE state; |
|
1908 int status; |
|
1909 |
|
1910 PyObject* string; |
|
1911 Py_ssize_t start = 0; |
|
1912 Py_ssize_t end = PY_SSIZE_T_MAX; |
|
1913 static char* kwlist[] = { "pattern", "pos", "endpos", NULL }; |
|
1914 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|nn:search", kwlist, |
|
1915 &string, &start, &end)) |
|
1916 return NULL; |
|
1917 |
|
1918 string = state_init(&state, self, string, start, end); |
|
1919 if (!string) |
|
1920 return NULL; |
|
1921 |
|
1922 TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self), state.ptr)); |
|
1923 |
|
1924 if (state.charsize == 1) { |
|
1925 status = sre_search(&state, PatternObject_GetCode(self)); |
|
1926 } else { |
|
1927 #if defined(HAVE_UNICODE) |
|
1928 status = sre_usearch(&state, PatternObject_GetCode(self)); |
|
1929 #endif |
|
1930 } |
|
1931 |
|
1932 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr)); |
|
1933 |
|
1934 state_fini(&state); |
|
1935 |
|
1936 if (PyErr_Occurred()) |
|
1937 return NULL; |
|
1938 |
|
1939 return pattern_new_match(self, &state, status); |
|
1940 } |
|
1941 |
|
1942 static PyObject* |
|
1943 call(char* module, char* function, PyObject* args) |
|
1944 { |
|
1945 PyObject* name; |
|
1946 PyObject* mod; |
|
1947 PyObject* func; |
|
1948 PyObject* result; |
|
1949 |
|
1950 if (!args) |
|
1951 return NULL; |
|
1952 name = PyString_FromString(module); |
|
1953 if (!name) |
|
1954 return NULL; |
|
1955 mod = PyImport_Import(name); |
|
1956 Py_DECREF(name); |
|
1957 if (!mod) |
|
1958 return NULL; |
|
1959 func = PyObject_GetAttrString(mod, function); |
|
1960 Py_DECREF(mod); |
|
1961 if (!func) |
|
1962 return NULL; |
|
1963 result = PyObject_CallObject(func, args); |
|
1964 Py_DECREF(func); |
|
1965 Py_DECREF(args); |
|
1966 return result; |
|
1967 } |
|
1968 |
|
1969 #ifdef USE_BUILTIN_COPY |
|
1970 static int |
|
1971 deepcopy(PyObject** object, PyObject* memo) |
|
1972 { |
|
1973 PyObject* copy; |
|
1974 |
|
1975 copy = call( |
|
1976 "copy", "deepcopy", |
|
1977 PyTuple_Pack(2, *object, memo) |
|
1978 ); |
|
1979 if (!copy) |
|
1980 return 0; |
|
1981 |
|
1982 Py_DECREF(*object); |
|
1983 *object = copy; |
|
1984 |
|
1985 return 1; /* success */ |
|
1986 } |
|
1987 #endif |
|
1988 |
|
1989 static PyObject* |
|
1990 join_list(PyObject* list, PyObject* string) |
|
1991 { |
|
1992 /* join list elements */ |
|
1993 |
|
1994 PyObject* joiner; |
|
1995 #if PY_VERSION_HEX >= 0x01060000 |
|
1996 PyObject* function; |
|
1997 PyObject* args; |
|
1998 #endif |
|
1999 PyObject* result; |
|
2000 |
|
2001 joiner = PySequence_GetSlice(string, 0, 0); |
|
2002 if (!joiner) |
|
2003 return NULL; |
|
2004 |
|
2005 if (PyList_GET_SIZE(list) == 0) { |
|
2006 Py_DECREF(list); |
|
2007 return joiner; |
|
2008 } |
|
2009 |
|
2010 #if PY_VERSION_HEX >= 0x01060000 |
|
2011 function = PyObject_GetAttrString(joiner, "join"); |
|
2012 if (!function) { |
|
2013 Py_DECREF(joiner); |
|
2014 return NULL; |
|
2015 } |
|
2016 args = PyTuple_New(1); |
|
2017 if (!args) { |
|
2018 Py_DECREF(function); |
|
2019 Py_DECREF(joiner); |
|
2020 return NULL; |
|
2021 } |
|
2022 PyTuple_SET_ITEM(args, 0, list); |
|
2023 result = PyObject_CallObject(function, args); |
|
2024 Py_DECREF(args); /* also removes list */ |
|
2025 Py_DECREF(function); |
|
2026 #else |
|
2027 result = call( |
|
2028 "string", "join", |
|
2029 PyTuple_Pack(2, list, joiner) |
|
2030 ); |
|
2031 #endif |
|
2032 Py_DECREF(joiner); |
|
2033 |
|
2034 return result; |
|
2035 } |
|
2036 |
|
2037 static PyObject* |
|
2038 pattern_findall(PatternObject* self, PyObject* args, PyObject* kw) |
|
2039 { |
|
2040 SRE_STATE state; |
|
2041 PyObject* list; |
|
2042 int status; |
|
2043 Py_ssize_t i, b, e; |
|
2044 |
|
2045 PyObject* string; |
|
2046 Py_ssize_t start = 0; |
|
2047 Py_ssize_t end = PY_SSIZE_T_MAX; |
|
2048 static char* kwlist[] = { "source", "pos", "endpos", NULL }; |
|
2049 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|nn:findall", kwlist, |
|
2050 &string, &start, &end)) |
|
2051 return NULL; |
|
2052 |
|
2053 string = state_init(&state, self, string, start, end); |
|
2054 if (!string) |
|
2055 return NULL; |
|
2056 |
|
2057 list = PyList_New(0); |
|
2058 if (!list) { |
|
2059 state_fini(&state); |
|
2060 return NULL; |
|
2061 } |
|
2062 |
|
2063 while (state.start <= state.end) { |
|
2064 |
|
2065 PyObject* item; |
|
2066 |
|
2067 state_reset(&state); |
|
2068 |
|
2069 state.ptr = state.start; |
|
2070 |
|
2071 if (state.charsize == 1) { |
|
2072 status = sre_search(&state, PatternObject_GetCode(self)); |
|
2073 } else { |
|
2074 #if defined(HAVE_UNICODE) |
|
2075 status = sre_usearch(&state, PatternObject_GetCode(self)); |
|
2076 #endif |
|
2077 } |
|
2078 |
|
2079 if (PyErr_Occurred()) |
|
2080 goto error; |
|
2081 |
|
2082 if (status <= 0) { |
|
2083 if (status == 0) |
|
2084 break; |
|
2085 pattern_error(status); |
|
2086 goto error; |
|
2087 } |
|
2088 |
|
2089 /* don't bother to build a match object */ |
|
2090 switch (self->groups) { |
|
2091 case 0: |
|
2092 b = STATE_OFFSET(&state, state.start); |
|
2093 e = STATE_OFFSET(&state, state.ptr); |
|
2094 item = PySequence_GetSlice(string, b, e); |
|
2095 if (!item) |
|
2096 goto error; |
|
2097 break; |
|
2098 case 1: |
|
2099 item = state_getslice(&state, 1, string, 1); |
|
2100 if (!item) |
|
2101 goto error; |
|
2102 break; |
|
2103 default: |
|
2104 item = PyTuple_New(self->groups); |
|
2105 if (!item) |
|
2106 goto error; |
|
2107 for (i = 0; i < self->groups; i++) { |
|
2108 PyObject* o = state_getslice(&state, i+1, string, 1); |
|
2109 if (!o) { |
|
2110 Py_DECREF(item); |
|
2111 goto error; |
|
2112 } |
|
2113 PyTuple_SET_ITEM(item, i, o); |
|
2114 } |
|
2115 break; |
|
2116 } |
|
2117 |
|
2118 status = PyList_Append(list, item); |
|
2119 Py_DECREF(item); |
|
2120 if (status < 0) |
|
2121 goto error; |
|
2122 |
|
2123 if (state.ptr == state.start) |
|
2124 state.start = (void*) ((char*) state.ptr + state.charsize); |
|
2125 else |
|
2126 state.start = state.ptr; |
|
2127 |
|
2128 } |
|
2129 |
|
2130 state_fini(&state); |
|
2131 return list; |
|
2132 |
|
2133 error: |
|
2134 Py_DECREF(list); |
|
2135 state_fini(&state); |
|
2136 return NULL; |
|
2137 |
|
2138 } |
|
2139 |
|
2140 #if PY_VERSION_HEX >= 0x02020000 |
|
2141 static PyObject* |
|
2142 pattern_finditer(PatternObject* pattern, PyObject* args) |
|
2143 { |
|
2144 PyObject* scanner; |
|
2145 PyObject* search; |
|
2146 PyObject* iterator; |
|
2147 |
|
2148 scanner = pattern_scanner(pattern, args); |
|
2149 if (!scanner) |
|
2150 return NULL; |
|
2151 |
|
2152 search = PyObject_GetAttrString(scanner, "search"); |
|
2153 Py_DECREF(scanner); |
|
2154 if (!search) |
|
2155 return NULL; |
|
2156 |
|
2157 iterator = PyCallIter_New(search, Py_None); |
|
2158 Py_DECREF(search); |
|
2159 |
|
2160 return iterator; |
|
2161 } |
|
2162 #endif |
|
2163 |
|
2164 static PyObject* |
|
2165 pattern_split(PatternObject* self, PyObject* args, PyObject* kw) |
|
2166 { |
|
2167 SRE_STATE state; |
|
2168 PyObject* list; |
|
2169 PyObject* item; |
|
2170 int status; |
|
2171 Py_ssize_t n; |
|
2172 Py_ssize_t i; |
|
2173 void* last; |
|
2174 |
|
2175 PyObject* string; |
|
2176 Py_ssize_t maxsplit = 0; |
|
2177 static char* kwlist[] = { "source", "maxsplit", NULL }; |
|
2178 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|n:split", kwlist, |
|
2179 &string, &maxsplit)) |
|
2180 return NULL; |
|
2181 |
|
2182 string = state_init(&state, self, string, 0, PY_SSIZE_T_MAX); |
|
2183 if (!string) |
|
2184 return NULL; |
|
2185 |
|
2186 list = PyList_New(0); |
|
2187 if (!list) { |
|
2188 state_fini(&state); |
|
2189 return NULL; |
|
2190 } |
|
2191 |
|
2192 n = 0; |
|
2193 last = state.start; |
|
2194 |
|
2195 while (!maxsplit || n < maxsplit) { |
|
2196 |
|
2197 state_reset(&state); |
|
2198 |
|
2199 state.ptr = state.start; |
|
2200 |
|
2201 if (state.charsize == 1) { |
|
2202 status = sre_search(&state, PatternObject_GetCode(self)); |
|
2203 } else { |
|
2204 #if defined(HAVE_UNICODE) |
|
2205 status = sre_usearch(&state, PatternObject_GetCode(self)); |
|
2206 #endif |
|
2207 } |
|
2208 |
|
2209 if (PyErr_Occurred()) |
|
2210 goto error; |
|
2211 |
|
2212 if (status <= 0) { |
|
2213 if (status == 0) |
|
2214 break; |
|
2215 pattern_error(status); |
|
2216 goto error; |
|
2217 } |
|
2218 |
|
2219 if (state.start == state.ptr) { |
|
2220 if (last == state.end) |
|
2221 break; |
|
2222 /* skip one character */ |
|
2223 state.start = (void*) ((char*) state.ptr + state.charsize); |
|
2224 continue; |
|
2225 } |
|
2226 |
|
2227 /* get segment before this match */ |
|
2228 item = PySequence_GetSlice( |
|
2229 string, STATE_OFFSET(&state, last), |
|
2230 STATE_OFFSET(&state, state.start) |
|
2231 ); |
|
2232 if (!item) |
|
2233 goto error; |
|
2234 status = PyList_Append(list, item); |
|
2235 Py_DECREF(item); |
|
2236 if (status < 0) |
|
2237 goto error; |
|
2238 |
|
2239 /* add groups (if any) */ |
|
2240 for (i = 0; i < self->groups; i++) { |
|
2241 item = state_getslice(&state, i+1, string, 0); |
|
2242 if (!item) |
|
2243 goto error; |
|
2244 status = PyList_Append(list, item); |
|
2245 Py_DECREF(item); |
|
2246 if (status < 0) |
|
2247 goto error; |
|
2248 } |
|
2249 |
|
2250 n = n + 1; |
|
2251 |
|
2252 last = state.start = state.ptr; |
|
2253 |
|
2254 } |
|
2255 |
|
2256 /* get segment following last match (even if empty) */ |
|
2257 item = PySequence_GetSlice( |
|
2258 string, STATE_OFFSET(&state, last), state.endpos |
|
2259 ); |
|
2260 if (!item) |
|
2261 goto error; |
|
2262 status = PyList_Append(list, item); |
|
2263 Py_DECREF(item); |
|
2264 if (status < 0) |
|
2265 goto error; |
|
2266 |
|
2267 state_fini(&state); |
|
2268 return list; |
|
2269 |
|
2270 error: |
|
2271 Py_DECREF(list); |
|
2272 state_fini(&state); |
|
2273 return NULL; |
|
2274 |
|
2275 } |
|
2276 |
|
2277 static PyObject* |
|
2278 pattern_subx(PatternObject* self, PyObject* ptemplate, PyObject* string, |
|
2279 Py_ssize_t count, Py_ssize_t subn) |
|
2280 { |
|
2281 SRE_STATE state; |
|
2282 PyObject* list; |
|
2283 PyObject* item; |
|
2284 PyObject* filter; |
|
2285 PyObject* args; |
|
2286 PyObject* match; |
|
2287 void* ptr; |
|
2288 int status; |
|
2289 Py_ssize_t n; |
|
2290 Py_ssize_t i, b, e; |
|
2291 int bint; |
|
2292 int filter_is_callable; |
|
2293 |
|
2294 if (PyCallable_Check(ptemplate)) { |
|
2295 /* sub/subn takes either a function or a template */ |
|
2296 filter = ptemplate; |
|
2297 Py_INCREF(filter); |
|
2298 filter_is_callable = 1; |
|
2299 } else { |
|
2300 /* if not callable, check if it's a literal string */ |
|
2301 int literal; |
|
2302 ptr = getstring(ptemplate, &n, &bint); |
|
2303 b = bint; |
|
2304 if (ptr) { |
|
2305 if (b == 1) { |
|
2306 literal = sre_literal_template((unsigned char *)ptr, n); |
|
2307 } else { |
|
2308 #if defined(HAVE_UNICODE) |
|
2309 literal = sre_uliteral_template((Py_UNICODE *)ptr, n); |
|
2310 #endif |
|
2311 } |
|
2312 } else { |
|
2313 PyErr_Clear(); |
|
2314 literal = 0; |
|
2315 } |
|
2316 if (literal) { |
|
2317 filter = ptemplate; |
|
2318 Py_INCREF(filter); |
|
2319 filter_is_callable = 0; |
|
2320 } else { |
|
2321 /* not a literal; hand it over to the template compiler */ |
|
2322 filter = call( |
|
2323 SRE_PY_MODULE, "_subx", |
|
2324 PyTuple_Pack(2, self, ptemplate) |
|
2325 ); |
|
2326 if (!filter) |
|
2327 return NULL; |
|
2328 filter_is_callable = PyCallable_Check(filter); |
|
2329 } |
|
2330 } |
|
2331 |
|
2332 string = state_init(&state, self, string, 0, PY_SSIZE_T_MAX); |
|
2333 if (!string) { |
|
2334 Py_DECREF(filter); |
|
2335 return NULL; |
|
2336 } |
|
2337 |
|
2338 list = PyList_New(0); |
|
2339 if (!list) { |
|
2340 Py_DECREF(filter); |
|
2341 state_fini(&state); |
|
2342 return NULL; |
|
2343 } |
|
2344 |
|
2345 n = i = 0; |
|
2346 |
|
2347 while (!count || n < count) { |
|
2348 |
|
2349 state_reset(&state); |
|
2350 |
|
2351 state.ptr = state.start; |
|
2352 |
|
2353 if (state.charsize == 1) { |
|
2354 status = sre_search(&state, PatternObject_GetCode(self)); |
|
2355 } else { |
|
2356 #if defined(HAVE_UNICODE) |
|
2357 status = sre_usearch(&state, PatternObject_GetCode(self)); |
|
2358 #endif |
|
2359 } |
|
2360 |
|
2361 if (PyErr_Occurred()) |
|
2362 goto error; |
|
2363 |
|
2364 if (status <= 0) { |
|
2365 if (status == 0) |
|
2366 break; |
|
2367 pattern_error(status); |
|
2368 goto error; |
|
2369 } |
|
2370 |
|
2371 b = STATE_OFFSET(&state, state.start); |
|
2372 e = STATE_OFFSET(&state, state.ptr); |
|
2373 |
|
2374 if (i < b) { |
|
2375 /* get segment before this match */ |
|
2376 item = PySequence_GetSlice(string, i, b); |
|
2377 if (!item) |
|
2378 goto error; |
|
2379 status = PyList_Append(list, item); |
|
2380 Py_DECREF(item); |
|
2381 if (status < 0) |
|
2382 goto error; |
|
2383 |
|
2384 } else if (i == b && i == e && n > 0) |
|
2385 /* ignore empty match on latest position */ |
|
2386 goto next; |
|
2387 |
|
2388 if (filter_is_callable) { |
|
2389 /* pass match object through filter */ |
|
2390 match = pattern_new_match(self, &state, 1); |
|
2391 if (!match) |
|
2392 goto error; |
|
2393 args = PyTuple_Pack(1, match); |
|
2394 if (!args) { |
|
2395 Py_DECREF(match); |
|
2396 goto error; |
|
2397 } |
|
2398 item = PyObject_CallObject(filter, args); |
|
2399 Py_DECREF(args); |
|
2400 Py_DECREF(match); |
|
2401 if (!item) |
|
2402 goto error; |
|
2403 } else { |
|
2404 /* filter is literal string */ |
|
2405 item = filter; |
|
2406 Py_INCREF(item); |
|
2407 } |
|
2408 |
|
2409 /* add to list */ |
|
2410 if (item != Py_None) { |
|
2411 status = PyList_Append(list, item); |
|
2412 Py_DECREF(item); |
|
2413 if (status < 0) |
|
2414 goto error; |
|
2415 } |
|
2416 |
|
2417 i = e; |
|
2418 n = n + 1; |
|
2419 |
|
2420 next: |
|
2421 /* move on */ |
|
2422 if (state.ptr == state.start) |
|
2423 state.start = (void*) ((char*) state.ptr + state.charsize); |
|
2424 else |
|
2425 state.start = state.ptr; |
|
2426 |
|
2427 } |
|
2428 |
|
2429 /* get segment following last match */ |
|
2430 if (i < state.endpos) { |
|
2431 item = PySequence_GetSlice(string, i, state.endpos); |
|
2432 if (!item) |
|
2433 goto error; |
|
2434 status = PyList_Append(list, item); |
|
2435 Py_DECREF(item); |
|
2436 if (status < 0) |
|
2437 goto error; |
|
2438 } |
|
2439 |
|
2440 state_fini(&state); |
|
2441 |
|
2442 Py_DECREF(filter); |
|
2443 |
|
2444 /* convert list to single string (also removes list) */ |
|
2445 item = join_list(list, string); |
|
2446 |
|
2447 if (!item) |
|
2448 return NULL; |
|
2449 |
|
2450 if (subn) |
|
2451 return Py_BuildValue("Ni", item, n); |
|
2452 |
|
2453 return item; |
|
2454 |
|
2455 error: |
|
2456 Py_DECREF(list); |
|
2457 state_fini(&state); |
|
2458 Py_DECREF(filter); |
|
2459 return NULL; |
|
2460 |
|
2461 } |
|
2462 |
|
2463 static PyObject* |
|
2464 pattern_sub(PatternObject* self, PyObject* args, PyObject* kw) |
|
2465 { |
|
2466 PyObject* ptemplate; |
|
2467 PyObject* string; |
|
2468 Py_ssize_t count = 0; |
|
2469 static char* kwlist[] = { "repl", "string", "count", NULL }; |
|
2470 if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|n:sub", kwlist, |
|
2471 &ptemplate, &string, &count)) |
|
2472 return NULL; |
|
2473 |
|
2474 return pattern_subx(self, ptemplate, string, count, 0); |
|
2475 } |
|
2476 |
|
2477 static PyObject* |
|
2478 pattern_subn(PatternObject* self, PyObject* args, PyObject* kw) |
|
2479 { |
|
2480 PyObject* ptemplate; |
|
2481 PyObject* string; |
|
2482 Py_ssize_t count = 0; |
|
2483 static char* kwlist[] = { "repl", "string", "count", NULL }; |
|
2484 if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|n:subn", kwlist, |
|
2485 &ptemplate, &string, &count)) |
|
2486 return NULL; |
|
2487 |
|
2488 return pattern_subx(self, ptemplate, string, count, 1); |
|
2489 } |
|
2490 |
|
2491 static PyObject* |
|
2492 pattern_copy(PatternObject* self, PyObject *unused) |
|
2493 { |
|
2494 #ifdef USE_BUILTIN_COPY |
|
2495 PatternObject* copy; |
|
2496 int offset; |
|
2497 |
|
2498 copy = PyObject_NEW_VAR(PatternObject, &Pattern_Type, self->codesize); |
|
2499 if (!copy) |
|
2500 return NULL; |
|
2501 |
|
2502 offset = offsetof(PatternObject, groups); |
|
2503 |
|
2504 Py_XINCREF(self->groupindex); |
|
2505 Py_XINCREF(self->indexgroup); |
|
2506 Py_XINCREF(self->pattern); |
|
2507 |
|
2508 memcpy((char*) copy + offset, (char*) self + offset, |
|
2509 sizeof(PatternObject) + self->codesize * sizeof(SRE_CODE) - offset); |
|
2510 copy->weakreflist = NULL; |
|
2511 |
|
2512 return (PyObject*) copy; |
|
2513 #else |
|
2514 PyErr_SetString(PyExc_TypeError, "cannot copy this pattern object"); |
|
2515 return NULL; |
|
2516 #endif |
|
2517 } |
|
2518 |
|
2519 static PyObject* |
|
2520 pattern_deepcopy(PatternObject* self, PyObject* memo) |
|
2521 { |
|
2522 #ifdef USE_BUILTIN_COPY |
|
2523 PatternObject* copy; |
|
2524 |
|
2525 copy = (PatternObject*) pattern_copy(self); |
|
2526 if (!copy) |
|
2527 return NULL; |
|
2528 |
|
2529 if (!deepcopy(©->groupindex, memo) || |
|
2530 !deepcopy(©->indexgroup, memo) || |
|
2531 !deepcopy(©->pattern, memo)) { |
|
2532 Py_DECREF(copy); |
|
2533 return NULL; |
|
2534 } |
|
2535 |
|
2536 #else |
|
2537 PyErr_SetString(PyExc_TypeError, "cannot deepcopy this pattern object"); |
|
2538 return NULL; |
|
2539 #endif |
|
2540 } |
|
2541 |
|
2542 PyDoc_STRVAR(pattern_match_doc, |
|
2543 "match(string[, pos[, endpos]]) --> match object or None.\n\ |
|
2544 Matches zero or more characters at the beginning of the string"); |
|
2545 |
|
2546 PyDoc_STRVAR(pattern_search_doc, |
|
2547 "search(string[, pos[, endpos]]) --> match object or None.\n\ |
|
2548 Scan through string looking for a match, and return a corresponding\n\ |
|
2549 MatchObject instance. Return None if no position in the string matches."); |
|
2550 |
|
2551 PyDoc_STRVAR(pattern_split_doc, |
|
2552 "split(string[, maxsplit = 0]) --> list.\n\ |
|
2553 Split string by the occurrences of pattern."); |
|
2554 |
|
2555 PyDoc_STRVAR(pattern_findall_doc, |
|
2556 "findall(string[, pos[, endpos]]) --> list.\n\ |
|
2557 Return a list of all non-overlapping matches of pattern in string."); |
|
2558 |
|
2559 PyDoc_STRVAR(pattern_finditer_doc, |
|
2560 "finditer(string[, pos[, endpos]]) --> iterator.\n\ |
|
2561 Return an iterator over all non-overlapping matches for the \n\ |
|
2562 RE pattern in string. For each match, the iterator returns a\n\ |
|
2563 match object."); |
|
2564 |
|
2565 PyDoc_STRVAR(pattern_sub_doc, |
|
2566 "sub(repl, string[, count = 0]) --> newstring\n\ |
|
2567 Return the string obtained by replacing the leftmost non-overlapping\n\ |
|
2568 occurrences of pattern in string by the replacement repl."); |
|
2569 |
|
2570 PyDoc_STRVAR(pattern_subn_doc, |
|
2571 "subn(repl, string[, count = 0]) --> (newstring, number of subs)\n\ |
|
2572 Return the tuple (new_string, number_of_subs_made) found by replacing\n\ |
|
2573 the leftmost non-overlapping occurrences of pattern with the\n\ |
|
2574 replacement repl."); |
|
2575 |
|
2576 PyDoc_STRVAR(pattern_doc, "Compiled regular expression objects"); |
|
2577 |
|
2578 static PyMethodDef pattern_methods[] = { |
|
2579 {"match", (PyCFunction) pattern_match, METH_VARARGS|METH_KEYWORDS, |
|
2580 pattern_match_doc}, |
|
2581 {"search", (PyCFunction) pattern_search, METH_VARARGS|METH_KEYWORDS, |
|
2582 pattern_search_doc}, |
|
2583 {"sub", (PyCFunction) pattern_sub, METH_VARARGS|METH_KEYWORDS, |
|
2584 pattern_sub_doc}, |
|
2585 {"subn", (PyCFunction) pattern_subn, METH_VARARGS|METH_KEYWORDS, |
|
2586 pattern_subn_doc}, |
|
2587 {"split", (PyCFunction) pattern_split, METH_VARARGS|METH_KEYWORDS, |
|
2588 pattern_split_doc}, |
|
2589 {"findall", (PyCFunction) pattern_findall, METH_VARARGS|METH_KEYWORDS, |
|
2590 pattern_findall_doc}, |
|
2591 #if PY_VERSION_HEX >= 0x02020000 |
|
2592 {"finditer", (PyCFunction) pattern_finditer, METH_VARARGS, |
|
2593 pattern_finditer_doc}, |
|
2594 #endif |
|
2595 {"scanner", (PyCFunction) pattern_scanner, METH_VARARGS}, |
|
2596 {"__copy__", (PyCFunction) pattern_copy, METH_NOARGS}, |
|
2597 {"__deepcopy__", (PyCFunction) pattern_deepcopy, METH_O}, |
|
2598 {NULL, NULL} |
|
2599 }; |
|
2600 |
|
2601 static PyObject* |
|
2602 pattern_getattr(PatternObject* self, char* name) |
|
2603 { |
|
2604 PyObject* res; |
|
2605 |
|
2606 res = Py_FindMethod(pattern_methods, (PyObject*) self, name); |
|
2607 |
|
2608 if (res) |
|
2609 return res; |
|
2610 |
|
2611 PyErr_Clear(); |
|
2612 |
|
2613 /* attributes */ |
|
2614 if (!strcmp(name, "pattern")) { |
|
2615 Py_INCREF(self->pattern); |
|
2616 return self->pattern; |
|
2617 } |
|
2618 |
|
2619 if (!strcmp(name, "flags")) |
|
2620 return Py_BuildValue("i", self->flags); |
|
2621 |
|
2622 if (!strcmp(name, "groups")) |
|
2623 return Py_BuildValue("i", self->groups); |
|
2624 |
|
2625 if (!strcmp(name, "groupindex") && self->groupindex) { |
|
2626 Py_INCREF(self->groupindex); |
|
2627 return self->groupindex; |
|
2628 } |
|
2629 |
|
2630 PyErr_SetString(PyExc_AttributeError, name); |
|
2631 return NULL; |
|
2632 } |
|
2633 |
|
2634 statichere PyTypeObject Pattern_Type = { |
|
2635 PyObject_HEAD_INIT(NULL) |
|
2636 0, "_" SRE_MODULE ".SRE_Pattern", |
|
2637 sizeof(PatternObject), sizeof(SRE_CODE), |
|
2638 (destructor)pattern_dealloc, /*tp_dealloc*/ |
|
2639 0, /*tp_print*/ |
|
2640 (getattrfunc)pattern_getattr, /*tp_getattr*/ |
|
2641 0, /* tp_setattr */ |
|
2642 0, /* tp_compare */ |
|
2643 0, /* tp_repr */ |
|
2644 0, /* tp_as_number */ |
|
2645 0, /* tp_as_sequence */ |
|
2646 0, /* tp_as_mapping */ |
|
2647 0, /* tp_hash */ |
|
2648 0, /* tp_call */ |
|
2649 0, /* tp_str */ |
|
2650 0, /* tp_getattro */ |
|
2651 0, /* tp_setattro */ |
|
2652 0, /* tp_as_buffer */ |
|
2653 Py_TPFLAGS_HAVE_WEAKREFS, /* tp_flags */ |
|
2654 pattern_doc, /* tp_doc */ |
|
2655 0, /* tp_traverse */ |
|
2656 0, /* tp_clear */ |
|
2657 0, /* tp_richcompare */ |
|
2658 offsetof(PatternObject, weakreflist), /* tp_weaklistoffset */ |
|
2659 }; |
|
2660 |
|
2661 static int _validate(PatternObject *self); /* Forward */ |
|
2662 |
|
2663 static PyObject * |
|
2664 _compile(PyObject* self_, PyObject* args) |
|
2665 { |
|
2666 /* "compile" pattern descriptor to pattern object */ |
|
2667 |
|
2668 PatternObject* self; |
|
2669 Py_ssize_t i, n; |
|
2670 |
|
2671 PyObject* pattern; |
|
2672 int flags = 0; |
|
2673 PyObject* code; |
|
2674 Py_ssize_t groups = 0; |
|
2675 PyObject* groupindex = NULL; |
|
2676 PyObject* indexgroup = NULL; |
|
2677 if (!PyArg_ParseTuple(args, "OiO!|nOO", &pattern, &flags, |
|
2678 &PyList_Type, &code, &groups, |
|
2679 &groupindex, &indexgroup)) |
|
2680 return NULL; |
|
2681 |
|
2682 n = PyList_GET_SIZE(code); |
|
2683 /* coverity[ampersand_in_size] */ |
|
2684 self = PyObject_NEW_VAR(PatternObject, &Pattern_Type, n); |
|
2685 if (!self) |
|
2686 return NULL; |
|
2687 |
|
2688 self->codesize = n; |
|
2689 |
|
2690 for (i = 0; i < n; i++) { |
|
2691 PyObject *o = PyList_GET_ITEM(code, i); |
|
2692 unsigned long value = PyInt_Check(o) ? (unsigned long)PyInt_AsLong(o) |
|
2693 : PyLong_AsUnsignedLong(o); |
|
2694 self->code[i] = (SRE_CODE) value; |
|
2695 if ((unsigned long) self->code[i] != value) { |
|
2696 PyErr_SetString(PyExc_OverflowError, |
|
2697 "regular expression code size limit exceeded"); |
|
2698 break; |
|
2699 } |
|
2700 } |
|
2701 |
|
2702 if (PyErr_Occurred()) { |
|
2703 PyObject_DEL(self); |
|
2704 return NULL; |
|
2705 } |
|
2706 |
|
2707 Py_INCREF(pattern); |
|
2708 self->pattern = pattern; |
|
2709 |
|
2710 self->flags = flags; |
|
2711 |
|
2712 self->groups = groups; |
|
2713 |
|
2714 Py_XINCREF(groupindex); |
|
2715 self->groupindex = groupindex; |
|
2716 |
|
2717 Py_XINCREF(indexgroup); |
|
2718 self->indexgroup = indexgroup; |
|
2719 |
|
2720 self->weakreflist = NULL; |
|
2721 |
|
2722 if (!_validate(self)) { |
|
2723 Py_DECREF(self); |
|
2724 return NULL; |
|
2725 } |
|
2726 |
|
2727 return (PyObject*) self; |
|
2728 } |
|
2729 |
|
2730 /* -------------------------------------------------------------------- */ |
|
2731 /* Code validation */ |
|
2732 |
|
2733 /* To learn more about this code, have a look at the _compile() function in |
|
2734 Lib/sre_compile.py. The validation functions below checks the code array |
|
2735 for conformance with the code patterns generated there. |
|
2736 |
|
2737 The nice thing about the generated code is that it is position-independent: |
|
2738 all jumps are relative jumps forward. Also, jumps don't cross each other: |
|
2739 the target of a later jump is always earlier than the target of an earlier |
|
2740 jump. IOW, this is okay: |
|
2741 |
|
2742 J---------J-------T--------T |
|
2743 \ \_____/ / |
|
2744 \______________________/ |
|
2745 |
|
2746 but this is not: |
|
2747 |
|
2748 J---------J-------T--------T |
|
2749 \_________\_____/ / |
|
2750 \____________/ |
|
2751 |
|
2752 It also helps that SRE_CODE is always an unsigned type, either 2 bytes or 4 |
|
2753 bytes wide (the latter if Python is compiled for "wide" unicode support). |
|
2754 */ |
|
2755 |
|
2756 /* Defining this one enables tracing of the validator */ |
|
2757 #undef VVERBOSE |
|
2758 |
|
2759 /* Trace macro for the validator */ |
|
2760 #if defined(VVERBOSE) |
|
2761 #define VTRACE(v) printf v |
|
2762 #else |
|
2763 #define VTRACE(v) |
|
2764 #endif |
|
2765 |
|
2766 /* Report failure */ |
|
2767 #define FAIL do { VTRACE(("FAIL: %d\n", __LINE__)); return 0; } while (0) |
|
2768 |
|
2769 /* Extract opcode, argument, or skip count from code array */ |
|
2770 #define GET_OP \ |
|
2771 do { \ |
|
2772 VTRACE(("%p: ", code)); \ |
|
2773 if (code >= end) FAIL; \ |
|
2774 op = *code++; \ |
|
2775 VTRACE(("%lu (op)\n", (unsigned long)op)); \ |
|
2776 } while (0) |
|
2777 #define GET_ARG \ |
|
2778 do { \ |
|
2779 VTRACE(("%p= ", code)); \ |
|
2780 if (code >= end) FAIL; \ |
|
2781 arg = *code++; \ |
|
2782 VTRACE(("%lu (arg)\n", (unsigned long)arg)); \ |
|
2783 } while (0) |
|
2784 #define GET_SKIP_ADJ(adj) \ |
|
2785 do { \ |
|
2786 VTRACE(("%p= ", code)); \ |
|
2787 if (code >= end) FAIL; \ |
|
2788 skip = *code; \ |
|
2789 VTRACE(("%lu (skip to %p)\n", \ |
|
2790 (unsigned long)skip, code+skip)); \ |
|
2791 if (code+skip-adj < code || code+skip-adj > end)\ |
|
2792 FAIL; \ |
|
2793 code++; \ |
|
2794 } while (0) |
|
2795 #define GET_SKIP GET_SKIP_ADJ(0) |
|
2796 |
|
2797 static int |
|
2798 _validate_charset(SRE_CODE *code, SRE_CODE *end) |
|
2799 { |
|
2800 /* Some variables are manipulated by the macros above */ |
|
2801 SRE_CODE op; |
|
2802 SRE_CODE arg; |
|
2803 SRE_CODE offset; |
|
2804 int i; |
|
2805 |
|
2806 while (code < end) { |
|
2807 GET_OP; |
|
2808 switch (op) { |
|
2809 |
|
2810 case SRE_OP_NEGATE: |
|
2811 break; |
|
2812 |
|
2813 case SRE_OP_LITERAL: |
|
2814 GET_ARG; |
|
2815 break; |
|
2816 |
|
2817 case SRE_OP_RANGE: |
|
2818 GET_ARG; |
|
2819 GET_ARG; |
|
2820 break; |
|
2821 |
|
2822 case SRE_OP_CHARSET: |
|
2823 offset = 32/sizeof(SRE_CODE); /* 32-byte bitmap */ |
|
2824 if (code+offset < code || code+offset > end) |
|
2825 FAIL; |
|
2826 code += offset; |
|
2827 break; |
|
2828 |
|
2829 case SRE_OP_BIGCHARSET: |
|
2830 GET_ARG; /* Number of blocks */ |
|
2831 offset = 256/sizeof(SRE_CODE); /* 256-byte table */ |
|
2832 if (code+offset < code || code+offset > end) |
|
2833 FAIL; |
|
2834 /* Make sure that each byte points to a valid block */ |
|
2835 for (i = 0; i < 256; i++) { |
|
2836 if (((unsigned char *)code)[i] >= arg) |
|
2837 FAIL; |
|
2838 } |
|
2839 code += offset; |
|
2840 offset = arg * 32/sizeof(SRE_CODE); /* 32-byte bitmap times arg */ |
|
2841 if (code+offset < code || code+offset > end) |
|
2842 FAIL; |
|
2843 code += offset; |
|
2844 break; |
|
2845 |
|
2846 case SRE_OP_CATEGORY: |
|
2847 GET_ARG; |
|
2848 switch (arg) { |
|
2849 case SRE_CATEGORY_DIGIT: |
|
2850 case SRE_CATEGORY_NOT_DIGIT: |
|
2851 case SRE_CATEGORY_SPACE: |
|
2852 case SRE_CATEGORY_NOT_SPACE: |
|
2853 case SRE_CATEGORY_WORD: |
|
2854 case SRE_CATEGORY_NOT_WORD: |
|
2855 case SRE_CATEGORY_LINEBREAK: |
|
2856 case SRE_CATEGORY_NOT_LINEBREAK: |
|
2857 case SRE_CATEGORY_LOC_WORD: |
|
2858 case SRE_CATEGORY_LOC_NOT_WORD: |
|
2859 case SRE_CATEGORY_UNI_DIGIT: |
|
2860 case SRE_CATEGORY_UNI_NOT_DIGIT: |
|
2861 case SRE_CATEGORY_UNI_SPACE: |
|
2862 case SRE_CATEGORY_UNI_NOT_SPACE: |
|
2863 case SRE_CATEGORY_UNI_WORD: |
|
2864 case SRE_CATEGORY_UNI_NOT_WORD: |
|
2865 case SRE_CATEGORY_UNI_LINEBREAK: |
|
2866 case SRE_CATEGORY_UNI_NOT_LINEBREAK: |
|
2867 break; |
|
2868 default: |
|
2869 FAIL; |
|
2870 } |
|
2871 break; |
|
2872 |
|
2873 default: |
|
2874 FAIL; |
|
2875 |
|
2876 } |
|
2877 } |
|
2878 |
|
2879 return 1; |
|
2880 } |
|
2881 |
|
2882 static int |
|
2883 _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups) |
|
2884 { |
|
2885 /* Some variables are manipulated by the macros above */ |
|
2886 SRE_CODE op; |
|
2887 SRE_CODE arg; |
|
2888 SRE_CODE skip; |
|
2889 |
|
2890 VTRACE(("code=%p, end=%p\n", code, end)); |
|
2891 |
|
2892 if (code > end) |
|
2893 FAIL; |
|
2894 |
|
2895 while (code < end) { |
|
2896 GET_OP; |
|
2897 switch (op) { |
|
2898 |
|
2899 case SRE_OP_MARK: |
|
2900 /* We don't check whether marks are properly nested; the |
|
2901 sre_match() code is robust even if they don't, and the worst |
|
2902 you can get is nonsensical match results. */ |
|
2903 GET_ARG; |
|
2904 if (arg > 2*groups+1) { |
|
2905 VTRACE(("arg=%d, groups=%d\n", (int)arg, (int)groups)); |
|
2906 FAIL; |
|
2907 } |
|
2908 break; |
|
2909 |
|
2910 case SRE_OP_LITERAL: |
|
2911 case SRE_OP_NOT_LITERAL: |
|
2912 case SRE_OP_LITERAL_IGNORE: |
|
2913 case SRE_OP_NOT_LITERAL_IGNORE: |
|
2914 GET_ARG; |
|
2915 /* The arg is just a character, nothing to check */ |
|
2916 break; |
|
2917 |
|
2918 case SRE_OP_SUCCESS: |
|
2919 case SRE_OP_FAILURE: |
|
2920 /* Nothing to check; these normally end the matching process */ |
|
2921 break; |
|
2922 |
|
2923 case SRE_OP_AT: |
|
2924 GET_ARG; |
|
2925 switch (arg) { |
|
2926 case SRE_AT_BEGINNING: |
|
2927 case SRE_AT_BEGINNING_STRING: |
|
2928 case SRE_AT_BEGINNING_LINE: |
|
2929 case SRE_AT_END: |
|
2930 case SRE_AT_END_LINE: |
|
2931 case SRE_AT_END_STRING: |
|
2932 case SRE_AT_BOUNDARY: |
|
2933 case SRE_AT_NON_BOUNDARY: |
|
2934 case SRE_AT_LOC_BOUNDARY: |
|
2935 case SRE_AT_LOC_NON_BOUNDARY: |
|
2936 case SRE_AT_UNI_BOUNDARY: |
|
2937 case SRE_AT_UNI_NON_BOUNDARY: |
|
2938 break; |
|
2939 default: |
|
2940 FAIL; |
|
2941 } |
|
2942 break; |
|
2943 |
|
2944 case SRE_OP_ANY: |
|
2945 case SRE_OP_ANY_ALL: |
|
2946 /* These have no operands */ |
|
2947 break; |
|
2948 |
|
2949 case SRE_OP_IN: |
|
2950 case SRE_OP_IN_IGNORE: |
|
2951 GET_SKIP; |
|
2952 /* Stop 1 before the end; we check the FAILURE below */ |
|
2953 if (!_validate_charset(code, code+skip-2)) |
|
2954 FAIL; |
|
2955 if (code[skip-2] != SRE_OP_FAILURE) |
|
2956 FAIL; |
|
2957 code += skip-1; |
|
2958 break; |
|
2959 |
|
2960 case SRE_OP_INFO: |
|
2961 { |
|
2962 /* A minimal info field is |
|
2963 <INFO> <1=skip> <2=flags> <3=min> <4=max>; |
|
2964 If SRE_INFO_PREFIX or SRE_INFO_CHARSET is in the flags, |
|
2965 more follows. */ |
|
2966 SRE_CODE flags, min, max, i; |
|
2967 SRE_CODE *newcode; |
|
2968 GET_SKIP; |
|
2969 newcode = code+skip-1; |
|
2970 GET_ARG; flags = arg; |
|
2971 GET_ARG; min = arg; |
|
2972 GET_ARG; max = arg; |
|
2973 /* Check that only valid flags are present */ |
|
2974 if ((flags & ~(SRE_INFO_PREFIX | |
|
2975 SRE_INFO_LITERAL | |
|
2976 SRE_INFO_CHARSET)) != 0) |
|
2977 FAIL; |
|
2978 /* PREFIX and CHARSET are mutually exclusive */ |
|
2979 if ((flags & SRE_INFO_PREFIX) && |
|
2980 (flags & SRE_INFO_CHARSET)) |
|
2981 FAIL; |
|
2982 /* LITERAL implies PREFIX */ |
|
2983 if ((flags & SRE_INFO_LITERAL) && |
|
2984 !(flags & SRE_INFO_PREFIX)) |
|
2985 FAIL; |
|
2986 /* Validate the prefix */ |
|
2987 if (flags & SRE_INFO_PREFIX) { |
|
2988 SRE_CODE prefix_len, prefix_skip; |
|
2989 GET_ARG; prefix_len = arg; |
|
2990 GET_ARG; prefix_skip = arg; |
|
2991 /* Here comes the prefix string */ |
|
2992 if (code+prefix_len < code || code+prefix_len > newcode) |
|
2993 FAIL; |
|
2994 code += prefix_len; |
|
2995 /* And here comes the overlap table */ |
|
2996 if (code+prefix_len < code || code+prefix_len > newcode) |
|
2997 FAIL; |
|
2998 /* Each overlap value should be < prefix_len */ |
|
2999 for (i = 0; i < prefix_len; i++) { |
|
3000 if (code[i] >= prefix_len) |
|
3001 FAIL; |
|
3002 } |
|
3003 code += prefix_len; |
|
3004 } |
|
3005 /* Validate the charset */ |
|
3006 if (flags & SRE_INFO_CHARSET) { |
|
3007 if (!_validate_charset(code, newcode-1)) |
|
3008 FAIL; |
|
3009 if (newcode[-1] != SRE_OP_FAILURE) |
|
3010 FAIL; |
|
3011 code = newcode; |
|
3012 } |
|
3013 else if (code != newcode) { |
|
3014 VTRACE(("code=%p, newcode=%p\n", code, newcode)); |
|
3015 FAIL; |
|
3016 } |
|
3017 } |
|
3018 break; |
|
3019 |
|
3020 case SRE_OP_BRANCH: |
|
3021 { |
|
3022 SRE_CODE *target = NULL; |
|
3023 for (;;) { |
|
3024 GET_SKIP; |
|
3025 if (skip == 0) |
|
3026 break; |
|
3027 /* Stop 2 before the end; we check the JUMP below */ |
|
3028 if (!_validate_inner(code, code+skip-3, groups)) |
|
3029 FAIL; |
|
3030 code += skip-3; |
|
3031 /* Check that it ends with a JUMP, and that each JUMP |
|
3032 has the same target */ |
|
3033 GET_OP; |
|
3034 if (op != SRE_OP_JUMP) |
|
3035 FAIL; |
|
3036 GET_SKIP; |
|
3037 if (target == NULL) |
|
3038 target = code+skip-1; |
|
3039 else if (code+skip-1 != target) |
|
3040 FAIL; |
|
3041 } |
|
3042 } |
|
3043 break; |
|
3044 |
|
3045 case SRE_OP_REPEAT_ONE: |
|
3046 case SRE_OP_MIN_REPEAT_ONE: |
|
3047 { |
|
3048 SRE_CODE min, max; |
|
3049 GET_SKIP; |
|
3050 GET_ARG; min = arg; |
|
3051 GET_ARG; max = arg; |
|
3052 if (min > max) |
|
3053 FAIL; |
|
3054 #ifdef Py_UNICODE_WIDE |
|
3055 if (max > 65535) |
|
3056 FAIL; |
|
3057 #endif |
|
3058 if (!_validate_inner(code, code+skip-4, groups)) |
|
3059 FAIL; |
|
3060 code += skip-4; |
|
3061 GET_OP; |
|
3062 if (op != SRE_OP_SUCCESS) |
|
3063 FAIL; |
|
3064 } |
|
3065 break; |
|
3066 |
|
3067 case SRE_OP_REPEAT: |
|
3068 { |
|
3069 SRE_CODE min, max; |
|
3070 GET_SKIP; |
|
3071 GET_ARG; min = arg; |
|
3072 GET_ARG; max = arg; |
|
3073 if (min > max) |
|
3074 FAIL; |
|
3075 #ifdef Py_UNICODE_WIDE |
|
3076 if (max > 65535) |
|
3077 FAIL; |
|
3078 #endif |
|
3079 if (!_validate_inner(code, code+skip-3, groups)) |
|
3080 FAIL; |
|
3081 code += skip-3; |
|
3082 GET_OP; |
|
3083 if (op != SRE_OP_MAX_UNTIL && op != SRE_OP_MIN_UNTIL) |
|
3084 FAIL; |
|
3085 } |
|
3086 break; |
|
3087 |
|
3088 case SRE_OP_GROUPREF: |
|
3089 case SRE_OP_GROUPREF_IGNORE: |
|
3090 GET_ARG; |
|
3091 if (arg >= groups) |
|
3092 FAIL; |
|
3093 break; |
|
3094 |
|
3095 case SRE_OP_GROUPREF_EXISTS: |
|
3096 /* The regex syntax for this is: '(?(group)then|else)', where |
|
3097 'group' is either an integer group number or a group name, |
|
3098 'then' and 'else' are sub-regexes, and 'else' is optional. */ |
|
3099 GET_ARG; |
|
3100 if (arg >= groups) |
|
3101 FAIL; |
|
3102 GET_SKIP_ADJ(1); |
|
3103 code--; /* The skip is relative to the first arg! */ |
|
3104 /* There are two possibilities here: if there is both a 'then' |
|
3105 part and an 'else' part, the generated code looks like: |
|
3106 |
|
3107 GROUPREF_EXISTS |
|
3108 <group> |
|
3109 <skipyes> |
|
3110 ...then part... |
|
3111 JUMP |
|
3112 <skipno> |
|
3113 (<skipyes> jumps here) |
|
3114 ...else part... |
|
3115 (<skipno> jumps here) |
|
3116 |
|
3117 If there is only a 'then' part, it looks like: |
|
3118 |
|
3119 GROUPREF_EXISTS |
|
3120 <group> |
|
3121 <skip> |
|
3122 ...then part... |
|
3123 (<skip> jumps here) |
|
3124 |
|
3125 There is no direct way to decide which it is, and we don't want |
|
3126 to allow arbitrary jumps anywhere in the code; so we just look |
|
3127 for a JUMP opcode preceding our skip target. |
|
3128 */ |
|
3129 if (skip >= 3 && code+skip-3 >= code && |
|
3130 code[skip-3] == SRE_OP_JUMP) |
|
3131 { |
|
3132 VTRACE(("both then and else parts present\n")); |
|
3133 if (!_validate_inner(code+1, code+skip-3, groups)) |
|
3134 FAIL; |
|
3135 code += skip-2; /* Position after JUMP, at <skipno> */ |
|
3136 GET_SKIP; |
|
3137 if (!_validate_inner(code, code+skip-1, groups)) |
|
3138 FAIL; |
|
3139 code += skip-1; |
|
3140 } |
|
3141 else { |
|
3142 VTRACE(("only a then part present\n")); |
|
3143 if (!_validate_inner(code+1, code+skip-1, groups)) |
|
3144 FAIL; |
|
3145 code += skip-1; |
|
3146 } |
|
3147 break; |
|
3148 |
|
3149 case SRE_OP_ASSERT: |
|
3150 case SRE_OP_ASSERT_NOT: |
|
3151 GET_SKIP; |
|
3152 GET_ARG; /* 0 for lookahead, width for lookbehind */ |
|
3153 code--; /* Back up over arg to simplify math below */ |
|
3154 if (arg & 0x80000000) |
|
3155 FAIL; /* Width too large */ |
|
3156 /* Stop 1 before the end; we check the SUCCESS below */ |
|
3157 if (!_validate_inner(code+1, code+skip-2, groups)) |
|
3158 FAIL; |
|
3159 code += skip-2; |
|
3160 GET_OP; |
|
3161 if (op != SRE_OP_SUCCESS) |
|
3162 FAIL; |
|
3163 break; |
|
3164 |
|
3165 default: |
|
3166 FAIL; |
|
3167 |
|
3168 } |
|
3169 } |
|
3170 |
|
3171 VTRACE(("okay\n")); |
|
3172 return 1; |
|
3173 } |
|
3174 |
|
3175 static int |
|
3176 _validate_outer(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups) |
|
3177 { |
|
3178 if (groups < 0 || groups > 100 || code >= end || end[-1] != SRE_OP_SUCCESS) |
|
3179 FAIL; |
|
3180 if (groups == 0) /* fix for simplejson */ |
|
3181 groups = 100; /* 100 groups should always be safe */ |
|
3182 return _validate_inner(code, end-1, groups); |
|
3183 } |
|
3184 |
|
3185 static int |
|
3186 _validate(PatternObject *self) |
|
3187 { |
|
3188 if (!_validate_outer(self->code, self->code+self->codesize, self->groups)) |
|
3189 { |
|
3190 PyErr_SetString(PyExc_RuntimeError, "invalid SRE code"); |
|
3191 return 0; |
|
3192 } |
|
3193 else |
|
3194 VTRACE(("Success!\n")); |
|
3195 return 1; |
|
3196 } |
|
3197 |
|
3198 /* -------------------------------------------------------------------- */ |
|
3199 /* match methods */ |
|
3200 |
|
3201 static void |
|
3202 match_dealloc(MatchObject* self) |
|
3203 { |
|
3204 Py_XDECREF(self->regs); |
|
3205 Py_XDECREF(self->string); |
|
3206 Py_DECREF(self->pattern); |
|
3207 PyObject_DEL(self); |
|
3208 } |
|
3209 |
|
3210 static PyObject* |
|
3211 match_getslice_by_index(MatchObject* self, Py_ssize_t index, PyObject* def) |
|
3212 { |
|
3213 if (index < 0 || index >= self->groups) { |
|
3214 /* raise IndexError if we were given a bad group number */ |
|
3215 PyErr_SetString( |
|
3216 PyExc_IndexError, |
|
3217 "no such group" |
|
3218 ); |
|
3219 return NULL; |
|
3220 } |
|
3221 |
|
3222 index *= 2; |
|
3223 |
|
3224 if (self->string == Py_None || self->mark[index] < 0) { |
|
3225 /* return default value if the string or group is undefined */ |
|
3226 Py_INCREF(def); |
|
3227 return def; |
|
3228 } |
|
3229 |
|
3230 return PySequence_GetSlice( |
|
3231 self->string, self->mark[index], self->mark[index+1] |
|
3232 ); |
|
3233 } |
|
3234 |
|
3235 static Py_ssize_t |
|
3236 match_getindex(MatchObject* self, PyObject* index) |
|
3237 { |
|
3238 Py_ssize_t i; |
|
3239 |
|
3240 if (PyInt_Check(index)) |
|
3241 return PyInt_AsSsize_t(index); |
|
3242 |
|
3243 i = -1; |
|
3244 |
|
3245 if (self->pattern->groupindex) { |
|
3246 index = PyObject_GetItem(self->pattern->groupindex, index); |
|
3247 if (index) { |
|
3248 if (PyInt_Check(index) || PyLong_Check(index)) |
|
3249 i = PyInt_AsSsize_t(index); |
|
3250 Py_DECREF(index); |
|
3251 } else |
|
3252 PyErr_Clear(); |
|
3253 } |
|
3254 |
|
3255 return i; |
|
3256 } |
|
3257 |
|
3258 static PyObject* |
|
3259 match_getslice(MatchObject* self, PyObject* index, PyObject* def) |
|
3260 { |
|
3261 return match_getslice_by_index(self, match_getindex(self, index), def); |
|
3262 } |
|
3263 |
|
3264 static PyObject* |
|
3265 match_expand(MatchObject* self, PyObject* ptemplate) |
|
3266 { |
|
3267 /* delegate to Python code */ |
|
3268 return call( |
|
3269 SRE_PY_MODULE, "_expand", |
|
3270 PyTuple_Pack(3, self->pattern, self, ptemplate) |
|
3271 ); |
|
3272 } |
|
3273 |
|
3274 static PyObject* |
|
3275 match_group(MatchObject* self, PyObject* args) |
|
3276 { |
|
3277 PyObject* result; |
|
3278 Py_ssize_t i, size; |
|
3279 |
|
3280 size = PyTuple_GET_SIZE(args); |
|
3281 |
|
3282 switch (size) { |
|
3283 case 0: |
|
3284 result = match_getslice(self, Py_False, Py_None); |
|
3285 break; |
|
3286 case 1: |
|
3287 result = match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None); |
|
3288 break; |
|
3289 default: |
|
3290 /* fetch multiple items */ |
|
3291 result = PyTuple_New(size); |
|
3292 if (!result) |
|
3293 return NULL; |
|
3294 for (i = 0; i < size; i++) { |
|
3295 PyObject* item = match_getslice( |
|
3296 self, PyTuple_GET_ITEM(args, i), Py_None |
|
3297 ); |
|
3298 if (!item) { |
|
3299 Py_DECREF(result); |
|
3300 return NULL; |
|
3301 } |
|
3302 PyTuple_SET_ITEM(result, i, item); |
|
3303 } |
|
3304 break; |
|
3305 } |
|
3306 return result; |
|
3307 } |
|
3308 |
|
3309 static PyObject* |
|
3310 match_groups(MatchObject* self, PyObject* args, PyObject* kw) |
|
3311 { |
|
3312 PyObject* result; |
|
3313 Py_ssize_t index; |
|
3314 |
|
3315 PyObject* def = Py_None; |
|
3316 static char* kwlist[] = { "default", NULL }; |
|
3317 if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groups", kwlist, &def)) |
|
3318 return NULL; |
|
3319 |
|
3320 result = PyTuple_New(self->groups-1); |
|
3321 if (!result) |
|
3322 return NULL; |
|
3323 |
|
3324 for (index = 1; index < self->groups; index++) { |
|
3325 PyObject* item; |
|
3326 item = match_getslice_by_index(self, index, def); |
|
3327 if (!item) { |
|
3328 Py_DECREF(result); |
|
3329 return NULL; |
|
3330 } |
|
3331 PyTuple_SET_ITEM(result, index-1, item); |
|
3332 } |
|
3333 |
|
3334 return result; |
|
3335 } |
|
3336 |
|
3337 static PyObject* |
|
3338 match_groupdict(MatchObject* self, PyObject* args, PyObject* kw) |
|
3339 { |
|
3340 PyObject* result; |
|
3341 PyObject* keys; |
|
3342 Py_ssize_t index; |
|
3343 |
|
3344 PyObject* def = Py_None; |
|
3345 static char* kwlist[] = { "default", NULL }; |
|
3346 if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groupdict", kwlist, &def)) |
|
3347 return NULL; |
|
3348 |
|
3349 result = PyDict_New(); |
|
3350 if (!result || !self->pattern->groupindex) |
|
3351 return result; |
|
3352 |
|
3353 keys = PyMapping_Keys(self->pattern->groupindex); |
|
3354 if (!keys) |
|
3355 goto failed; |
|
3356 |
|
3357 for (index = 0; index < PyList_GET_SIZE(keys); index++) { |
|
3358 int status; |
|
3359 PyObject* key; |
|
3360 PyObject* value; |
|
3361 key = PyList_GET_ITEM(keys, index); |
|
3362 if (!key) |
|
3363 goto failed; |
|
3364 value = match_getslice(self, key, def); |
|
3365 if (!value) { |
|
3366 Py_DECREF(key); |
|
3367 goto failed; |
|
3368 } |
|
3369 status = PyDict_SetItem(result, key, value); |
|
3370 Py_DECREF(value); |
|
3371 if (status < 0) |
|
3372 goto failed; |
|
3373 } |
|
3374 |
|
3375 Py_DECREF(keys); |
|
3376 |
|
3377 return result; |
|
3378 |
|
3379 failed: |
|
3380 Py_XDECREF(keys); |
|
3381 Py_DECREF(result); |
|
3382 return NULL; |
|
3383 } |
|
3384 |
|
3385 static PyObject* |
|
3386 match_start(MatchObject* self, PyObject* args) |
|
3387 { |
|
3388 Py_ssize_t index; |
|
3389 |
|
3390 PyObject* index_ = Py_False; /* zero */ |
|
3391 if (!PyArg_UnpackTuple(args, "start", 0, 1, &index_)) |
|
3392 return NULL; |
|
3393 |
|
3394 index = match_getindex(self, index_); |
|
3395 |
|
3396 if (index < 0 || index >= self->groups) { |
|
3397 PyErr_SetString( |
|
3398 PyExc_IndexError, |
|
3399 "no such group" |
|
3400 ); |
|
3401 return NULL; |
|
3402 } |
|
3403 |
|
3404 /* mark is -1 if group is undefined */ |
|
3405 return Py_BuildValue("i", self->mark[index*2]); |
|
3406 } |
|
3407 |
|
3408 static PyObject* |
|
3409 match_end(MatchObject* self, PyObject* args) |
|
3410 { |
|
3411 Py_ssize_t index; |
|
3412 |
|
3413 PyObject* index_ = Py_False; /* zero */ |
|
3414 if (!PyArg_UnpackTuple(args, "end", 0, 1, &index_)) |
|
3415 return NULL; |
|
3416 |
|
3417 index = match_getindex(self, index_); |
|
3418 |
|
3419 if (index < 0 || index >= self->groups) { |
|
3420 PyErr_SetString( |
|
3421 PyExc_IndexError, |
|
3422 "no such group" |
|
3423 ); |
|
3424 return NULL; |
|
3425 } |
|
3426 |
|
3427 /* mark is -1 if group is undefined */ |
|
3428 return Py_BuildValue("i", self->mark[index*2+1]); |
|
3429 } |
|
3430 |
|
3431 LOCAL(PyObject*) |
|
3432 _pair(Py_ssize_t i1, Py_ssize_t i2) |
|
3433 { |
|
3434 PyObject* pair; |
|
3435 PyObject* item; |
|
3436 |
|
3437 pair = PyTuple_New(2); |
|
3438 if (!pair) |
|
3439 return NULL; |
|
3440 |
|
3441 item = PyInt_FromSsize_t(i1); |
|
3442 if (!item) |
|
3443 goto error; |
|
3444 PyTuple_SET_ITEM(pair, 0, item); |
|
3445 |
|
3446 item = PyInt_FromSsize_t(i2); |
|
3447 if (!item) |
|
3448 goto error; |
|
3449 PyTuple_SET_ITEM(pair, 1, item); |
|
3450 |
|
3451 return pair; |
|
3452 |
|
3453 error: |
|
3454 Py_DECREF(pair); |
|
3455 return NULL; |
|
3456 } |
|
3457 |
|
3458 static PyObject* |
|
3459 match_span(MatchObject* self, PyObject* args) |
|
3460 { |
|
3461 Py_ssize_t index; |
|
3462 |
|
3463 PyObject* index_ = Py_False; /* zero */ |
|
3464 if (!PyArg_UnpackTuple(args, "span", 0, 1, &index_)) |
|
3465 return NULL; |
|
3466 |
|
3467 index = match_getindex(self, index_); |
|
3468 |
|
3469 if (index < 0 || index >= self->groups) { |
|
3470 PyErr_SetString( |
|
3471 PyExc_IndexError, |
|
3472 "no such group" |
|
3473 ); |
|
3474 return NULL; |
|
3475 } |
|
3476 |
|
3477 /* marks are -1 if group is undefined */ |
|
3478 return _pair(self->mark[index*2], self->mark[index*2+1]); |
|
3479 } |
|
3480 |
|
3481 static PyObject* |
|
3482 match_regs(MatchObject* self) |
|
3483 { |
|
3484 PyObject* regs; |
|
3485 PyObject* item; |
|
3486 Py_ssize_t index; |
|
3487 |
|
3488 regs = PyTuple_New(self->groups); |
|
3489 if (!regs) |
|
3490 return NULL; |
|
3491 |
|
3492 for (index = 0; index < self->groups; index++) { |
|
3493 item = _pair(self->mark[index*2], self->mark[index*2+1]); |
|
3494 if (!item) { |
|
3495 Py_DECREF(regs); |
|
3496 return NULL; |
|
3497 } |
|
3498 PyTuple_SET_ITEM(regs, index, item); |
|
3499 } |
|
3500 |
|
3501 Py_INCREF(regs); |
|
3502 self->regs = regs; |
|
3503 |
|
3504 return regs; |
|
3505 } |
|
3506 |
|
3507 static PyObject* |
|
3508 match_copy(MatchObject* self, PyObject *unused) |
|
3509 { |
|
3510 #ifdef USE_BUILTIN_COPY |
|
3511 MatchObject* copy; |
|
3512 Py_ssize_t slots, offset; |
|
3513 |
|
3514 slots = 2 * (self->pattern->groups+1); |
|
3515 |
|
3516 copy = PyObject_NEW_VAR(MatchObject, &Match_Type, slots); |
|
3517 if (!copy) |
|
3518 return NULL; |
|
3519 |
|
3520 /* this value a constant, but any compiler should be able to |
|
3521 figure that out all by itself */ |
|
3522 offset = offsetof(MatchObject, string); |
|
3523 |
|
3524 Py_XINCREF(self->pattern); |
|
3525 Py_XINCREF(self->string); |
|
3526 Py_XINCREF(self->regs); |
|
3527 |
|
3528 memcpy((char*) copy + offset, (char*) self + offset, |
|
3529 sizeof(MatchObject) + slots * sizeof(Py_ssize_t) - offset); |
|
3530 |
|
3531 return (PyObject*) copy; |
|
3532 #else |
|
3533 PyErr_SetString(PyExc_TypeError, "cannot copy this match object"); |
|
3534 return NULL; |
|
3535 #endif |
|
3536 } |
|
3537 |
|
3538 static PyObject* |
|
3539 match_deepcopy(MatchObject* self, PyObject* memo) |
|
3540 { |
|
3541 #ifdef USE_BUILTIN_COPY |
|
3542 MatchObject* copy; |
|
3543 |
|
3544 copy = (MatchObject*) match_copy(self); |
|
3545 if (!copy) |
|
3546 return NULL; |
|
3547 |
|
3548 if (!deepcopy((PyObject**) ©->pattern, memo) || |
|
3549 !deepcopy(©->string, memo) || |
|
3550 !deepcopy(©->regs, memo)) { |
|
3551 Py_DECREF(copy); |
|
3552 return NULL; |
|
3553 } |
|
3554 |
|
3555 #else |
|
3556 PyErr_SetString(PyExc_TypeError, "cannot deepcopy this match object"); |
|
3557 return NULL; |
|
3558 #endif |
|
3559 } |
|
3560 |
|
3561 static PyMethodDef match_methods[] = { |
|
3562 {"group", (PyCFunction) match_group, METH_VARARGS}, |
|
3563 {"start", (PyCFunction) match_start, METH_VARARGS}, |
|
3564 {"end", (PyCFunction) match_end, METH_VARARGS}, |
|
3565 {"span", (PyCFunction) match_span, METH_VARARGS}, |
|
3566 {"groups", (PyCFunction) match_groups, METH_VARARGS|METH_KEYWORDS}, |
|
3567 {"groupdict", (PyCFunction) match_groupdict, METH_VARARGS|METH_KEYWORDS}, |
|
3568 {"expand", (PyCFunction) match_expand, METH_O}, |
|
3569 {"__copy__", (PyCFunction) match_copy, METH_NOARGS}, |
|
3570 {"__deepcopy__", (PyCFunction) match_deepcopy, METH_O}, |
|
3571 {NULL, NULL} |
|
3572 }; |
|
3573 |
|
3574 static PyObject* |
|
3575 match_getattr(MatchObject* self, char* name) |
|
3576 { |
|
3577 PyObject* res; |
|
3578 |
|
3579 res = Py_FindMethod(match_methods, (PyObject*) self, name); |
|
3580 if (res) |
|
3581 return res; |
|
3582 |
|
3583 PyErr_Clear(); |
|
3584 |
|
3585 if (!strcmp(name, "lastindex")) { |
|
3586 if (self->lastindex >= 0) |
|
3587 return Py_BuildValue("i", self->lastindex); |
|
3588 Py_INCREF(Py_None); |
|
3589 return Py_None; |
|
3590 } |
|
3591 |
|
3592 if (!strcmp(name, "lastgroup")) { |
|
3593 if (self->pattern->indexgroup && self->lastindex >= 0) { |
|
3594 PyObject* result = PySequence_GetItem( |
|
3595 self->pattern->indexgroup, self->lastindex |
|
3596 ); |
|
3597 if (result) |
|
3598 return result; |
|
3599 PyErr_Clear(); |
|
3600 } |
|
3601 Py_INCREF(Py_None); |
|
3602 return Py_None; |
|
3603 } |
|
3604 |
|
3605 if (!strcmp(name, "string")) { |
|
3606 if (self->string) { |
|
3607 Py_INCREF(self->string); |
|
3608 return self->string; |
|
3609 } else { |
|
3610 Py_INCREF(Py_None); |
|
3611 return Py_None; |
|
3612 } |
|
3613 } |
|
3614 |
|
3615 if (!strcmp(name, "regs")) { |
|
3616 if (self->regs) { |
|
3617 Py_INCREF(self->regs); |
|
3618 return self->regs; |
|
3619 } else |
|
3620 return match_regs(self); |
|
3621 } |
|
3622 |
|
3623 if (!strcmp(name, "re")) { |
|
3624 Py_INCREF(self->pattern); |
|
3625 return (PyObject*) self->pattern; |
|
3626 } |
|
3627 |
|
3628 if (!strcmp(name, "pos")) |
|
3629 return Py_BuildValue("i", self->pos); |
|
3630 |
|
3631 if (!strcmp(name, "endpos")) |
|
3632 return Py_BuildValue("i", self->endpos); |
|
3633 |
|
3634 PyErr_SetString(PyExc_AttributeError, name); |
|
3635 return NULL; |
|
3636 } |
|
3637 |
|
3638 /* FIXME: implement setattr("string", None) as a special case (to |
|
3639 detach the associated string, if any */ |
|
3640 |
|
3641 statichere PyTypeObject Match_Type = { |
|
3642 PyObject_HEAD_INIT(NULL) |
|
3643 0, "_" SRE_MODULE ".SRE_Match", |
|
3644 sizeof(MatchObject), sizeof(Py_ssize_t), |
|
3645 (destructor)match_dealloc, /*tp_dealloc*/ |
|
3646 0, /*tp_print*/ |
|
3647 (getattrfunc)match_getattr /*tp_getattr*/ |
|
3648 }; |
|
3649 |
|
3650 static PyObject* |
|
3651 pattern_new_match(PatternObject* pattern, SRE_STATE* state, int status) |
|
3652 { |
|
3653 /* create match object (from state object) */ |
|
3654 |
|
3655 MatchObject* match; |
|
3656 Py_ssize_t i, j; |
|
3657 char* base; |
|
3658 int n; |
|
3659 |
|
3660 if (status > 0) { |
|
3661 |
|
3662 /* create match object (with room for extra group marks) */ |
|
3663 /* coverity[ampersand_in_size] */ |
|
3664 match = PyObject_NEW_VAR(MatchObject, &Match_Type, |
|
3665 2*(pattern->groups+1)); |
|
3666 if (!match) |
|
3667 return NULL; |
|
3668 |
|
3669 Py_INCREF(pattern); |
|
3670 match->pattern = pattern; |
|
3671 |
|
3672 Py_INCREF(state->string); |
|
3673 match->string = state->string; |
|
3674 |
|
3675 match->regs = NULL; |
|
3676 match->groups = pattern->groups+1; |
|
3677 |
|
3678 /* fill in group slices */ |
|
3679 |
|
3680 base = (char*) state->beginning; |
|
3681 n = state->charsize; |
|
3682 |
|
3683 match->mark[0] = ((char*) state->start - base) / n; |
|
3684 match->mark[1] = ((char*) state->ptr - base) / n; |
|
3685 |
|
3686 for (i = j = 0; i < pattern->groups; i++, j+=2) |
|
3687 if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) { |
|
3688 match->mark[j+2] = ((char*) state->mark[j] - base) / n; |
|
3689 match->mark[j+3] = ((char*) state->mark[j+1] - base) / n; |
|
3690 } else |
|
3691 match->mark[j+2] = match->mark[j+3] = -1; /* undefined */ |
|
3692 |
|
3693 match->pos = state->pos; |
|
3694 match->endpos = state->endpos; |
|
3695 |
|
3696 match->lastindex = state->lastindex; |
|
3697 |
|
3698 return (PyObject*) match; |
|
3699 |
|
3700 } else if (status == 0) { |
|
3701 |
|
3702 /* no match */ |
|
3703 Py_INCREF(Py_None); |
|
3704 return Py_None; |
|
3705 |
|
3706 } |
|
3707 |
|
3708 /* internal error */ |
|
3709 pattern_error(status); |
|
3710 return NULL; |
|
3711 } |
|
3712 |
|
3713 |
|
3714 /* -------------------------------------------------------------------- */ |
|
3715 /* scanner methods (experimental) */ |
|
3716 |
|
3717 static void |
|
3718 scanner_dealloc(ScannerObject* self) |
|
3719 { |
|
3720 state_fini(&self->state); |
|
3721 Py_DECREF(self->pattern); |
|
3722 PyObject_DEL(self); |
|
3723 } |
|
3724 |
|
3725 static PyObject* |
|
3726 scanner_match(ScannerObject* self, PyObject *unused) |
|
3727 { |
|
3728 SRE_STATE* state = &self->state; |
|
3729 PyObject* match; |
|
3730 int status; |
|
3731 |
|
3732 state_reset(state); |
|
3733 |
|
3734 state->ptr = state->start; |
|
3735 |
|
3736 if (state->charsize == 1) { |
|
3737 status = sre_match(state, PatternObject_GetCode(self->pattern)); |
|
3738 } else { |
|
3739 #if defined(HAVE_UNICODE) |
|
3740 status = sre_umatch(state, PatternObject_GetCode(self->pattern)); |
|
3741 #endif |
|
3742 } |
|
3743 if (PyErr_Occurred()) |
|
3744 return NULL; |
|
3745 |
|
3746 match = pattern_new_match((PatternObject*) self->pattern, |
|
3747 state, status); |
|
3748 |
|
3749 if (status == 0 || state->ptr == state->start) |
|
3750 state->start = (void*) ((char*) state->ptr + state->charsize); |
|
3751 else |
|
3752 state->start = state->ptr; |
|
3753 |
|
3754 return match; |
|
3755 } |
|
3756 |
|
3757 |
|
3758 static PyObject* |
|
3759 scanner_search(ScannerObject* self, PyObject *unused) |
|
3760 { |
|
3761 SRE_STATE* state = &self->state; |
|
3762 PyObject* match; |
|
3763 int status; |
|
3764 |
|
3765 state_reset(state); |
|
3766 |
|
3767 state->ptr = state->start; |
|
3768 |
|
3769 if (state->charsize == 1) { |
|
3770 status = sre_search(state, PatternObject_GetCode(self->pattern)); |
|
3771 } else { |
|
3772 #if defined(HAVE_UNICODE) |
|
3773 status = sre_usearch(state, PatternObject_GetCode(self->pattern)); |
|
3774 #endif |
|
3775 } |
|
3776 if (PyErr_Occurred()) |
|
3777 return NULL; |
|
3778 |
|
3779 match = pattern_new_match((PatternObject*) self->pattern, |
|
3780 state, status); |
|
3781 |
|
3782 if (status == 0 || state->ptr == state->start) |
|
3783 state->start = (void*) ((char*) state->ptr + state->charsize); |
|
3784 else |
|
3785 state->start = state->ptr; |
|
3786 |
|
3787 return match; |
|
3788 } |
|
3789 |
|
3790 static PyMethodDef scanner_methods[] = { |
|
3791 {"match", (PyCFunction) scanner_match, METH_NOARGS}, |
|
3792 {"search", (PyCFunction) scanner_search, METH_NOARGS}, |
|
3793 {NULL, NULL} |
|
3794 }; |
|
3795 |
|
3796 static PyObject* |
|
3797 scanner_getattr(ScannerObject* self, char* name) |
|
3798 { |
|
3799 PyObject* res; |
|
3800 |
|
3801 res = Py_FindMethod(scanner_methods, (PyObject*) self, name); |
|
3802 if (res) |
|
3803 return res; |
|
3804 |
|
3805 PyErr_Clear(); |
|
3806 |
|
3807 /* attributes */ |
|
3808 if (!strcmp(name, "pattern")) { |
|
3809 Py_INCREF(self->pattern); |
|
3810 return self->pattern; |
|
3811 } |
|
3812 |
|
3813 PyErr_SetString(PyExc_AttributeError, name); |
|
3814 return NULL; |
|
3815 } |
|
3816 |
|
3817 statichere PyTypeObject Scanner_Type = { |
|
3818 PyObject_HEAD_INIT(NULL) |
|
3819 0, "_" SRE_MODULE ".SRE_Scanner", |
|
3820 sizeof(ScannerObject), 0, |
|
3821 (destructor)scanner_dealloc, /*tp_dealloc*/ |
|
3822 0, /*tp_print*/ |
|
3823 (getattrfunc)scanner_getattr, /*tp_getattr*/ |
|
3824 }; |
|
3825 |
|
3826 static PyObject* |
|
3827 pattern_scanner(PatternObject* pattern, PyObject* args) |
|
3828 { |
|
3829 /* create search state object */ |
|
3830 |
|
3831 ScannerObject* self; |
|
3832 |
|
3833 PyObject* string; |
|
3834 Py_ssize_t start = 0; |
|
3835 Py_ssize_t end = PY_SSIZE_T_MAX; |
|
3836 if (!PyArg_ParseTuple(args, "O|nn:scanner", &string, &start, &end)) |
|
3837 return NULL; |
|
3838 |
|
3839 /* create scanner object */ |
|
3840 self = PyObject_NEW(ScannerObject, &Scanner_Type); |
|
3841 if (!self) |
|
3842 return NULL; |
|
3843 |
|
3844 string = state_init(&self->state, pattern, string, start, end); |
|
3845 if (!string) { |
|
3846 PyObject_DEL(self); |
|
3847 return NULL; |
|
3848 } |
|
3849 |
|
3850 Py_INCREF(pattern); |
|
3851 self->pattern = (PyObject*) pattern; |
|
3852 |
|
3853 return (PyObject*) self; |
|
3854 } |
|
3855 |
|
3856 static PyMethodDef _functions[] = { |
|
3857 {"compile", _compile, METH_VARARGS}, |
|
3858 {"getcodesize", sre_codesize, METH_NOARGS}, |
|
3859 {"getlower", sre_getlower, METH_VARARGS}, |
|
3860 {NULL, NULL} |
|
3861 }; |
|
3862 |
|
3863 #if PY_VERSION_HEX < 0x02030000 |
|
3864 DL_EXPORT(void) init_sre(void) |
|
3865 #else |
|
3866 PyMODINIT_FUNC init_sre(void) |
|
3867 #endif |
|
3868 { |
|
3869 PyObject* m; |
|
3870 PyObject* d; |
|
3871 PyObject* x; |
|
3872 |
|
3873 /* Patch object types */ |
|
3874 Pattern_Type.ob_type = Match_Type.ob_type = |
|
3875 Scanner_Type.ob_type = &PyType_Type; |
|
3876 |
|
3877 m = Py_InitModule("_" SRE_MODULE, _functions); |
|
3878 if (m == NULL) |
|
3879 return; |
|
3880 d = PyModule_GetDict(m); |
|
3881 |
|
3882 x = PyInt_FromLong(SRE_MAGIC); |
|
3883 if (x) { |
|
3884 PyDict_SetItemString(d, "MAGIC", x); |
|
3885 Py_DECREF(x); |
|
3886 } |
|
3887 |
|
3888 x = PyInt_FromLong(sizeof(SRE_CODE)); |
|
3889 if (x) { |
|
3890 PyDict_SetItemString(d, "CODESIZE", x); |
|
3891 Py_DECREF(x); |
|
3892 } |
|
3893 |
|
3894 x = PyString_FromString(copyright); |
|
3895 if (x) { |
|
3896 PyDict_SetItemString(d, "copyright", x); |
|
3897 Py_DECREF(x); |
|
3898 } |
|
3899 } |
|
3900 |
|
3901 #endif /* !defined(SRE_RECURSIVE) */ |
|
3902 |
|
3903 /* vim:ts=4:sw=4:et |
|
3904 */ |