|
1 // -*- coding: utf-8 -*- |
|
2 // |
|
3 // Copyright (c) 2005 - 2006, Google Inc. |
|
4 // All rights reserved. |
|
5 // |
|
6 // Redistribution and use in source and binary forms, with or without |
|
7 // modification, are permitted provided that the following conditions are |
|
8 // met: |
|
9 // |
|
10 // * Redistributions of source code must retain the above copyright |
|
11 // notice, this list of conditions and the following disclaimer. |
|
12 // * Redistributions in binary form must reproduce the above |
|
13 // copyright notice, this list of conditions and the following disclaimer |
|
14 // in the documentation and/or other materials provided with the |
|
15 // distribution. |
|
16 // * Neither the name of Google Inc. nor the names of its |
|
17 // contributors may be used to endorse or promote products derived from |
|
18 // this software without specific prior written permission. |
|
19 // |
|
20 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
|
21 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
|
22 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
|
23 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
|
24 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
|
25 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
|
26 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
|
27 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
|
28 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
|
29 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
|
30 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
|
31 // |
|
32 // Author: Sanjay Ghemawat |
|
33 // |
|
34 // TODO: Test extractions for PartialMatch/Consume |
|
35 |
|
36 #ifdef HAVE_CONFIG_H |
|
37 #include "config.h" |
|
38 #endif |
|
39 |
|
40 #include <stdio.h> |
|
41 #include <cassert> |
|
42 #include <vector> |
|
43 #include "pcrecpp.h" |
|
44 |
|
45 using pcrecpp::StringPiece; |
|
46 using pcrecpp::RE; |
|
47 using pcrecpp::RE_Options; |
|
48 using pcrecpp::Hex; |
|
49 using pcrecpp::Octal; |
|
50 using pcrecpp::CRadix; |
|
51 |
|
52 static bool VERBOSE_TEST = false; |
|
53 |
|
// CHECK(condition) terminates the test program when `condition` evaluates to
// false: it prints the source location and the text of the failed condition
// to stderr and then calls exit(1).  It is *not* controlled by NDEBUG, so
// the condition is evaluated regardless of compilation mode.  Therefore, it
// is safe to do things like:
//    CHECK_EQ(fp->Write(x), 4)
#define CHECK(condition) do {                                   \
    if (!(condition)) {                                         \
      fprintf(stderr, "%s:%d: Check failed: %s\n",              \
              __FILE__, __LINE__, #condition);                  \
      exit(1);                                                  \
    }                                                           \
  } while (0)

// CHECK_EQ(a, b) dies unless a == b.  Both operands are parenthesized so
// that an argument containing an operator of lower precedence than `==`
// (for example `x & 1`) is compared as a whole instead of being regrouped.
#define CHECK_EQ(a, b) CHECK((a) == (b))
|
67 |
|
68 static void Timing1(int num_iters) { |
|
69 // Same pattern lots of times |
|
70 RE pattern("ruby:\\d+"); |
|
71 StringPiece p("ruby:1234"); |
|
72 for (int j = num_iters; j > 0; j--) { |
|
73 CHECK(pattern.FullMatch(p)); |
|
74 } |
|
75 } |
|
76 |
|
77 static void Timing2(int num_iters) { |
|
78 // Same pattern lots of times |
|
79 RE pattern("ruby:(\\d+)"); |
|
80 int i; |
|
81 for (int j = num_iters; j > 0; j--) { |
|
82 CHECK(pattern.FullMatch("ruby:1234", &i)); |
|
83 CHECK_EQ(i, 1234); |
|
84 } |
|
85 } |
|
86 |
|
87 static void Timing3(int num_iters) { |
|
88 string text_string; |
|
89 for (int j = num_iters; j > 0; j--) { |
|
90 text_string += "this is another line\n"; |
|
91 } |
|
92 |
|
93 RE line_matcher(".*\n"); |
|
94 string line; |
|
95 StringPiece text(text_string); |
|
96 int counter = 0; |
|
97 while (line_matcher.Consume(&text)) { |
|
98 counter++; |
|
99 } |
|
100 printf("Matched %d lines\n", counter); |
|
101 } |
|
102 |
|
#if 0  // enable this if you have a way of defining VirtualProcessSize()

// Checks for memory leaks: constructs 100000 RE objects from distinct
// patterns, records the process size after the first 50000 and again at the
// end, and requires the relative growth between the two samples to stay
// below 2%.
static void LeakTest() {
  unsigned long long initial_size = 0;
  for (int i = 0; i < 100000; i++) {
    if (i == 50000) {
      initial_size = VirtualProcessSize();
      printf("Size after 50000: %llu\n", initial_size);
    }
    char buf[100];  // definitely big enough
    sprintf(buf, "pat%09d", i);
    RE newre(buf);
  }
  // Same type as initial_size, and the type the %llu conversion expects.
  const unsigned long long final_size = VirtualProcessSize();
  printf("Size after 100000: %llu\n", final_size);
  // Subtract in floating point: if the process shrank, an unsigned
  // subtraction would wrap around and report an enormous growth.
  const double growth =
      (double(final_size) - double(initial_size)) / double(final_size);
  printf("Growth: %0.2f%%", growth * 100);
  CHECK(growth < 0.02);    // Allow < 2% growth
}

#endif
|
125 |
|
126 static void RadixTests() { |
|
127 printf("Testing hex\n"); |
|
128 |
|
129 #define CHECK_HEX(type, value) \ |
|
130 do { \ |
|
131 type v; \ |
|
132 CHECK(RE("([0-9a-fA-F]+)[uUlL]*").FullMatch(#value, Hex(&v))); \ |
|
133 CHECK_EQ(v, 0x ## value); \ |
|
134 CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0x" #value, CRadix(&v))); \ |
|
135 CHECK_EQ(v, 0x ## value); \ |
|
136 } while(0) |
|
137 |
|
138 CHECK_HEX(short, 2bad); |
|
139 CHECK_HEX(unsigned short, 2badU); |
|
140 CHECK_HEX(int, dead); |
|
141 CHECK_HEX(unsigned int, deadU); |
|
142 CHECK_HEX(long, 7eadbeefL); |
|
143 CHECK_HEX(unsigned long, deadbeefUL); |
|
144 #ifdef HAVE_LONG_LONG |
|
145 CHECK_HEX(long long, 12345678deadbeefLL); |
|
146 #endif |
|
147 #ifdef HAVE_UNSIGNED_LONG_LONG |
|
148 CHECK_HEX(unsigned long long, cafebabedeadbeefULL); |
|
149 #endif |
|
150 |
|
151 #undef CHECK_HEX |
|
152 |
|
153 printf("Testing octal\n"); |
|
154 |
|
155 #define CHECK_OCTAL(type, value) \ |
|
156 do { \ |
|
157 type v; \ |
|
158 CHECK(RE("([0-7]+)[uUlL]*").FullMatch(#value, Octal(&v))); \ |
|
159 CHECK_EQ(v, 0 ## value); \ |
|
160 CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0" #value, CRadix(&v))); \ |
|
161 CHECK_EQ(v, 0 ## value); \ |
|
162 } while(0) |
|
163 |
|
164 CHECK_OCTAL(short, 77777); |
|
165 CHECK_OCTAL(unsigned short, 177777U); |
|
166 CHECK_OCTAL(int, 17777777777); |
|
167 CHECK_OCTAL(unsigned int, 37777777777U); |
|
168 CHECK_OCTAL(long, 17777777777L); |
|
169 CHECK_OCTAL(unsigned long, 37777777777UL); |
|
170 #ifdef HAVE_LONG_LONG |
|
171 CHECK_OCTAL(long long, 777777777777777777777LL); |
|
172 #endif |
|
173 #ifdef HAVE_UNSIGNED_LONG_LONG |
|
174 CHECK_OCTAL(unsigned long long, 1777777777777777777777ULL); |
|
175 #endif |
|
176 |
|
177 #undef CHECK_OCTAL |
|
178 |
|
179 printf("Testing decimal\n"); |
|
180 |
|
181 #define CHECK_DECIMAL(type, value) \ |
|
182 do { \ |
|
183 type v; \ |
|
184 CHECK(RE("(-?[0-9]+)[uUlL]*").FullMatch(#value, &v)); \ |
|
185 CHECK_EQ(v, value); \ |
|
186 CHECK(RE("(-?[0-9a-fA-FxX]+)[uUlL]*").FullMatch(#value, CRadix(&v))); \ |
|
187 CHECK_EQ(v, value); \ |
|
188 } while(0) |
|
189 |
|
190 CHECK_DECIMAL(short, -1); |
|
191 CHECK_DECIMAL(unsigned short, 9999); |
|
192 CHECK_DECIMAL(int, -1000); |
|
193 CHECK_DECIMAL(unsigned int, 12345U); |
|
194 CHECK_DECIMAL(long, -10000000L); |
|
195 CHECK_DECIMAL(unsigned long, 3083324652U); |
|
196 #ifdef HAVE_LONG_LONG |
|
197 CHECK_DECIMAL(long long, -100000000000000LL); |
|
198 #endif |
|
199 #ifdef HAVE_UNSIGNED_LONG_LONG |
|
200 CHECK_DECIMAL(unsigned long long, 1234567890987654321ULL); |
|
201 #endif |
|
202 |
|
203 #undef CHECK_DECIMAL |
|
204 |
|
205 } |
|
206 |
|
207 static void TestReplace() { |
|
208 printf("Testing Replace\n"); |
|
209 |
|
210 struct ReplaceTest { |
|
211 const char *regexp; |
|
212 const char *rewrite; |
|
213 const char *original; |
|
214 const char *single; |
|
215 const char *global; |
|
216 int global_count; // the expected return value from ReplaceAll |
|
217 }; |
|
218 static const ReplaceTest tests[] = { |
|
219 { "(qu|[b-df-hj-np-tv-z]*)([a-z]+)", |
|
220 "\\2\\1ay", |
|
221 "the quick brown fox jumps over the lazy dogs.", |
|
222 "ethay quick brown fox jumps over the lazy dogs.", |
|
223 "ethay ickquay ownbray oxfay umpsjay overay ethay azylay ogsday.", |
|
224 9 }, |
|
225 { "\\w+", |
|
226 "\\0-NOSPAM", |
|
227 "paul.haahr@google.com", |
|
228 "paul-NOSPAM.haahr@google.com", |
|
229 "paul-NOSPAM.haahr-NOSPAM@google-NOSPAM.com-NOSPAM", |
|
230 4 }, |
|
231 { "^", |
|
232 "(START)", |
|
233 "foo", |
|
234 "(START)foo", |
|
235 "(START)foo", |
|
236 1 }, |
|
237 { "^", |
|
238 "(START)", |
|
239 "", |
|
240 "(START)", |
|
241 "(START)", |
|
242 1 }, |
|
243 { "$", |
|
244 "(END)", |
|
245 "", |
|
246 "(END)", |
|
247 "(END)", |
|
248 1 }, |
|
249 { "b", |
|
250 "bb", |
|
251 "ababababab", |
|
252 "abbabababab", |
|
253 "abbabbabbabbabb", |
|
254 5 }, |
|
255 { "b", |
|
256 "bb", |
|
257 "bbbbbb", |
|
258 "bbbbbbb", |
|
259 "bbbbbbbbbbbb", |
|
260 6 }, |
|
261 { "b+", |
|
262 "bb", |
|
263 "bbbbbb", |
|
264 "bb", |
|
265 "bb", |
|
266 1 }, |
|
267 { "b*", |
|
268 "bb", |
|
269 "bbbbbb", |
|
270 "bb", |
|
271 "bb", |
|
272 1 }, |
|
273 { "b*", |
|
274 "bb", |
|
275 "aaaaa", |
|
276 "bbaaaaa", |
|
277 "bbabbabbabbabbabb", |
|
278 6 }, |
|
279 { "b*", |
|
280 "bb", |
|
281 "aa\naa\n", |
|
282 "bbaa\naa\n", |
|
283 "bbabbabb\nbbabbabb\nbb", |
|
284 7 }, |
|
285 { "b*", |
|
286 "bb", |
|
287 "aa\raa\r", |
|
288 "bbaa\raa\r", |
|
289 "bbabbabb\rbbabbabb\rbb", |
|
290 7 }, |
|
291 { "b*", |
|
292 "bb", |
|
293 "aa\r\naa\r\n", |
|
294 "bbaa\r\naa\r\n", |
|
295 "bbabbabb\r\nbbabbabb\r\nbb", |
|
296 7 }, |
|
297 #ifdef SUPPORT_UTF8 |
|
298 { "b*", |
|
299 "bb", |
|
300 "\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8", // utf8 |
|
301 "bb\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8", |
|
302 "bb\xE3\x83\x9B""bb""\xE3\x83\xBC""bb""\xE3\x83\xA0""bb""\xE3\x81\xB8""bb", |
|
303 5 }, |
|
304 { "b*", |
|
305 "bb", |
|
306 "\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n", // utf8 |
|
307 "bb\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n", |
|
308 ("bb\xE3\x83\x9B""bb\r\nbb""\xE3\x83\xBC""bb\rbb""\xE3\x83\xA0" |
|
309 "bb\nbb""\xE3\x81\xB8""bb\r\nbb"), |
|
310 9 }, |
|
311 #endif |
|
312 { "", NULL, NULL, NULL, NULL, 0 } |
|
313 }; |
|
314 |
|
315 #ifdef SUPPORT_UTF8 |
|
316 const bool support_utf8 = true; |
|
317 #else |
|
318 const bool support_utf8 = false; |
|
319 #endif |
|
320 |
|
321 for (const ReplaceTest *t = tests; t->original != NULL; ++t) { |
|
322 RE re(t->regexp, RE_Options(PCRE_NEWLINE_CRLF).set_utf8(support_utf8)); |
|
323 assert(re.error().empty()); |
|
324 string one(t->original); |
|
325 CHECK(re.Replace(t->rewrite, &one)); |
|
326 CHECK_EQ(one, t->single); |
|
327 string all(t->original); |
|
328 const int replace_count = re.GlobalReplace(t->rewrite, &all); |
|
329 CHECK_EQ(all, t->global); |
|
330 CHECK_EQ(replace_count, t->global_count); |
|
331 } |
|
332 |
|
333 // One final test: test \r\n replacement when we're not in CRLF mode |
|
334 { |
|
335 RE re("b*", RE_Options(PCRE_NEWLINE_CR).set_utf8(support_utf8)); |
|
336 assert(re.error().empty()); |
|
337 string all("aa\r\naa\r\n"); |
|
338 CHECK_EQ(re.GlobalReplace("bb", &all), 9); |
|
339 CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb")); |
|
340 } |
|
341 { |
|
342 RE re("b*", RE_Options(PCRE_NEWLINE_LF).set_utf8(support_utf8)); |
|
343 assert(re.error().empty()); |
|
344 string all("aa\r\naa\r\n"); |
|
345 CHECK_EQ(re.GlobalReplace("bb", &all), 9); |
|
346 CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb")); |
|
347 } |
|
348 // TODO: test what happens when no PCRE_NEWLINE_* flag is set. |
|
349 // Alas, the answer depends on how pcre was compiled. |
|
350 } |
|
351 |
|
352 static void TestExtract() { |
|
353 printf("Testing Extract\n"); |
|
354 |
|
355 string s; |
|
356 |
|
357 CHECK(RE("(.*)@([^.]*)").Extract("\\2!\\1", "boris@kremvax.ru", &s)); |
|
358 CHECK_EQ(s, "kremvax!boris"); |
|
359 |
|
360 // check the RE interface as well |
|
361 CHECK(RE(".*").Extract("'\\0'", "foo", &s)); |
|
362 CHECK_EQ(s, "'foo'"); |
|
363 CHECK(!RE("bar").Extract("'\\0'", "baz", &s)); |
|
364 CHECK_EQ(s, "'foo'"); |
|
365 } |
|
366 |
|
367 static void TestConsume() { |
|
368 printf("Testing Consume\n"); |
|
369 |
|
370 string word; |
|
371 |
|
372 string s(" aaa b!@#$@#$cccc"); |
|
373 StringPiece input(s); |
|
374 |
|
375 RE r("\\s*(\\w+)"); // matches a word, possibly proceeded by whitespace |
|
376 CHECK(r.Consume(&input, &word)); |
|
377 CHECK_EQ(word, "aaa"); |
|
378 CHECK(r.Consume(&input, &word)); |
|
379 CHECK_EQ(word, "b"); |
|
380 CHECK(! r.Consume(&input, &word)); |
|
381 } |
|
382 |
|
383 static void TestFindAndConsume() { |
|
384 printf("Testing FindAndConsume\n"); |
|
385 |
|
386 string word; |
|
387 |
|
388 string s(" aaa b!@#$@#$cccc"); |
|
389 StringPiece input(s); |
|
390 |
|
391 RE r("(\\w+)"); // matches a word |
|
392 CHECK(r.FindAndConsume(&input, &word)); |
|
393 CHECK_EQ(word, "aaa"); |
|
394 CHECK(r.FindAndConsume(&input, &word)); |
|
395 CHECK_EQ(word, "b"); |
|
396 CHECK(r.FindAndConsume(&input, &word)); |
|
397 CHECK_EQ(word, "cccc"); |
|
398 CHECK(! r.FindAndConsume(&input, &word)); |
|
399 } |
|
400 |
|
401 static void TestMatchNumberPeculiarity() { |
|
402 printf("Testing match-number peculiaraity\n"); |
|
403 |
|
404 string word1; |
|
405 string word2; |
|
406 string word3; |
|
407 |
|
408 RE r("(foo)|(bar)|(baz)"); |
|
409 CHECK(r.PartialMatch("foo", &word1, &word2, &word3)); |
|
410 CHECK_EQ(word1, "foo"); |
|
411 CHECK_EQ(word2, ""); |
|
412 CHECK_EQ(word3, ""); |
|
413 CHECK(r.PartialMatch("bar", &word1, &word2, &word3)); |
|
414 CHECK_EQ(word1, ""); |
|
415 CHECK_EQ(word2, "bar"); |
|
416 CHECK_EQ(word3, ""); |
|
417 CHECK(r.PartialMatch("baz", &word1, &word2, &word3)); |
|
418 CHECK_EQ(word1, ""); |
|
419 CHECK_EQ(word2, ""); |
|
420 CHECK_EQ(word3, "baz"); |
|
421 CHECK(!r.PartialMatch("f", &word1, &word2, &word3)); |
|
422 |
|
423 string a; |
|
424 CHECK(RE("(foo)|hello").FullMatch("hello", &a)); |
|
425 CHECK_EQ(a, ""); |
|
426 } |
|
427 |
|
428 static void TestRecursion() { |
|
429 printf("Testing recursion\n"); |
|
430 |
|
431 // Get one string that passes (sometimes), one that never does. |
|
432 string text_good("abcdefghijk"); |
|
433 string text_bad("acdefghijkl"); |
|
434 |
|
435 // According to pcretest, matching text_good against (\w+)*b |
|
436 // requires match_limit of at least 8192, and match_recursion_limit |
|
437 // of at least 37. |
|
438 |
|
439 RE_Options options_ml; |
|
440 options_ml.set_match_limit(8192); |
|
441 RE re("(\\w+)*b", options_ml); |
|
442 CHECK(re.PartialMatch(text_good) == true); |
|
443 CHECK(re.PartialMatch(text_bad) == false); |
|
444 CHECK(re.FullMatch(text_good) == false); |
|
445 CHECK(re.FullMatch(text_bad) == false); |
|
446 |
|
447 options_ml.set_match_limit(1024); |
|
448 RE re2("(\\w+)*b", options_ml); |
|
449 CHECK(re2.PartialMatch(text_good) == false); // because of match_limit |
|
450 CHECK(re2.PartialMatch(text_bad) == false); |
|
451 CHECK(re2.FullMatch(text_good) == false); |
|
452 CHECK(re2.FullMatch(text_bad) == false); |
|
453 |
|
454 RE_Options options_mlr; |
|
455 options_mlr.set_match_limit_recursion(50); |
|
456 RE re3("(\\w+)*b", options_mlr); |
|
457 CHECK(re3.PartialMatch(text_good) == true); |
|
458 CHECK(re3.PartialMatch(text_bad) == false); |
|
459 CHECK(re3.FullMatch(text_good) == false); |
|
460 CHECK(re3.FullMatch(text_bad) == false); |
|
461 |
|
462 options_mlr.set_match_limit_recursion(10); |
|
463 RE re4("(\\w+)*b", options_mlr); |
|
464 CHECK(re4.PartialMatch(text_good) == false); |
|
465 CHECK(re4.PartialMatch(text_bad) == false); |
|
466 CHECK(re4.FullMatch(text_good) == false); |
|
467 CHECK(re4.FullMatch(text_bad) == false); |
|
468 } |
|
469 |
|
470 // A meta-quoted string, interpreted as a pattern, should always match |
|
471 // the original unquoted string. |
|
472 static void TestQuoteMeta(string unquoted, RE_Options options = RE_Options()) { |
|
473 string quoted = RE::QuoteMeta(unquoted); |
|
474 RE re(quoted, options); |
|
475 CHECK(re.FullMatch(unquoted)); |
|
476 } |
|
477 |
|
478 // A string containing meaningful regexp characters, which is then meta- |
|
479 // quoted, should not generally match a string the unquoted string does. |
|
480 static void NegativeTestQuoteMeta(string unquoted, string should_not_match, |
|
481 RE_Options options = RE_Options()) { |
|
482 string quoted = RE::QuoteMeta(unquoted); |
|
483 RE re(quoted, options); |
|
484 CHECK(!re.FullMatch(should_not_match)); |
|
485 } |
|
486 |
|
487 // Tests that quoted meta characters match their original strings, |
|
488 // and that a few things that shouldn't match indeed do not. |
|
489 static void TestQuotaMetaSimple() { |
|
490 TestQuoteMeta("foo"); |
|
491 TestQuoteMeta("foo.bar"); |
|
492 TestQuoteMeta("foo\\.bar"); |
|
493 TestQuoteMeta("[1-9]"); |
|
494 TestQuoteMeta("1.5-2.0?"); |
|
495 TestQuoteMeta("\\d"); |
|
496 TestQuoteMeta("Who doesn't like ice cream?"); |
|
497 TestQuoteMeta("((a|b)c?d*e+[f-h]i)"); |
|
498 TestQuoteMeta("((?!)xxx).*yyy"); |
|
499 TestQuoteMeta("(["); |
|
500 TestQuoteMeta(string("foo\0bar", 7)); |
|
501 } |
|
502 |
|
503 static void TestQuoteMetaSimpleNegative() { |
|
504 NegativeTestQuoteMeta("foo", "bar"); |
|
505 NegativeTestQuoteMeta("...", "bar"); |
|
506 NegativeTestQuoteMeta("\\.", "."); |
|
507 NegativeTestQuoteMeta("\\.", ".."); |
|
508 NegativeTestQuoteMeta("(a)", "a"); |
|
509 NegativeTestQuoteMeta("(a|b)", "a"); |
|
510 NegativeTestQuoteMeta("(a|b)", "(a)"); |
|
511 NegativeTestQuoteMeta("(a|b)", "a|b"); |
|
512 NegativeTestQuoteMeta("[0-9]", "0"); |
|
513 NegativeTestQuoteMeta("[0-9]", "0-9"); |
|
514 NegativeTestQuoteMeta("[0-9]", "[9]"); |
|
515 NegativeTestQuoteMeta("((?!)xxx)", "xxx"); |
|
516 } |
|
517 |
|
518 static void TestQuoteMetaLatin1() { |
|
519 TestQuoteMeta("3\xb2 = 9"); |
|
520 } |
|
521 |
|
// QuoteMeta checks on multi-byte UTF-8 sequences.  The body is compiled
// only when SUPPORT_UTF8 is defined; otherwise the function does nothing.
static void TestQuoteMetaUtf8() {
#ifdef SUPPORT_UTF8
  const RE_Options utf8_options = pcrecpp::UTF8();

  TestQuoteMeta("Pl\xc3\xa1\x63ido Domingo", utf8_options);
  TestQuoteMeta("xyz", utf8_options);                // No fancy utf8
  TestQuoteMeta("\xc2\xb0", utf8_options);           // 2-byte utf8 (degree symbol)
  TestQuoteMeta("27\xc2\xb0 degrees", utf8_options); // As a middle character
  TestQuoteMeta("\xe2\x80\xb3", utf8_options);       // 3-byte utf8 (double prime)
  TestQuoteMeta("\xf0\x9d\x85\x9f", utf8_options);   // 4-byte utf8 (music note)

  // Interpreted as Latin-1, but should still work
  TestQuoteMeta("27\xc2\xb0");

  // 2-byte utf8 (degree symbol): a text with a backslash in front of each
  // byte must not be matched by the quoted pattern.
  NegativeTestQuoteMeta("27\xc2\xb0",
                        "27\\\xc2\\\xb0",
                        utf8_options);
#endif
}
|
536 |
|
537 static void TestQuoteMetaAll() { |
|
538 printf("Testing QuoteMeta\n"); |
|
539 TestQuotaMetaSimple(); |
|
540 TestQuoteMetaSimpleNegative(); |
|
541 TestQuoteMetaLatin1(); |
|
542 TestQuoteMetaUtf8(); |
|
543 } |
|
544 |
|
545 // |
|
546 // Options tests contributed by |
|
547 // Giuseppe Maxia, CTO, Stardata s.r.l. |
|
548 // July 2005 |
|
549 // |
|
550 static void GetOneOptionResult( |
|
551 const char *option_name, |
|
552 const char *regex, |
|
553 const char *str, |
|
554 RE_Options options, |
|
555 bool full, |
|
556 string expected) { |
|
557 |
|
558 printf("Testing Option <%s>\n", option_name); |
|
559 if(VERBOSE_TEST) |
|
560 printf("/%s/ finds \"%s\" within \"%s\" \n", |
|
561 regex, |
|
562 expected.c_str(), |
|
563 str); |
|
564 string captured(""); |
|
565 if (full) |
|
566 RE(regex,options).FullMatch(str, &captured); |
|
567 else |
|
568 RE(regex,options).PartialMatch(str, &captured); |
|
569 CHECK_EQ(captured, expected); |
|
570 } |
|
571 |
|
572 static void TestOneOption( |
|
573 const char *option_name, |
|
574 const char *regex, |
|
575 const char *str, |
|
576 RE_Options options, |
|
577 bool full, |
|
578 bool assertive = true) { |
|
579 |
|
580 printf("Testing Option <%s>\n", option_name); |
|
581 if (VERBOSE_TEST) |
|
582 printf("'%s' %s /%s/ \n", |
|
583 str, |
|
584 (assertive? "matches" : "doesn't match"), |
|
585 regex); |
|
586 if (assertive) { |
|
587 if (full) |
|
588 CHECK(RE(regex,options).FullMatch(str)); |
|
589 else |
|
590 CHECK(RE(regex,options).PartialMatch(str)); |
|
591 } else { |
|
592 if (full) |
|
593 CHECK(!RE(regex,options).FullMatch(str)); |
|
594 else |
|
595 CHECK(!RE(regex,options).PartialMatch(str)); |
|
596 } |
|
597 } |
|
598 |
|
599 static void Test_CASELESS() { |
|
600 RE_Options options; |
|
601 RE_Options options2; |
|
602 |
|
603 options.set_caseless(true); |
|
604 TestOneOption("CASELESS (class)", "HELLO", "hello", options, false); |
|
605 TestOneOption("CASELESS (class2)", "HELLO", "hello", options2.set_caseless(true), false); |
|
606 TestOneOption("CASELESS (class)", "^[A-Z]+$", "Hello", options, false); |
|
607 |
|
608 TestOneOption("CASELESS (function)", "HELLO", "hello", pcrecpp::CASELESS(), false); |
|
609 TestOneOption("CASELESS (function)", "^[A-Z]+$", "Hello", pcrecpp::CASELESS(), false); |
|
610 options.set_caseless(false); |
|
611 TestOneOption("no CASELESS", "HELLO", "hello", options, false, false); |
|
612 } |
|
613 |
|
614 static void Test_MULTILINE() { |
|
615 RE_Options options; |
|
616 RE_Options options2; |
|
617 const char *str = "HELLO\n" "cruel\n" "world\n"; |
|
618 |
|
619 options.set_multiline(true); |
|
620 TestOneOption("MULTILINE (class)", "^cruel$", str, options, false); |
|
621 TestOneOption("MULTILINE (class2)", "^cruel$", str, options2.set_multiline(true), false); |
|
622 TestOneOption("MULTILINE (function)", "^cruel$", str, pcrecpp::MULTILINE(), false); |
|
623 options.set_multiline(false); |
|
624 TestOneOption("no MULTILINE", "^cruel$", str, options, false, false); |
|
625 } |
|
626 |
|
627 static void Test_DOTALL() { |
|
628 RE_Options options; |
|
629 RE_Options options2; |
|
630 const char *str = "HELLO\n" "cruel\n" "world"; |
|
631 |
|
632 options.set_dotall(true); |
|
633 TestOneOption("DOTALL (class)", "HELLO.*world", str, options, true); |
|
634 TestOneOption("DOTALL (class2)", "HELLO.*world", str, options2.set_dotall(true), true); |
|
635 TestOneOption("DOTALL (function)", "HELLO.*world", str, pcrecpp::DOTALL(), true); |
|
636 options.set_dotall(false); |
|
637 TestOneOption("no DOTALL", "HELLO.*world", str, options, true, false); |
|
638 } |
|
639 |
|
640 static void Test_DOLLAR_ENDONLY() { |
|
641 RE_Options options; |
|
642 RE_Options options2; |
|
643 const char *str = "HELLO world\n"; |
|
644 |
|
645 TestOneOption("no DOLLAR_ENDONLY", "world$", str, options, false); |
|
646 options.set_dollar_endonly(true); |
|
647 TestOneOption("DOLLAR_ENDONLY 1", "world$", str, options, false, false); |
|
648 TestOneOption("DOLLAR_ENDONLY 2", "world$", str, options2.set_dollar_endonly(true), false, false); |
|
649 } |
|
650 |
|
651 static void Test_EXTRA() { |
|
652 RE_Options options; |
|
653 const char *str = "HELLO"; |
|
654 |
|
655 options.set_extra(true); |
|
656 TestOneOption("EXTRA 1", "\\HELL\\O", str, options, true, false ); |
|
657 TestOneOption("EXTRA 2", "\\HELL\\O", str, RE_Options().set_extra(true), true, false ); |
|
658 options.set_extra(false); |
|
659 TestOneOption("no EXTRA", "\\HELL\\O", str, options, true ); |
|
660 } |
|
661 |
|
662 static void Test_EXTENDED() { |
|
663 RE_Options options; |
|
664 RE_Options options2; |
|
665 const char *str = "HELLO world"; |
|
666 |
|
667 options.set_extended(true); |
|
668 TestOneOption("EXTENDED (class)", "HELLO world", str, options, false, false); |
|
669 TestOneOption("EXTENDED (class2)", "HELLO world", str, options2.set_extended(true), false, false); |
|
670 TestOneOption("EXTENDED (class)", |
|
671 "^ HE L{2} O " |
|
672 "\\s+ " |
|
673 "\\w+ $ ", |
|
674 str, |
|
675 options, |
|
676 false); |
|
677 |
|
678 TestOneOption("EXTENDED (function)", "HELLO world", str, pcrecpp::EXTENDED(), false, false); |
|
679 TestOneOption("EXTENDED (function)", |
|
680 "^ HE L{2} O " |
|
681 "\\s+ " |
|
682 "\\w+ $ ", |
|
683 str, |
|
684 pcrecpp::EXTENDED(), |
|
685 false); |
|
686 |
|
687 options.set_extended(false); |
|
688 TestOneOption("no EXTENDED", "HELLO world", str, options, false); |
|
689 } |
|
690 |
|
691 static void Test_NO_AUTO_CAPTURE() { |
|
692 RE_Options options; |
|
693 const char *str = "HELLO world"; |
|
694 string captured; |
|
695 |
|
696 printf("Testing Option <no NO_AUTO_CAPTURE>\n"); |
|
697 if (VERBOSE_TEST) |
|
698 printf("parentheses capture text\n"); |
|
699 RE re("(world|universe)$", options); |
|
700 CHECK(re.Extract("\\1", str , &captured)); |
|
701 CHECK_EQ(captured, "world"); |
|
702 options.set_no_auto_capture(true); |
|
703 printf("testing Option <NO_AUTO_CAPTURE>\n"); |
|
704 if (VERBOSE_TEST) |
|
705 printf("parentheses do not capture text\n"); |
|
706 re.Extract("\\1",str, &captured ); |
|
707 CHECK_EQ(captured, "world"); |
|
708 } |
|
709 |
|
710 static void Test_UNGREEDY() { |
|
711 RE_Options options; |
|
712 const char *str = "HELLO, 'this' is the 'world'"; |
|
713 |
|
714 options.set_ungreedy(true); |
|
715 GetOneOptionResult("UNGREEDY 1", "('.*')", str, options, false, "'this'" ); |
|
716 GetOneOptionResult("UNGREEDY 2", "('.*')", str, RE_Options().set_ungreedy(true), false, "'this'" ); |
|
717 GetOneOptionResult("UNGREEDY", "('.*?')", str, options, false, "'this' is the 'world'" ); |
|
718 |
|
719 options.set_ungreedy(false); |
|
720 GetOneOptionResult("no UNGREEDY", "('.*')", str, options, false, "'this' is the 'world'" ); |
|
721 GetOneOptionResult("no UNGREEDY", "('.*?')", str, options, false, "'this'" ); |
|
722 } |
|
723 |
|
724 static void Test_all_options() { |
|
725 const char *str = "HELLO\n" "cruel\n" "world"; |
|
726 RE_Options options; |
|
727 options.set_all_options(PCRE_CASELESS | PCRE_DOTALL); |
|
728 |
|
729 TestOneOption("all_options (CASELESS|DOTALL)", "^hello.*WORLD", str , options, false); |
|
730 options.set_all_options(0); |
|
731 TestOneOption("all_options (0)", "^hello.*WORLD", str , options, false, false); |
|
732 options.set_all_options(PCRE_MULTILINE | PCRE_EXTENDED); |
|
733 |
|
734 TestOneOption("all_options (MULTILINE|EXTENDED)", " ^ c r u e l $ ", str, options, false); |
|
735 TestOneOption("all_options (MULTILINE|EXTENDED) with constructor", |
|
736 " ^ c r u e l $ ", |
|
737 str, |
|
738 RE_Options(PCRE_MULTILINE | PCRE_EXTENDED), |
|
739 false); |
|
740 |
|
741 TestOneOption("all_options (MULTILINE|EXTENDED) with concatenation", |
|
742 " ^ c r u e l $ ", |
|
743 str, |
|
744 RE_Options() |
|
745 .set_multiline(true) |
|
746 .set_extended(true), |
|
747 false); |
|
748 |
|
749 options.set_all_options(0); |
|
750 TestOneOption("all_options (0)", "^ c r u e l $", str, options, false, false); |
|
751 |
|
752 } |
|
753 |
|
754 static void TestOptions() { |
|
755 printf("Testing Options\n"); |
|
756 Test_CASELESS(); |
|
757 Test_MULTILINE(); |
|
758 Test_DOTALL(); |
|
759 Test_DOLLAR_ENDONLY(); |
|
760 Test_EXTENDED(); |
|
761 Test_NO_AUTO_CAPTURE(); |
|
762 Test_UNGREEDY(); |
|
763 Test_EXTRA(); |
|
764 Test_all_options(); |
|
765 } |
|
766 |
|
767 static void TestConstructors() { |
|
768 printf("Testing constructors\n"); |
|
769 |
|
770 RE_Options options; |
|
771 options.set_dotall(true); |
|
772 const char *str = "HELLO\n" "cruel\n" "world"; |
|
773 |
|
774 RE orig("HELLO.*world", options); |
|
775 CHECK(orig.FullMatch(str)); |
|
776 |
|
777 RE copy1(orig); |
|
778 CHECK(copy1.FullMatch(str)); |
|
779 |
|
780 RE copy2("not a match"); |
|
781 CHECK(!copy2.FullMatch(str)); |
|
782 copy2 = copy1; |
|
783 CHECK(copy2.FullMatch(str)); |
|
784 copy2 = orig; |
|
785 CHECK(copy2.FullMatch(str)); |
|
786 |
|
787 // Make sure when we assign to ourselves, nothing bad happens |
|
788 orig = orig; |
|
789 copy1 = copy1; |
|
790 copy2 = copy2; |
|
791 CHECK(orig.FullMatch(str)); |
|
792 CHECK(copy1.FullMatch(str)); |
|
793 CHECK(copy2.FullMatch(str)); |
|
794 } |
|
795 |
|
796 int main(int argc, char** argv) { |
|
797 // Treat any flag as --help |
|
798 if (argc > 1 && argv[1][0] == '-') { |
|
799 printf("Usage: %s [timing1|timing2|timing3 num-iters]\n" |
|
800 " If 'timingX ###' is specified, run the given timing test\n" |
|
801 " with the given number of iterations, rather than running\n" |
|
802 " the default corectness test.\n", argv[0]); |
|
803 return 0; |
|
804 } |
|
805 |
|
806 if (argc > 1) { |
|
807 if ( argc == 2 || atoi(argv[2]) == 0) { |
|
808 printf("timing mode needs a num-iters argument\n"); |
|
809 return 1; |
|
810 } |
|
811 if (!strcmp(argv[1], "timing1")) |
|
812 Timing1(atoi(argv[2])); |
|
813 else if (!strcmp(argv[1], "timing2")) |
|
814 Timing2(atoi(argv[2])); |
|
815 else if (!strcmp(argv[1], "timing3")) |
|
816 Timing3(atoi(argv[2])); |
|
817 else |
|
818 printf("Unknown argument '%s'\n", argv[1]); |
|
819 return 0; |
|
820 } |
|
821 |
|
822 printf("Testing FullMatch\n"); |
|
823 |
|
824 int i; |
|
825 string s; |
|
826 |
|
827 /***** FullMatch with no args *****/ |
|
828 |
|
829 CHECK(RE("h.*o").FullMatch("hello")); |
|
830 CHECK(!RE("h.*o").FullMatch("othello")); // Must be anchored at front |
|
831 CHECK(!RE("h.*o").FullMatch("hello!")); // Must be anchored at end |
|
832 CHECK(RE("a*").FullMatch("aaaa")); // Fullmatch with normal op |
|
833 CHECK(RE("a*?").FullMatch("aaaa")); // Fullmatch with nongreedy op |
|
834 CHECK(RE("a*?\\z").FullMatch("aaaa")); // Two unusual ops |
|
835 |
|
836 /***** FullMatch with args *****/ |
|
837 |
|
838 // Zero-arg |
|
839 CHECK(RE("\\d+").FullMatch("1001")); |
|
840 |
|
841 // Single-arg |
|
842 CHECK(RE("(\\d+)").FullMatch("1001", &i)); |
|
843 CHECK_EQ(i, 1001); |
|
844 CHECK(RE("(-?\\d+)").FullMatch("-123", &i)); |
|
845 CHECK_EQ(i, -123); |
|
846 CHECK(!RE("()\\d+").FullMatch("10", &i)); |
|
847 CHECK(!RE("(\\d+)").FullMatch("1234567890123456789012345678901234567890", |
|
848 &i)); |
|
849 |
|
850 // Digits surrounding integer-arg |
|
851 CHECK(RE("1(\\d*)4").FullMatch("1234", &i)); |
|
852 CHECK_EQ(i, 23); |
|
853 CHECK(RE("(\\d)\\d+").FullMatch("1234", &i)); |
|
854 CHECK_EQ(i, 1); |
|
855 CHECK(RE("(-\\d)\\d+").FullMatch("-1234", &i)); |
|
856 CHECK_EQ(i, -1); |
|
857 CHECK(RE("(\\d)").PartialMatch("1234", &i)); |
|
858 CHECK_EQ(i, 1); |
|
859 CHECK(RE("(-\\d)").PartialMatch("-1234", &i)); |
|
860 CHECK_EQ(i, -1); |
|
861 |
|
862 // String-arg |
|
863 CHECK(RE("h(.*)o").FullMatch("hello", &s)); |
|
864 CHECK_EQ(s, string("ell")); |
|
865 |
|
866 // StringPiece-arg |
|
867 StringPiece sp; |
|
868 CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &sp, &i)); |
|
869 CHECK_EQ(sp.size(), 4); |
|
870 CHECK(memcmp(sp.data(), "ruby", 4) == 0); |
|
871 CHECK_EQ(i, 1234); |
|
872 |
|
873 // Multi-arg |
|
874 CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &s, &i)); |
|
875 CHECK_EQ(s, string("ruby")); |
|
876 CHECK_EQ(i, 1234); |
|
877 |
|
878 // Ignore non-void* NULL arg |
|
879 CHECK(RE("he(.*)lo").FullMatch("hello", (char*)NULL)); |
|
880 CHECK(RE("h(.*)o").FullMatch("hello", (string*)NULL)); |
|
881 CHECK(RE("h(.*)o").FullMatch("hello", (StringPiece*)NULL)); |
|
882 CHECK(RE("(.*)").FullMatch("1234", (int*)NULL)); |
|
883 #ifdef HAVE_LONG_LONG |
|
884 CHECK(RE("(.*)").FullMatch("1234567890123456", (long long*)NULL)); |
|
885 #endif |
|
886 CHECK(RE("(.*)").FullMatch("123.4567890123456", (double*)NULL)); |
|
887 CHECK(RE("(.*)").FullMatch("123.4567890123456", (float*)NULL)); |
|
888 |
|
889 // Fail on non-void* NULL arg if the match doesn't parse for the given type. |
|
890 CHECK(!RE("h(.*)lo").FullMatch("hello", &s, (char*)NULL)); |
|
891 CHECK(!RE("(.*)").FullMatch("hello", (int*)NULL)); |
|
892 CHECK(!RE("(.*)").FullMatch("1234567890123456", (int*)NULL)); |
|
893 CHECK(!RE("(.*)").FullMatch("hello", (double*)NULL)); |
|
894 CHECK(!RE("(.*)").FullMatch("hello", (float*)NULL)); |
|
895 |
|
896 // Ignored arg |
|
897 CHECK(RE("(\\w+)(:)(\\d+)").FullMatch("ruby:1234", &s, (void*)NULL, &i)); |
|
898 CHECK_EQ(s, string("ruby")); |
|
899 CHECK_EQ(i, 1234); |
|
900 |
|
901 // Type tests |
|
902 { |
|
903 char c; |
|
904 CHECK(RE("(H)ello").FullMatch("Hello", &c)); |
|
905 CHECK_EQ(c, 'H'); |
|
906 } |
|
907 { |
|
908 unsigned char c; |
|
909 CHECK(RE("(H)ello").FullMatch("Hello", &c)); |
|
910 CHECK_EQ(c, static_cast<unsigned char>('H')); |
|
911 } |
|
912 { |
|
913 short v; |
|
914 CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100); |
|
915 CHECK(RE("(-?\\d+)").FullMatch("-100", &v)); CHECK_EQ(v, -100); |
|
916 CHECK(RE("(-?\\d+)").FullMatch("32767", &v)); CHECK_EQ(v, 32767); |
|
917 CHECK(RE("(-?\\d+)").FullMatch("-32768", &v)); CHECK_EQ(v, -32768); |
|
918 CHECK(!RE("(-?\\d+)").FullMatch("-32769", &v)); |
|
919 CHECK(!RE("(-?\\d+)").FullMatch("32768", &v)); |
|
920 } |
|
921 { |
|
922 unsigned short v; |
|
923 CHECK(RE("(\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100); |
|
924 CHECK(RE("(\\d+)").FullMatch("32767", &v)); CHECK_EQ(v, 32767); |
|
925 CHECK(RE("(\\d+)").FullMatch("65535", &v)); CHECK_EQ(v, 65535); |
|
926 CHECK(!RE("(\\d+)").FullMatch("65536", &v)); |
|
927 } |
|
928 { |
|
929 int v; |
|
930 static const int max_value = 0x7fffffff; |
|
931 static const int min_value = -max_value - 1; |
|
932 CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100); |
|
933 CHECK(RE("(-?\\d+)").FullMatch("-100", &v)); CHECK_EQ(v, -100); |
|
934 CHECK(RE("(-?\\d+)").FullMatch("2147483647", &v)); CHECK_EQ(v, max_value); |
|
935 CHECK(RE("(-?\\d+)").FullMatch("-2147483648", &v)); CHECK_EQ(v, min_value); |
|
936 CHECK(!RE("(-?\\d+)").FullMatch("-2147483649", &v)); |
|
937 CHECK(!RE("(-?\\d+)").FullMatch("2147483648", &v)); |
|
938 } |
|
939 { |
|
940 unsigned int v; |
|
941 static const unsigned int max_value = 0xfffffffful; |
|
942 CHECK(RE("(\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100); |
|
943 CHECK(RE("(\\d+)").FullMatch("4294967295", &v)); CHECK_EQ(v, max_value); |
|
944 CHECK(!RE("(\\d+)").FullMatch("4294967296", &v)); |
|
945 } |
|
946 #ifdef HAVE_LONG_LONG |
|
947 # if defined(__MINGW__) || defined(__MINGW32__) |
|
948 # define LLD "%I64d" |
|
949 # define LLU "%I64u" |
|
950 # else |
|
951 # define LLD "%lld" |
|
952 # define LLU "%llu" |
|
953 # endif |
|
954 { |
|
955 long long v; |
|
956 static const long long max_value = 0x7fffffffffffffffLL; |
|
957 static const long long min_value = -max_value - 1; |
|
958 char buf[32]; // definitely big enough for a long long |
|
959 |
|
960 CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100); |
|
961 CHECK(RE("(-?\\d+)").FullMatch("-100",&v)); CHECK_EQ(v, -100); |
|
962 |
|
963 sprintf(buf, LLD, max_value); |
|
964 CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value); |
|
965 |
|
966 sprintf(buf, LLD, min_value); |
|
967 CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, min_value); |
|
968 |
|
969 sprintf(buf, LLD, max_value); |
|
970 assert(buf[strlen(buf)-1] != '9'); |
|
971 buf[strlen(buf)-1]++; |
|
972 CHECK(!RE("(-?\\d+)").FullMatch(buf, &v)); |
|
973 |
|
974 sprintf(buf, LLD, min_value); |
|
975 assert(buf[strlen(buf)-1] != '9'); |
|
976 buf[strlen(buf)-1]++; |
|
977 CHECK(!RE("(-?\\d+)").FullMatch(buf, &v)); |
|
978 } |
|
979 #endif |
|
980 #if defined HAVE_UNSIGNED_LONG_LONG && defined HAVE_LONG_LONG |
|
981 { |
|
982 unsigned long long v; |
|
983 long long v2; |
|
984 static const unsigned long long max_value = 0xffffffffffffffffULL; |
|
985 char buf[32]; // definitely big enough for an unsigned long long |
|
986 |
|
987 CHECK(RE("(-?\\d+)").FullMatch("100",&v)); CHECK_EQ(v, 100); |
|
988 CHECK(RE("(-?\\d+)").FullMatch("-100",&v2)); CHECK_EQ(v2, -100); |
|
989 |
|
990 sprintf(buf, LLU, max_value); |
|
991 CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value); |
|
992 |
|
993 assert(buf[strlen(buf)-1] != '9'); |
|
994 buf[strlen(buf)-1]++; |
|
995 CHECK(!RE("(-?\\d+)").FullMatch(buf, &v)); |
|
996 } |
|
997 #endif |
|
998 { |
|
999 float v; |
|
1000 CHECK(RE("(.*)").FullMatch("100", &v)); |
|
1001 CHECK(RE("(.*)").FullMatch("-100.", &v)); |
|
1002 CHECK(RE("(.*)").FullMatch("1e23", &v)); |
|
1003 } |
|
1004 { |
|
1005 double v; |
|
1006 CHECK(RE("(.*)").FullMatch("100", &v)); |
|
1007 CHECK(RE("(.*)").FullMatch("-100.", &v)); |
|
1008 CHECK(RE("(.*)").FullMatch("1e23", &v)); |
|
1009 } |
|
1010 |
|
1011 // Check that matching is fully anchored |
|
1012 CHECK(!RE("(\\d+)").FullMatch("x1001", &i)); |
|
1013 CHECK(!RE("(\\d+)").FullMatch("1001x", &i)); |
|
1014 CHECK(RE("x(\\d+)").FullMatch("x1001", &i)); CHECK_EQ(i, 1001); |
|
1015 CHECK(RE("(\\d+)x").FullMatch("1001x", &i)); CHECK_EQ(i, 1001); |
|
1016 |
|
1017 // Braces |
|
1018 CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcd")); |
|
1019 CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcde")); |
|
1020 CHECK(!RE("[0-9a-f+.-]{5,}").FullMatch("0abc")); |
|
1021 |
|
1022 // Complicated RE |
|
1023 CHECK(RE("foo|bar|[A-Z]").FullMatch("foo")); |
|
1024 CHECK(RE("foo|bar|[A-Z]").FullMatch("bar")); |
|
1025 CHECK(RE("foo|bar|[A-Z]").FullMatch("X")); |
|
1026 CHECK(!RE("foo|bar|[A-Z]").FullMatch("XY")); |
|
1027 |
|
1028 // Check full-match handling (needs '$' tacked on internally) |
|
1029 CHECK(RE("fo|foo").FullMatch("fo")); |
|
1030 CHECK(RE("fo|foo").FullMatch("foo")); |
|
1031 CHECK(RE("fo|foo$").FullMatch("fo")); |
|
1032 CHECK(RE("fo|foo$").FullMatch("foo")); |
|
1033 CHECK(RE("foo$").FullMatch("foo")); |
|
1034 CHECK(!RE("foo\\$").FullMatch("foo$bar")); |
|
1035 CHECK(!RE("fo|bar").FullMatch("fox")); |
|
1036 |
|
1037 // Uncomment the following if we change the handling of '$' to |
|
1038 // prevent it from matching a trailing newline |
|
1039 if (false) { |
|
1040 // Check that we don't get bitten by pcre's special handling of a |
|
1041 // '\n' at the end of the string matching '$' |
|
1042 CHECK(!RE("foo$").PartialMatch("foo\n")); |
|
1043 } |
|
1044 |
|
1045 // Number of args |
|
1046 int a[16]; |
|
1047 CHECK(RE("").FullMatch("")); |
|
1048 |
|
1049 memset(a, 0, sizeof(0)); |
|
1050 CHECK(RE("(\\d){1}").FullMatch("1", |
|
1051 &a[0])); |
|
1052 CHECK_EQ(a[0], 1); |
|
1053 |
|
1054 memset(a, 0, sizeof(0)); |
|
1055 CHECK(RE("(\\d)(\\d)").FullMatch("12", |
|
1056 &a[0], &a[1])); |
|
1057 CHECK_EQ(a[0], 1); |
|
1058 CHECK_EQ(a[1], 2); |
|
1059 |
|
1060 memset(a, 0, sizeof(0)); |
|
1061 CHECK(RE("(\\d)(\\d)(\\d)").FullMatch("123", |
|
1062 &a[0], &a[1], &a[2])); |
|
1063 CHECK_EQ(a[0], 1); |
|
1064 CHECK_EQ(a[1], 2); |
|
1065 CHECK_EQ(a[2], 3); |
|
1066 |
|
1067 memset(a, 0, sizeof(0)); |
|
1068 CHECK(RE("(\\d)(\\d)(\\d)(\\d)").FullMatch("1234", |
|
1069 &a[0], &a[1], &a[2], &a[3])); |
|
1070 CHECK_EQ(a[0], 1); |
|
1071 CHECK_EQ(a[1], 2); |
|
1072 CHECK_EQ(a[2], 3); |
|
1073 CHECK_EQ(a[3], 4); |
|
1074 |
|
1075 memset(a, 0, sizeof(0)); |
|
1076 CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("12345", |
|
1077 &a[0], &a[1], &a[2], |
|
1078 &a[3], &a[4])); |
|
1079 CHECK_EQ(a[0], 1); |
|
1080 CHECK_EQ(a[1], 2); |
|
1081 CHECK_EQ(a[2], 3); |
|
1082 CHECK_EQ(a[3], 4); |
|
1083 CHECK_EQ(a[4], 5); |
|
1084 |
|
1085 memset(a, 0, sizeof(0)); |
|
1086 CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("123456", |
|
1087 &a[0], &a[1], &a[2], |
|
1088 &a[3], &a[4], &a[5])); |
|
1089 CHECK_EQ(a[0], 1); |
|
1090 CHECK_EQ(a[1], 2); |
|
1091 CHECK_EQ(a[2], 3); |
|
1092 CHECK_EQ(a[3], 4); |
|
1093 CHECK_EQ(a[4], 5); |
|
1094 CHECK_EQ(a[5], 6); |
|
1095 |
|
1096 memset(a, 0, sizeof(0)); |
|
1097 CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("1234567", |
|
1098 &a[0], &a[1], &a[2], &a[3], |
|
1099 &a[4], &a[5], &a[6])); |
|
1100 CHECK_EQ(a[0], 1); |
|
1101 CHECK_EQ(a[1], 2); |
|
1102 CHECK_EQ(a[2], 3); |
|
1103 CHECK_EQ(a[3], 4); |
|
1104 CHECK_EQ(a[4], 5); |
|
1105 CHECK_EQ(a[5], 6); |
|
1106 CHECK_EQ(a[6], 7); |
|
1107 |
|
1108 memset(a, 0, sizeof(0)); |
|
1109 CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)" |
|
1110 "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch( |
|
1111 "1234567890123456", |
|
1112 &a[0], &a[1], &a[2], &a[3], |
|
1113 &a[4], &a[5], &a[6], &a[7], |
|
1114 &a[8], &a[9], &a[10], &a[11], |
|
1115 &a[12], &a[13], &a[14], &a[15])); |
|
1116 CHECK_EQ(a[0], 1); |
|
1117 CHECK_EQ(a[1], 2); |
|
1118 CHECK_EQ(a[2], 3); |
|
1119 CHECK_EQ(a[3], 4); |
|
1120 CHECK_EQ(a[4], 5); |
|
1121 CHECK_EQ(a[5], 6); |
|
1122 CHECK_EQ(a[6], 7); |
|
1123 CHECK_EQ(a[7], 8); |
|
1124 CHECK_EQ(a[8], 9); |
|
1125 CHECK_EQ(a[9], 0); |
|
1126 CHECK_EQ(a[10], 1); |
|
1127 CHECK_EQ(a[11], 2); |
|
1128 CHECK_EQ(a[12], 3); |
|
1129 CHECK_EQ(a[13], 4); |
|
1130 CHECK_EQ(a[14], 5); |
|
1131 CHECK_EQ(a[15], 6); |
|
1132 |
|
1133 /***** PartialMatch *****/ |
|
1134 |
|
1135 printf("Testing PartialMatch\n"); |
|
1136 |
|
1137 CHECK(RE("h.*o").PartialMatch("hello")); |
|
1138 CHECK(RE("h.*o").PartialMatch("othello")); |
|
1139 CHECK(RE("h.*o").PartialMatch("hello!")); |
|
1140 CHECK(RE("((((((((((((((((((((x))))))))))))))))))))").PartialMatch("x")); |
|
1141 |
|
1142 /***** other tests *****/ |
|
1143 |
|
1144 RadixTests(); |
|
1145 TestReplace(); |
|
1146 TestExtract(); |
|
1147 TestConsume(); |
|
1148 TestFindAndConsume(); |
|
1149 TestQuoteMetaAll(); |
|
1150 TestMatchNumberPeculiarity(); |
|
1151 |
|
1152 // Check the pattern() accessor |
|
1153 { |
|
1154 const string kPattern = "http://([^/]+)/.*"; |
|
1155 const RE re(kPattern); |
|
1156 CHECK_EQ(kPattern, re.pattern()); |
|
1157 } |
|
1158 |
|
1159 // Check RE error field. |
|
1160 { |
|
1161 RE re("foo"); |
|
1162 CHECK(re.error().empty()); // Must have no error |
|
1163 } |
|
1164 |
|
1165 #ifdef SUPPORT_UTF8 |
|
1166 // Check UTF-8 handling |
|
1167 { |
|
1168 printf("Testing UTF-8 handling\n"); |
|
1169 |
|
1170 // Three Japanese characters (nihongo) |
|
1171 const unsigned char utf8_string[] = { |
|
1172 0xe6, 0x97, 0xa5, // 65e5 |
|
1173 0xe6, 0x9c, 0xac, // 672c |
|
1174 0xe8, 0xaa, 0x9e, // 8a9e |
|
1175 0 |
|
1176 }; |
|
1177 const unsigned char utf8_pattern[] = { |
|
1178 '.', |
|
1179 0xe6, 0x9c, 0xac, // 672c |
|
1180 '.', |
|
1181 0 |
|
1182 }; |
|
1183 |
|
1184 // Both should match in either mode, bytes or UTF-8 |
|
1185 RE re_test1("........."); |
|
1186 CHECK(re_test1.FullMatch(utf8_string)); |
|
1187 RE re_test2("...", pcrecpp::UTF8()); |
|
1188 CHECK(re_test2.FullMatch(utf8_string)); |
|
1189 |
|
1190 // Check that '.' matches one byte or UTF-8 character |
|
1191 // according to the mode. |
|
1192 string ss; |
|
1193 RE re_test3("(.)"); |
|
1194 CHECK(re_test3.PartialMatch(utf8_string, &ss)); |
|
1195 CHECK_EQ(ss, string("\xe6")); |
|
1196 RE re_test4("(.)", pcrecpp::UTF8()); |
|
1197 CHECK(re_test4.PartialMatch(utf8_string, &ss)); |
|
1198 CHECK_EQ(ss, string("\xe6\x97\xa5")); |
|
1199 |
|
1200 // Check that string matches itself in either mode |
|
1201 RE re_test5(utf8_string); |
|
1202 CHECK(re_test5.FullMatch(utf8_string)); |
|
1203 RE re_test6(utf8_string, pcrecpp::UTF8()); |
|
1204 CHECK(re_test6.FullMatch(utf8_string)); |
|
1205 |
|
1206 // Check that pattern matches string only in UTF8 mode |
|
1207 RE re_test7(utf8_pattern); |
|
1208 CHECK(!re_test7.FullMatch(utf8_string)); |
|
1209 RE re_test8(utf8_pattern, pcrecpp::UTF8()); |
|
1210 CHECK(re_test8.FullMatch(utf8_string)); |
|
1211 } |
|
1212 |
|
1213 // Check that ungreedy, UTF8 regular expressions don't match when they |
|
1214 // oughtn't -- see bug 82246. |
|
1215 { |
|
1216 // This code always worked. |
|
1217 const char* pattern = "\\w+X"; |
|
1218 const string target = "a aX"; |
|
1219 RE match_sentence(pattern); |
|
1220 RE match_sentence_re(pattern, pcrecpp::UTF8()); |
|
1221 |
|
1222 CHECK(!match_sentence.FullMatch(target)); |
|
1223 CHECK(!match_sentence_re.FullMatch(target)); |
|
1224 } |
|
1225 |
|
1226 { |
|
1227 const char* pattern = "(?U)\\w+X"; |
|
1228 const string target = "a aX"; |
|
1229 RE match_sentence(pattern); |
|
1230 RE match_sentence_re(pattern, pcrecpp::UTF8()); |
|
1231 |
|
1232 CHECK(!match_sentence.FullMatch(target)); |
|
1233 CHECK(!match_sentence_re.FullMatch(target)); |
|
1234 } |
|
1235 #endif /* def SUPPORT_UTF8 */ |
|
1236 |
|
1237 printf("Testing error reporting\n"); |
|
1238 |
|
1239 { RE re("a\\1"); CHECK(!re.error().empty()); } |
|
1240 { |
|
1241 RE re("a[x"); |
|
1242 CHECK(!re.error().empty()); |
|
1243 } |
|
1244 { |
|
1245 RE re("a[z-a]"); |
|
1246 CHECK(!re.error().empty()); |
|
1247 } |
|
1248 { |
|
1249 RE re("a[[:foobar:]]"); |
|
1250 CHECK(!re.error().empty()); |
|
1251 } |
|
1252 { |
|
1253 RE re("a(b"); |
|
1254 CHECK(!re.error().empty()); |
|
1255 } |
|
1256 { |
|
1257 RE re("a\\"); |
|
1258 CHECK(!re.error().empty()); |
|
1259 } |
|
1260 |
|
1261 // Test that recursion is stopped |
|
1262 TestRecursion(); |
|
1263 |
|
1264 // Test Options |
|
1265 if (getenv("VERBOSE_TEST") != NULL) |
|
1266 VERBOSE_TEST = true; |
|
1267 TestOptions(); |
|
1268 |
|
1269 // Test the constructors |
|
1270 TestConstructors(); |
|
1271 |
|
1272 // Done |
|
1273 printf("OK\n"); |
|
1274 |
|
1275 return 0; |
|
1276 } |