libraries/spcre/libpcre/pcre/pcrecpp_unittest.cc
changeset 0 7f656887cf89
equal deleted inserted replaced
-1:000000000000 0:7f656887cf89
       
     1 // -*- coding: utf-8 -*-
       
     2 //
       
     3 // Copyright (c) 2005 - 2006, Google Inc.
       
     4 // All rights reserved.
       
     5 //
       
     6 // Redistribution and use in source and binary forms, with or without
       
     7 // modification, are permitted provided that the following conditions are
       
     8 // met:
       
     9 //
       
    10 //     * Redistributions of source code must retain the above copyright
       
    11 // notice, this list of conditions and the following disclaimer.
       
    12 //     * Redistributions in binary form must reproduce the above
       
    13 // copyright notice, this list of conditions and the following disclaimer
       
    14 // in the documentation and/or other materials provided with the
       
    15 // distribution.
       
    16 //     * Neither the name of Google Inc. nor the names of its
       
    17 // contributors may be used to endorse or promote products derived from
       
    18 // this software without specific prior written permission.
       
    19 //
       
    20 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
       
    21 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
       
    22 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
       
    23 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
       
    24 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
       
    25 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
       
    26 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
       
    27 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
       
    28 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
       
    29 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
       
    30 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
       
    31 //
       
    32 // Author: Sanjay Ghemawat
       
    33 //
       
    34 // TODO: Test extractions for PartialMatch/Consume
       
    35 
       
    36 #ifdef HAVE_CONFIG_H
       
    37 #include "config.h"
       
    38 #endif
       
    39 
       
    40 #include <stdio.h>
       
    41 #include <cassert>
       
    42 #include <vector>
       
    43 #include "pcrecpp.h"
       
    44 
       
    45 using pcrecpp::StringPiece;
       
    46 using pcrecpp::RE;
       
    47 using pcrecpp::RE_Options;
       
    48 using pcrecpp::Hex;
       
    49 using pcrecpp::Octal;
       
    50 using pcrecpp::CRadix;
       
    51 
       
    52 static bool VERBOSE_TEST  = false;
       
    53 
       
    54 // CHECK dies with a fatal error if condition is not true.  It is *not*
       
    55 // controlled by NDEBUG, so the check will be executed regardless of
       
    56 // compilation mode.  Therefore, it is safe to do things like:
       
    57 //    CHECK_EQ(fp->Write(x), 4)
       
    58 #define CHECK(condition) do {                           \
       
    59   if (!(condition)) {                                   \
       
    60     fprintf(stderr, "%s:%d: Check failed: %s\n",        \
       
    61             __FILE__, __LINE__, #condition);            \
       
    62     exit(1);                                            \
       
    63   }                                                     \
       
    64 } while (0)
       
    65 
       
    66 #define CHECK_EQ(a, b)   CHECK(a == b)
       
    67 
       
    68 static void Timing1(int num_iters) {
       
    69   // Same pattern lots of times
       
    70   RE pattern("ruby:\\d+");
       
    71   StringPiece p("ruby:1234");
       
    72   for (int j = num_iters; j > 0; j--) {
       
    73     CHECK(pattern.FullMatch(p));
       
    74   }
       
    75 }
       
    76 
       
    77 static void Timing2(int num_iters) {
       
    78   // Same pattern lots of times
       
    79   RE pattern("ruby:(\\d+)");
       
    80   int i;
       
    81   for (int j = num_iters; j > 0; j--) {
       
    82     CHECK(pattern.FullMatch("ruby:1234", &i));
       
    83     CHECK_EQ(i, 1234);
       
    84   }
       
    85 }
       
    86 
       
    87 static void Timing3(int num_iters) {
       
    88   string text_string;
       
    89   for (int j = num_iters; j > 0; j--) {
       
    90     text_string += "this is another line\n";
       
    91   }
       
    92 
       
    93   RE line_matcher(".*\n");
       
    94   string line;
       
    95   StringPiece text(text_string);
       
    96   int counter = 0;
       
    97   while (line_matcher.Consume(&text)) {
       
    98     counter++;
       
    99   }
       
   100   printf("Matched %d lines\n", counter);
       
   101 }
       
   102 
       
   103 #if 0  // uncomment this if you have a way of defining VirtualProcessSize()
       
   104 
       
   105 static void LeakTest() {
       
   106   // Check for memory leaks
       
   107   unsigned long long initial_size = 0;
       
   108   for (int i = 0; i < 100000; i++) {
       
   109     if (i == 50000) {
       
   110       initial_size = VirtualProcessSize();
       
   111       printf("Size after 50000: %llu\n", initial_size);
       
   112     }
       
   113     char buf[100];  // definitely big enough
       
   114     sprintf(buf, "pat%09d", i);
       
   115     RE newre(buf);
       
   116   }
       
   117   uint64 final_size = VirtualProcessSize();
       
   118   printf("Size after 100000: %llu\n", final_size);
       
   119   const double growth = double(final_size - initial_size) / final_size;
       
   120   printf("Growth: %0.2f%%", growth * 100);
       
   121   CHECK(growth < 0.02);       // Allow < 2% growth
       
   122 }
       
   123 
       
   124 #endif
       
   125 
       
    // Exercises integer extraction for hexadecimal, octal and decimal input
    // across the integer types.  Each CHECK_* helper macro below parses the
    // stringified literal twice -- once with the radix-specific argument and
    // once with CRadix -- and compares the parsed value with the literal
    // itself.  The long long cases are compiled only when the corresponding
    // HAVE_* macro is defined.
    static void RadixTests() {
      printf("Testing hex\n");

    // CHECK_HEX(type, value): `value` is written without the "0x" prefix and
    // may carry an integer suffix, which the regexps match outside the
    // capture group.  The first match passes the bare digits to Hex(&v); the
    // second passes "0x" followed by the digits to CRadix(&v).  Both results
    // are compared with the literal 0x<value>.
    #define CHECK_HEX(type, value) \
      do { \
        type v; \
        CHECK(RE("([0-9a-fA-F]+)[uUlL]*").FullMatch(#value, Hex(&v))); \
        CHECK_EQ(v, 0x ## value); \
        CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0x" #value, CRadix(&v))); \
        CHECK_EQ(v, 0x ## value); \
      } while(0)

      CHECK_HEX(short,              2bad);
      CHECK_HEX(unsigned short,     2badU);
      CHECK_HEX(int,                dead);
      CHECK_HEX(unsigned int,       deadU);
      CHECK_HEX(long,               7eadbeefL);
      CHECK_HEX(unsigned long,      deadbeefUL);
    #ifdef HAVE_LONG_LONG
      CHECK_HEX(long long,          12345678deadbeefLL);
    #endif
    #ifdef HAVE_UNSIGNED_LONG_LONG
      CHECK_HEX(unsigned long long, cafebabedeadbeefULL);
    #endif

    #undef CHECK_HEX

      printf("Testing octal\n");

    // CHECK_OCTAL(type, value): same scheme as the hex helper above, using
    // Octal(&v) for the bare digits and a leading "0" for the CRadix form.
    // Both results are compared with the literal 0<value>.
    #define CHECK_OCTAL(type, value) \
      do { \
        type v; \
        CHECK(RE("([0-7]+)[uUlL]*").FullMatch(#value, Octal(&v))); \
        CHECK_EQ(v, 0 ## value); \
        CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0" #value, CRadix(&v))); \
        CHECK_EQ(v, 0 ## value); \
      } while(0)

      CHECK_OCTAL(short,              77777);
      CHECK_OCTAL(unsigned short,     177777U);
      CHECK_OCTAL(int,                17777777777);
      CHECK_OCTAL(unsigned int,       37777777777U);
      CHECK_OCTAL(long,               17777777777L);
      CHECK_OCTAL(unsigned long,      37777777777UL);
    #ifdef HAVE_LONG_LONG
      CHECK_OCTAL(long long,          777777777777777777777LL);
    #endif
    #ifdef HAVE_UNSIGNED_LONG_LONG
      CHECK_OCTAL(unsigned long long, 1777777777777777777777ULL);
    #endif

    #undef CHECK_OCTAL

      printf("Testing decimal\n");

    // CHECK_DECIMAL(type, value): parses the stringified literal once through
    // a plain `&v` argument and once through CRadix(&v), with no radix prefix
    // in either case, and compares both results with `value`.
    #define CHECK_DECIMAL(type, value) \
      do { \
        type v; \
        CHECK(RE("(-?[0-9]+)[uUlL]*").FullMatch(#value, &v)); \
        CHECK_EQ(v, value); \
        CHECK(RE("(-?[0-9a-fA-FxX]+)[uUlL]*").FullMatch(#value, CRadix(&v))); \
        CHECK_EQ(v, value); \
      } while(0)

      CHECK_DECIMAL(short,              -1);
      CHECK_DECIMAL(unsigned short,     9999);
      CHECK_DECIMAL(int,                -1000);
      CHECK_DECIMAL(unsigned int,       12345U);
      CHECK_DECIMAL(long,               -10000000L);
      CHECK_DECIMAL(unsigned long,      3083324652U);
    #ifdef HAVE_LONG_LONG
      CHECK_DECIMAL(long long,          -100000000000000LL);
    #endif
    #ifdef HAVE_UNSIGNED_LONG_LONG
      CHECK_DECIMAL(unsigned long long, 1234567890987654321ULL);
    #endif

    #undef CHECK_DECIMAL

    }
       
   206 
       
   207 static void TestReplace() {
       
   208   printf("Testing Replace\n");
       
   209 
       
   210   struct ReplaceTest {
       
   211     const char *regexp;
       
   212     const char *rewrite;
       
   213     const char *original;
       
   214     const char *single;
       
   215     const char *global;
       
   216     int global_count;         // the expected return value from ReplaceAll
       
   217   };
       
   218   static const ReplaceTest tests[] = {
       
   219     { "(qu|[b-df-hj-np-tv-z]*)([a-z]+)",
       
   220       "\\2\\1ay",
       
   221       "the quick brown fox jumps over the lazy dogs.",
       
   222       "ethay quick brown fox jumps over the lazy dogs.",
       
   223       "ethay ickquay ownbray oxfay umpsjay overay ethay azylay ogsday.",
       
   224       9 },
       
   225     { "\\w+",
       
   226       "\\0-NOSPAM",
       
   227       "paul.haahr@google.com",
       
   228       "paul-NOSPAM.haahr@google.com",
       
   229       "paul-NOSPAM.haahr-NOSPAM@google-NOSPAM.com-NOSPAM",
       
   230       4 },
       
   231     { "^",
       
   232       "(START)",
       
   233       "foo",
       
   234       "(START)foo",
       
   235       "(START)foo",
       
   236       1 },
       
   237     { "^",
       
   238       "(START)",
       
   239       "",
       
   240       "(START)",
       
   241       "(START)",
       
   242       1 },
       
   243     { "$",
       
   244       "(END)",
       
   245       "",
       
   246       "(END)",
       
   247       "(END)",
       
   248       1 },
       
   249     { "b",
       
   250       "bb",
       
   251       "ababababab",
       
   252       "abbabababab",
       
   253       "abbabbabbabbabb",
       
   254        5 },
       
   255     { "b",
       
   256       "bb",
       
   257       "bbbbbb",
       
   258       "bbbbbbb",
       
   259       "bbbbbbbbbbbb",
       
   260       6 },
       
   261     { "b+",
       
   262       "bb",
       
   263       "bbbbbb",
       
   264       "bb",
       
   265       "bb",
       
   266       1 },
       
   267     { "b*",
       
   268       "bb",
       
   269       "bbbbbb",
       
   270       "bb",
       
   271       "bb",
       
   272       1 },
       
   273     { "b*",
       
   274       "bb",
       
   275       "aaaaa",
       
   276       "bbaaaaa",
       
   277       "bbabbabbabbabbabb",
       
   278       6 },
       
   279     { "b*",
       
   280       "bb",
       
   281       "aa\naa\n",
       
   282       "bbaa\naa\n",
       
   283       "bbabbabb\nbbabbabb\nbb",
       
   284       7 },
       
   285     { "b*",
       
   286       "bb",
       
   287       "aa\raa\r",
       
   288       "bbaa\raa\r",
       
   289       "bbabbabb\rbbabbabb\rbb",
       
   290       7 },
       
   291     { "b*",
       
   292       "bb",
       
   293       "aa\r\naa\r\n",
       
   294       "bbaa\r\naa\r\n",
       
   295       "bbabbabb\r\nbbabbabb\r\nbb",
       
   296       7 },
       
   297 #ifdef SUPPORT_UTF8
       
   298     { "b*",
       
   299       "bb",
       
   300       "\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8",   // utf8
       
   301       "bb\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8",
       
   302       "bb\xE3\x83\x9B""bb""\xE3\x83\xBC""bb""\xE3\x83\xA0""bb""\xE3\x81\xB8""bb",
       
   303       5 },
       
   304     { "b*",
       
   305       "bb",
       
   306       "\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n",   // utf8
       
   307       "bb\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n",
       
   308       ("bb\xE3\x83\x9B""bb\r\nbb""\xE3\x83\xBC""bb\rbb""\xE3\x83\xA0"
       
   309        "bb\nbb""\xE3\x81\xB8""bb\r\nbb"),
       
   310       9 },
       
   311 #endif
       
   312     { "", NULL, NULL, NULL, NULL, 0 }
       
   313   };
       
   314 
       
   315 #ifdef SUPPORT_UTF8
       
   316   const bool support_utf8 = true;
       
   317 #else
       
   318   const bool support_utf8 = false;
       
   319 #endif
       
   320 
       
   321   for (const ReplaceTest *t = tests; t->original != NULL; ++t) {
       
   322     RE re(t->regexp, RE_Options(PCRE_NEWLINE_CRLF).set_utf8(support_utf8));
       
   323     assert(re.error().empty());
       
   324     string one(t->original);
       
   325     CHECK(re.Replace(t->rewrite, &one));
       
   326     CHECK_EQ(one, t->single);
       
   327     string all(t->original);
       
   328     const int replace_count = re.GlobalReplace(t->rewrite, &all);
       
   329     CHECK_EQ(all, t->global);
       
   330     CHECK_EQ(replace_count, t->global_count);
       
   331   }
       
   332 
       
   333   // One final test: test \r\n replacement when we're not in CRLF mode
       
   334   {
       
   335     RE re("b*", RE_Options(PCRE_NEWLINE_CR).set_utf8(support_utf8));
       
   336     assert(re.error().empty());
       
   337     string all("aa\r\naa\r\n");
       
   338     CHECK_EQ(re.GlobalReplace("bb", &all), 9);
       
   339     CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
       
   340   }
       
   341   {
       
   342     RE re("b*", RE_Options(PCRE_NEWLINE_LF).set_utf8(support_utf8));
       
   343     assert(re.error().empty());
       
   344     string all("aa\r\naa\r\n");
       
   345     CHECK_EQ(re.GlobalReplace("bb", &all), 9);
       
   346     CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
       
   347   }
       
   348   // TODO: test what happens when no PCRE_NEWLINE_* flag is set.
       
   349   //       Alas, the answer depends on how pcre was compiled.
       
   350 }
       
   351 
       
   352 static void TestExtract() {
       
   353   printf("Testing Extract\n");
       
   354 
       
   355   string s;
       
   356 
       
   357   CHECK(RE("(.*)@([^.]*)").Extract("\\2!\\1", "boris@kremvax.ru", &s));
       
   358   CHECK_EQ(s, "kremvax!boris");
       
   359 
       
   360   // check the RE interface as well
       
   361   CHECK(RE(".*").Extract("'\\0'", "foo", &s));
       
   362   CHECK_EQ(s, "'foo'");
       
   363   CHECK(!RE("bar").Extract("'\\0'", "baz", &s));
       
   364   CHECK_EQ(s, "'foo'");
       
   365 }
       
   366 
       
   367 static void TestConsume() {
       
   368   printf("Testing Consume\n");
       
   369 
       
   370   string word;
       
   371 
       
   372   string s("   aaa b!@#$@#$cccc");
       
   373   StringPiece input(s);
       
   374 
       
   375   RE r("\\s*(\\w+)");    // matches a word, possibly proceeded by whitespace
       
   376   CHECK(r.Consume(&input, &word));
       
   377   CHECK_EQ(word, "aaa");
       
   378   CHECK(r.Consume(&input, &word));
       
   379   CHECK_EQ(word, "b");
       
   380   CHECK(! r.Consume(&input, &word));
       
   381 }
       
   382 
       
   383 static void TestFindAndConsume() {
       
   384   printf("Testing FindAndConsume\n");
       
   385 
       
   386   string word;
       
   387 
       
   388   string s("   aaa b!@#$@#$cccc");
       
   389   StringPiece input(s);
       
   390 
       
   391   RE r("(\\w+)");      // matches a word
       
   392   CHECK(r.FindAndConsume(&input, &word));
       
   393   CHECK_EQ(word, "aaa");
       
   394   CHECK(r.FindAndConsume(&input, &word));
       
   395   CHECK_EQ(word, "b");
       
   396   CHECK(r.FindAndConsume(&input, &word));
       
   397   CHECK_EQ(word, "cccc");
       
   398   CHECK(! r.FindAndConsume(&input, &word));
       
   399 }
       
   400 
       
   401 static void TestMatchNumberPeculiarity() {
       
   402   printf("Testing match-number peculiaraity\n");
       
   403 
       
   404   string word1;
       
   405   string word2;
       
   406   string word3;
       
   407 
       
   408   RE r("(foo)|(bar)|(baz)");
       
   409   CHECK(r.PartialMatch("foo", &word1, &word2, &word3));
       
   410   CHECK_EQ(word1, "foo");
       
   411   CHECK_EQ(word2, "");
       
   412   CHECK_EQ(word3, "");
       
   413   CHECK(r.PartialMatch("bar", &word1, &word2, &word3));
       
   414   CHECK_EQ(word1, "");
       
   415   CHECK_EQ(word2, "bar");
       
   416   CHECK_EQ(word3, "");
       
   417   CHECK(r.PartialMatch("baz", &word1, &word2, &word3));
       
   418   CHECK_EQ(word1, "");
       
   419   CHECK_EQ(word2, "");
       
   420   CHECK_EQ(word3, "baz");
       
   421   CHECK(!r.PartialMatch("f", &word1, &word2, &word3));
       
   422 
       
   423   string a;
       
   424   CHECK(RE("(foo)|hello").FullMatch("hello", &a));
       
   425   CHECK_EQ(a, "");
       
   426 }
       
   427 
       
   428 static void TestRecursion() {
       
   429   printf("Testing recursion\n");
       
   430 
       
   431   // Get one string that passes (sometimes), one that never does.
       
   432   string text_good("abcdefghijk");
       
   433   string text_bad("acdefghijkl");
       
   434 
       
   435   // According to pcretest, matching text_good against (\w+)*b
       
   436   // requires match_limit of at least 8192, and match_recursion_limit
       
   437   // of at least 37.
       
   438 
       
   439   RE_Options options_ml;
       
   440   options_ml.set_match_limit(8192);
       
   441   RE re("(\\w+)*b", options_ml);
       
   442   CHECK(re.PartialMatch(text_good) == true);
       
   443   CHECK(re.PartialMatch(text_bad) == false);
       
   444   CHECK(re.FullMatch(text_good) == false);
       
   445   CHECK(re.FullMatch(text_bad) == false);
       
   446 
       
   447   options_ml.set_match_limit(1024);
       
   448   RE re2("(\\w+)*b", options_ml);
       
   449   CHECK(re2.PartialMatch(text_good) == false);   // because of match_limit
       
   450   CHECK(re2.PartialMatch(text_bad) == false);
       
   451   CHECK(re2.FullMatch(text_good) == false);
       
   452   CHECK(re2.FullMatch(text_bad) == false);
       
   453 
       
   454   RE_Options options_mlr;
       
   455   options_mlr.set_match_limit_recursion(50);
       
   456   RE re3("(\\w+)*b", options_mlr);
       
   457   CHECK(re3.PartialMatch(text_good) == true);
       
   458   CHECK(re3.PartialMatch(text_bad) == false);
       
   459   CHECK(re3.FullMatch(text_good) == false);
       
   460   CHECK(re3.FullMatch(text_bad) == false);
       
   461 
       
   462   options_mlr.set_match_limit_recursion(10);
       
   463   RE re4("(\\w+)*b", options_mlr);
       
   464   CHECK(re4.PartialMatch(text_good) == false);
       
   465   CHECK(re4.PartialMatch(text_bad) == false);
       
   466   CHECK(re4.FullMatch(text_good) == false);
       
   467   CHECK(re4.FullMatch(text_bad) == false);
       
   468 }
       
   469 
       
   470 // A meta-quoted string, interpreted as a pattern, should always match
       
   471 // the original unquoted string.
       
   472 static void TestQuoteMeta(string unquoted, RE_Options options = RE_Options()) {
       
   473   string quoted = RE::QuoteMeta(unquoted);
       
   474   RE re(quoted, options);
       
   475   CHECK(re.FullMatch(unquoted));
       
   476 }
       
   477 
       
   478 // A string containing meaningful regexp characters, which is then meta-
       
   479 // quoted, should not generally match a string the unquoted string does.
       
   480 static void NegativeTestQuoteMeta(string unquoted, string should_not_match,
       
   481                                   RE_Options options = RE_Options()) {
       
   482   string quoted = RE::QuoteMeta(unquoted);
       
   483   RE re(quoted, options);
       
   484   CHECK(!re.FullMatch(should_not_match));
       
   485 }
       
   486 
       
   487 // Tests that quoted meta characters match their original strings,
       
   488 // and that a few things that shouldn't match indeed do not.
       
   489 static void TestQuotaMetaSimple() {
       
   490   TestQuoteMeta("foo");
       
   491   TestQuoteMeta("foo.bar");
       
   492   TestQuoteMeta("foo\\.bar");
       
   493   TestQuoteMeta("[1-9]");
       
   494   TestQuoteMeta("1.5-2.0?");
       
   495   TestQuoteMeta("\\d");
       
   496   TestQuoteMeta("Who doesn't like ice cream?");
       
   497   TestQuoteMeta("((a|b)c?d*e+[f-h]i)");
       
   498   TestQuoteMeta("((?!)xxx).*yyy");
       
   499   TestQuoteMeta("([");
       
   500   TestQuoteMeta(string("foo\0bar", 7));
       
   501 }
       
   502 
       
   503 static void TestQuoteMetaSimpleNegative() {
       
   504   NegativeTestQuoteMeta("foo", "bar");
       
   505   NegativeTestQuoteMeta("...", "bar");
       
   506   NegativeTestQuoteMeta("\\.", ".");
       
   507   NegativeTestQuoteMeta("\\.", "..");
       
   508   NegativeTestQuoteMeta("(a)", "a");
       
   509   NegativeTestQuoteMeta("(a|b)", "a");
       
   510   NegativeTestQuoteMeta("(a|b)", "(a)");
       
   511   NegativeTestQuoteMeta("(a|b)", "a|b");
       
   512   NegativeTestQuoteMeta("[0-9]", "0");
       
   513   NegativeTestQuoteMeta("[0-9]", "0-9");
       
   514   NegativeTestQuoteMeta("[0-9]", "[9]");
       
   515   NegativeTestQuoteMeta("((?!)xxx)", "xxx");
       
   516 }
       
   517 
       
   518 static void TestQuoteMetaLatin1() {
       
   519   TestQuoteMeta("3\xb2 = 9");
       
   520 }
       
   521 
       
   522 static void TestQuoteMetaUtf8() {
       
   523 #ifdef SUPPORT_UTF8
       
   524   TestQuoteMeta("Pl\xc3\xa1\x63ido Domingo", pcrecpp::UTF8());
       
   525   TestQuoteMeta("xyz", pcrecpp::UTF8());            // No fancy utf8
       
   526   TestQuoteMeta("\xc2\xb0", pcrecpp::UTF8());       // 2-byte utf8 (degree symbol)
       
   527   TestQuoteMeta("27\xc2\xb0 degrees", pcrecpp::UTF8());  // As a middle character
       
   528   TestQuoteMeta("\xe2\x80\xb3", pcrecpp::UTF8());   // 3-byte utf8 (double prime)
       
   529   TestQuoteMeta("\xf0\x9d\x85\x9f", pcrecpp::UTF8()); // 4-byte utf8 (music note)
       
   530   TestQuoteMeta("27\xc2\xb0"); // Interpreted as Latin-1, but should still work
       
   531   NegativeTestQuoteMeta("27\xc2\xb0",               // 2-byte utf (degree symbol)
       
   532                         "27\\\xc2\\\xb0",
       
   533                         pcrecpp::UTF8());
       
   534 #endif
       
   535 }
       
   536 
       
   537 static void TestQuoteMetaAll() {
       
   538   printf("Testing QuoteMeta\n");
       
   539   TestQuotaMetaSimple();
       
   540   TestQuoteMetaSimpleNegative();
       
   541   TestQuoteMetaLatin1();
       
   542   TestQuoteMetaUtf8();
       
   543 }
       
   544 
       
   545 //
       
   546 // Options tests contributed by
       
   547 // Giuseppe Maxia, CTO, Stardata s.r.l.
       
   548 // July 2005
       
   549 //
       
   550 static void GetOneOptionResult(
       
   551                 const char *option_name,
       
   552                 const char *regex,
       
   553                 const char *str,
       
   554                 RE_Options options,
       
   555                 bool full,
       
   556                 string expected) {
       
   557 
       
   558   printf("Testing Option <%s>\n", option_name);
       
   559   if(VERBOSE_TEST)
       
   560     printf("/%s/ finds \"%s\" within \"%s\" \n",
       
   561                     regex,
       
   562                     expected.c_str(),
       
   563                     str);
       
   564   string captured("");
       
   565   if (full)
       
   566     RE(regex,options).FullMatch(str, &captured);
       
   567   else
       
   568     RE(regex,options).PartialMatch(str, &captured);
       
   569   CHECK_EQ(captured, expected);
       
   570 }
       
   571 
       
   572 static void TestOneOption(
       
   573                 const char *option_name,
       
   574                 const char *regex,
       
   575                 const char *str,
       
   576                 RE_Options options,
       
   577                 bool full,
       
   578                 bool assertive = true) {
       
   579 
       
   580   printf("Testing Option <%s>\n", option_name);
       
   581   if (VERBOSE_TEST)
       
   582     printf("'%s' %s /%s/ \n",
       
   583                   str,
       
   584                   (assertive? "matches" : "doesn't match"),
       
   585                   regex);
       
   586   if (assertive) {
       
   587     if (full)
       
   588       CHECK(RE(regex,options).FullMatch(str));
       
   589     else
       
   590       CHECK(RE(regex,options).PartialMatch(str));
       
   591   } else {
       
   592     if (full)
       
   593       CHECK(!RE(regex,options).FullMatch(str));
       
   594     else
       
   595       CHECK(!RE(regex,options).PartialMatch(str));
       
   596   }
       
   597 }
       
   598 
       
   599 static void Test_CASELESS() {
       
   600   RE_Options options;
       
   601   RE_Options options2;
       
   602 
       
   603   options.set_caseless(true);
       
   604   TestOneOption("CASELESS (class)",  "HELLO",    "hello", options, false);
       
   605   TestOneOption("CASELESS (class2)", "HELLO",    "hello", options2.set_caseless(true), false);
       
   606   TestOneOption("CASELESS (class)",  "^[A-Z]+$", "Hello", options, false);
       
   607 
       
   608   TestOneOption("CASELESS (function)", "HELLO",    "hello", pcrecpp::CASELESS(), false);
       
   609   TestOneOption("CASELESS (function)", "^[A-Z]+$", "Hello", pcrecpp::CASELESS(), false);
       
   610   options.set_caseless(false);
       
   611   TestOneOption("no CASELESS", "HELLO",    "hello", options, false, false);
       
   612 }
       
   613 
       
   614 static void Test_MULTILINE() {
       
   615   RE_Options options;
       
   616   RE_Options options2;
       
   617   const char *str = "HELLO\n" "cruel\n" "world\n";
       
   618 
       
   619   options.set_multiline(true);
       
   620   TestOneOption("MULTILINE (class)",    "^cruel$", str, options, false);
       
   621   TestOneOption("MULTILINE (class2)",   "^cruel$", str, options2.set_multiline(true), false);
       
   622   TestOneOption("MULTILINE (function)", "^cruel$", str, pcrecpp::MULTILINE(), false);
       
   623   options.set_multiline(false);
       
   624   TestOneOption("no MULTILINE", "^cruel$", str, options, false, false);
       
   625 }
       
   626 
       
   627 static void Test_DOTALL() {
       
   628   RE_Options options;
       
   629   RE_Options options2;
       
   630   const char *str = "HELLO\n" "cruel\n" "world";
       
   631 
       
   632   options.set_dotall(true);
       
   633   TestOneOption("DOTALL (class)",    "HELLO.*world", str, options, true);
       
   634   TestOneOption("DOTALL (class2)",   "HELLO.*world", str, options2.set_dotall(true), true);
       
   635   TestOneOption("DOTALL (function)",    "HELLO.*world", str, pcrecpp::DOTALL(), true);
       
   636   options.set_dotall(false);
       
   637   TestOneOption("no DOTALL", "HELLO.*world", str, options, true, false);
       
   638 }
       
   639 
       
   640 static void Test_DOLLAR_ENDONLY() {
       
   641   RE_Options options;
       
   642   RE_Options options2;
       
   643   const char *str = "HELLO world\n";
       
   644 
       
   645   TestOneOption("no DOLLAR_ENDONLY", "world$", str, options, false);
       
   646   options.set_dollar_endonly(true);
       
   647   TestOneOption("DOLLAR_ENDONLY 1",    "world$", str, options, false, false);
       
   648   TestOneOption("DOLLAR_ENDONLY 2",    "world$", str, options2.set_dollar_endonly(true), false, false);
       
   649 }
       
   650 
       
   651 static void Test_EXTRA() {
       
   652   RE_Options options;
       
   653   const char *str = "HELLO";
       
   654 
       
   655   options.set_extra(true);
       
   656   TestOneOption("EXTRA 1", "\\HELL\\O", str, options, true, false );
       
   657   TestOneOption("EXTRA 2", "\\HELL\\O", str, RE_Options().set_extra(true), true, false );
       
   658   options.set_extra(false);
       
   659   TestOneOption("no EXTRA", "\\HELL\\O", str, options, true );
       
   660 }
       
   661 
       
   662 static void Test_EXTENDED() {
       
   663   RE_Options options;
       
   664   RE_Options options2;
       
   665   const char *str = "HELLO world";
       
   666 
       
   667   options.set_extended(true);
       
   668   TestOneOption("EXTENDED (class)",    "HELLO world", str, options, false, false);
       
   669   TestOneOption("EXTENDED (class2)",   "HELLO world", str, options2.set_extended(true), false, false);
       
   670   TestOneOption("EXTENDED (class)",
       
   671                     "^ HE L{2} O "
       
   672                     "\\s+        "
       
   673                     "\\w+ $      ",
       
   674                     str,
       
   675                     options,
       
   676                     false);
       
   677 
       
   678   TestOneOption("EXTENDED (function)",    "HELLO world", str, pcrecpp::EXTENDED(), false, false);
       
   679   TestOneOption("EXTENDED (function)",
       
   680                     "^ HE L{2} O "
       
   681                     "\\s+        "
       
   682                     "\\w+ $      ",
       
   683                     str,
       
   684                     pcrecpp::EXTENDED(),
       
   685                     false);
       
   686 
       
   687   options.set_extended(false);
       
   688   TestOneOption("no EXTENDED", "HELLO world", str, options, false);
       
   689 }
       
   690 
       
    // Checks that parentheses capture text under default options, then sets
    // the no_auto_capture option and repeats the extraction.
    static void Test_NO_AUTO_CAPTURE() {
      RE_Options options;
      const char *str = "HELLO world";
      string captured;

      printf("Testing Option <no NO_AUTO_CAPTURE>\n");
      if (VERBOSE_TEST)
        printf("parentheses capture text\n");
      // `re` is constructed here, while `options` still holds its defaults.
      RE re("(world|universe)$", options);
      CHECK(re.Extract("\\1", str , &captured));
      CHECK_EQ(captured, "world");
      options.set_no_auto_capture(true);
      printf("testing Option <NO_AUTO_CAPTURE>\n");
      if (VERBOSE_TEST)
        printf("parentheses do not capture text\n");
      // NOTE(review): this reuses the `re` object built before the option was
      // set; unless RE keeps a live reference to `options`, the call below
      // exercises the same pattern as above and does not test
      // NO_AUTO_CAPTURE.  The return value of Extract is also ignored, so
      // `captured` would still hold "world" from the first extraction if this
      // call failed -- confirm whether a freshly constructed RE was intended.
      re.Extract("\\1",str, &captured );
      CHECK_EQ(captured, "world");
    }
       
   709 
       
   710 static void Test_UNGREEDY() {
       
   711   RE_Options options;
       
   712   const char *str = "HELLO, 'this' is the 'world'";
       
   713 
       
   714   options.set_ungreedy(true);
       
   715   GetOneOptionResult("UNGREEDY 1", "('.*')", str, options, false, "'this'" );
       
   716   GetOneOptionResult("UNGREEDY 2", "('.*')", str, RE_Options().set_ungreedy(true), false, "'this'" );
       
   717   GetOneOptionResult("UNGREEDY", "('.*?')", str, options, false, "'this' is the 'world'" );
       
   718 
       
   719   options.set_ungreedy(false);
       
   720   GetOneOptionResult("no UNGREEDY", "('.*')", str, options, false, "'this' is the 'world'" );
       
   721   GetOneOptionResult("no UNGREEDY", "('.*?')", str, options, false, "'this'" );
       
   722 }
       
   723 
       
   724 static void Test_all_options() {
       
   725   const char *str = "HELLO\n" "cruel\n" "world";
       
   726   RE_Options options;
       
   727   options.set_all_options(PCRE_CASELESS | PCRE_DOTALL);
       
   728 
       
   729   TestOneOption("all_options (CASELESS|DOTALL)", "^hello.*WORLD", str , options, false);
       
   730   options.set_all_options(0);
       
   731   TestOneOption("all_options (0)", "^hello.*WORLD", str , options, false, false);
       
   732   options.set_all_options(PCRE_MULTILINE | PCRE_EXTENDED);
       
   733 
       
   734   TestOneOption("all_options (MULTILINE|EXTENDED)", " ^ c r u e l $ ", str, options, false);
       
   735   TestOneOption("all_options (MULTILINE|EXTENDED) with constructor",
       
   736                   " ^ c r u e l $ ",
       
   737                   str,
       
   738                   RE_Options(PCRE_MULTILINE | PCRE_EXTENDED),
       
   739                   false);
       
   740 
       
   741   TestOneOption("all_options (MULTILINE|EXTENDED) with concatenation",
       
   742                   " ^ c r u e l $ ",
       
   743                   str,
       
   744                   RE_Options()
       
   745                        .set_multiline(true)
       
   746                        .set_extended(true),
       
   747                   false);
       
   748 
       
   749   options.set_all_options(0);
       
   750   TestOneOption("all_options (0)", "^ c r u e l $", str, options, false, false);
       
   751 
       
   752 }
       
   753 
       
   754 static void TestOptions() {
       
   755   printf("Testing Options\n");
       
   756   Test_CASELESS();
       
   757   Test_MULTILINE();
       
   758   Test_DOTALL();
       
   759   Test_DOLLAR_ENDONLY();
       
   760   Test_EXTENDED();
       
   761   Test_NO_AUTO_CAPTURE();
       
   762   Test_UNGREEDY();
       
   763   Test_EXTRA();
       
   764   Test_all_options();
       
   765 }
       
   766 
       
   767 static void TestConstructors() {
       
   768   printf("Testing constructors\n");
       
   769 
       
   770   RE_Options options;
       
   771   options.set_dotall(true);
       
   772   const char *str = "HELLO\n" "cruel\n" "world";
       
   773 
       
   774   RE orig("HELLO.*world", options);
       
   775   CHECK(orig.FullMatch(str));
       
   776 
       
   777   RE copy1(orig);
       
   778   CHECK(copy1.FullMatch(str));
       
   779 
       
   780   RE copy2("not a match");
       
   781   CHECK(!copy2.FullMatch(str));
       
   782   copy2 = copy1;
       
   783   CHECK(copy2.FullMatch(str));
       
   784   copy2 = orig;
       
   785   CHECK(copy2.FullMatch(str));
       
   786 
       
   787   // Make sure when we assign to ourselves, nothing bad happens
       
   788   orig = orig;
       
   789   copy1 = copy1;
       
   790   copy2 = copy2;
       
   791   CHECK(orig.FullMatch(str));
       
   792   CHECK(copy1.FullMatch(str));
       
   793   CHECK(copy2.FullMatch(str));
       
   794 }
       
   795 
       
   796 int main(int argc, char** argv) {
       
   797   // Treat any flag as --help
       
   798   if (argc > 1 && argv[1][0] == '-') {
       
   799     printf("Usage: %s [timing1|timing2|timing3 num-iters]\n"
       
   800            "       If 'timingX ###' is specified, run the given timing test\n"
       
   801            "       with the given number of iterations, rather than running\n"
       
   802            "       the default corectness test.\n", argv[0]);
       
   803     return 0;
       
   804   }
       
   805 
       
   806   if (argc > 1) {
       
   807     if ( argc == 2 || atoi(argv[2]) == 0) {
       
   808       printf("timing mode needs a num-iters argument\n");
       
   809       return 1;
       
   810     }
       
   811     if (!strcmp(argv[1], "timing1"))
       
   812       Timing1(atoi(argv[2]));
       
   813     else if (!strcmp(argv[1], "timing2"))
       
   814       Timing2(atoi(argv[2]));
       
   815     else if (!strcmp(argv[1], "timing3"))
       
   816       Timing3(atoi(argv[2]));
       
   817     else
       
   818       printf("Unknown argument '%s'\n", argv[1]);
       
   819     return 0;
       
   820   }
       
   821 
       
   822   printf("Testing FullMatch\n");
       
   823 
       
   824   int i;
       
   825   string s;
       
   826 
       
   827   /***** FullMatch with no args *****/
       
   828 
       
   829   CHECK(RE("h.*o").FullMatch("hello"));
       
   830   CHECK(!RE("h.*o").FullMatch("othello"));     // Must be anchored at front
       
   831   CHECK(!RE("h.*o").FullMatch("hello!"));      // Must be anchored at end
       
   832   CHECK(RE("a*").FullMatch("aaaa"));           // Fullmatch with normal op
       
   833   CHECK(RE("a*?").FullMatch("aaaa"));          // Fullmatch with nongreedy op
       
   834   CHECK(RE("a*?\\z").FullMatch("aaaa"));       // Two unusual ops
       
   835 
       
   836   /***** FullMatch with args *****/
       
   837 
       
   838   // Zero-arg
       
   839   CHECK(RE("\\d+").FullMatch("1001"));
       
   840 
       
   841   // Single-arg
       
   842   CHECK(RE("(\\d+)").FullMatch("1001",   &i));
       
   843   CHECK_EQ(i, 1001);
       
   844   CHECK(RE("(-?\\d+)").FullMatch("-123", &i));
       
   845   CHECK_EQ(i, -123);
       
   846   CHECK(!RE("()\\d+").FullMatch("10", &i));
       
   847   CHECK(!RE("(\\d+)").FullMatch("1234567890123456789012345678901234567890",
       
   848                                 &i));
       
   849 
       
   850   // Digits surrounding integer-arg
       
   851   CHECK(RE("1(\\d*)4").FullMatch("1234", &i));
       
   852   CHECK_EQ(i, 23);
       
   853   CHECK(RE("(\\d)\\d+").FullMatch("1234", &i));
       
   854   CHECK_EQ(i, 1);
       
   855   CHECK(RE("(-\\d)\\d+").FullMatch("-1234", &i));
       
   856   CHECK_EQ(i, -1);
       
   857   CHECK(RE("(\\d)").PartialMatch("1234", &i));
       
   858   CHECK_EQ(i, 1);
       
   859   CHECK(RE("(-\\d)").PartialMatch("-1234", &i));
       
   860   CHECK_EQ(i, -1);
       
   861 
       
   862   // String-arg
       
   863   CHECK(RE("h(.*)o").FullMatch("hello", &s));
       
   864   CHECK_EQ(s, string("ell"));
       
   865 
       
   866   // StringPiece-arg
       
   867   StringPiece sp;
       
   868   CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &sp, &i));
       
   869   CHECK_EQ(sp.size(), 4);
       
   870   CHECK(memcmp(sp.data(), "ruby", 4) == 0);
       
   871   CHECK_EQ(i, 1234);
       
   872 
       
   873   // Multi-arg
       
   874   CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &s, &i));
       
   875   CHECK_EQ(s, string("ruby"));
       
   876   CHECK_EQ(i, 1234);
       
   877 
       
   878   // Ignore non-void* NULL arg
       
   879   CHECK(RE("he(.*)lo").FullMatch("hello", (char*)NULL));
       
   880   CHECK(RE("h(.*)o").FullMatch("hello", (string*)NULL));
       
   881   CHECK(RE("h(.*)o").FullMatch("hello", (StringPiece*)NULL));
       
   882   CHECK(RE("(.*)").FullMatch("1234", (int*)NULL));
       
   883 #ifdef HAVE_LONG_LONG
       
   884   CHECK(RE("(.*)").FullMatch("1234567890123456", (long long*)NULL));
       
   885 #endif
       
   886   CHECK(RE("(.*)").FullMatch("123.4567890123456", (double*)NULL));
       
   887   CHECK(RE("(.*)").FullMatch("123.4567890123456", (float*)NULL));
       
   888 
       
   889   // Fail on non-void* NULL arg if the match doesn't parse for the given type.
       
   890   CHECK(!RE("h(.*)lo").FullMatch("hello", &s, (char*)NULL));
       
   891   CHECK(!RE("(.*)").FullMatch("hello", (int*)NULL));
       
   892   CHECK(!RE("(.*)").FullMatch("1234567890123456", (int*)NULL));
       
   893   CHECK(!RE("(.*)").FullMatch("hello", (double*)NULL));
       
   894   CHECK(!RE("(.*)").FullMatch("hello", (float*)NULL));
       
   895 
       
   896   // Ignored arg
       
   897   CHECK(RE("(\\w+)(:)(\\d+)").FullMatch("ruby:1234", &s, (void*)NULL, &i));
       
   898   CHECK_EQ(s, string("ruby"));
       
   899   CHECK_EQ(i, 1234);
       
   900 
       
   901   // Type tests
       
   902   {
       
   903     char c;
       
   904     CHECK(RE("(H)ello").FullMatch("Hello", &c));
       
   905     CHECK_EQ(c, 'H');
       
   906   }
       
   907   {
       
   908     unsigned char c;
       
   909     CHECK(RE("(H)ello").FullMatch("Hello", &c));
       
   910     CHECK_EQ(c, static_cast<unsigned char>('H'));
       
   911   }
       
   912   {
       
   913     short v;
       
   914     CHECK(RE("(-?\\d+)").FullMatch("100",     &v));    CHECK_EQ(v, 100);
       
   915     CHECK(RE("(-?\\d+)").FullMatch("-100",    &v));    CHECK_EQ(v, -100);
       
   916     CHECK(RE("(-?\\d+)").FullMatch("32767",   &v));    CHECK_EQ(v, 32767);
       
   917     CHECK(RE("(-?\\d+)").FullMatch("-32768",  &v));    CHECK_EQ(v, -32768);
       
   918     CHECK(!RE("(-?\\d+)").FullMatch("-32769", &v));
       
   919     CHECK(!RE("(-?\\d+)").FullMatch("32768",  &v));
       
   920   }
       
   921   {
       
   922     unsigned short v;
       
   923     CHECK(RE("(\\d+)").FullMatch("100",     &v));    CHECK_EQ(v, 100);
       
   924     CHECK(RE("(\\d+)").FullMatch("32767",   &v));    CHECK_EQ(v, 32767);
       
   925     CHECK(RE("(\\d+)").FullMatch("65535",   &v));    CHECK_EQ(v, 65535);
       
   926     CHECK(!RE("(\\d+)").FullMatch("65536",  &v));
       
   927   }
       
   928   {
       
   929     int v;
       
   930     static const int max_value = 0x7fffffff;
       
   931     static const int min_value = -max_value - 1;
       
   932     CHECK(RE("(-?\\d+)").FullMatch("100",         &v)); CHECK_EQ(v, 100);
       
   933     CHECK(RE("(-?\\d+)").FullMatch("-100",        &v)); CHECK_EQ(v, -100);
       
   934     CHECK(RE("(-?\\d+)").FullMatch("2147483647",  &v)); CHECK_EQ(v, max_value);
       
   935     CHECK(RE("(-?\\d+)").FullMatch("-2147483648", &v)); CHECK_EQ(v, min_value);
       
   936     CHECK(!RE("(-?\\d+)").FullMatch("-2147483649", &v));
       
   937     CHECK(!RE("(-?\\d+)").FullMatch("2147483648",  &v));
       
   938   }
       
   939   {
       
   940     unsigned int v;
       
   941     static const unsigned int max_value = 0xfffffffful;
       
   942     CHECK(RE("(\\d+)").FullMatch("100",         &v)); CHECK_EQ(v, 100);
       
   943     CHECK(RE("(\\d+)").FullMatch("4294967295",  &v)); CHECK_EQ(v, max_value);
       
   944     CHECK(!RE("(\\d+)").FullMatch("4294967296", &v));
       
   945   }
       
   946 #ifdef HAVE_LONG_LONG
       
   947 # if defined(__MINGW__) || defined(__MINGW32__)
       
   948 #   define LLD "%I64d"
       
   949 #   define LLU "%I64u"
       
   950 # else
       
   951 #   define LLD "%lld"
       
   952 #   define LLU "%llu"
       
   953 # endif
       
   954   {
       
   955     long long v;
       
   956     static const long long max_value = 0x7fffffffffffffffLL;
       
   957     static const long long min_value = -max_value - 1;
       
   958     char buf[32];  // definitely big enough for a long long
       
   959 
       
   960     CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
       
   961     CHECK(RE("(-?\\d+)").FullMatch("-100",&v)); CHECK_EQ(v, -100);
       
   962 
       
   963     sprintf(buf, LLD, max_value);
       
   964     CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
       
   965 
       
   966     sprintf(buf, LLD, min_value);
       
   967     CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, min_value);
       
   968 
       
   969     sprintf(buf, LLD, max_value);
       
   970     assert(buf[strlen(buf)-1] != '9');
       
   971     buf[strlen(buf)-1]++;
       
   972     CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
       
   973 
       
   974     sprintf(buf, LLD, min_value);
       
   975     assert(buf[strlen(buf)-1] != '9');
       
   976     buf[strlen(buf)-1]++;
       
   977     CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
       
   978   }
       
   979 #endif
       
   980 #if defined HAVE_UNSIGNED_LONG_LONG && defined HAVE_LONG_LONG
       
   981   {
       
   982     unsigned long long v;
       
   983     long long v2;
       
   984     static const unsigned long long max_value = 0xffffffffffffffffULL;
       
   985     char buf[32];  // definitely big enough for a unsigned long long
       
   986 
       
   987     CHECK(RE("(-?\\d+)").FullMatch("100",&v)); CHECK_EQ(v, 100);
       
   988     CHECK(RE("(-?\\d+)").FullMatch("-100",&v2)); CHECK_EQ(v2, -100);
       
   989 
       
   990     sprintf(buf, LLU, max_value);
       
   991     CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
       
   992 
       
   993     assert(buf[strlen(buf)-1] != '9');
       
   994     buf[strlen(buf)-1]++;
       
   995     CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
       
   996   }
       
   997 #endif
       
   998   {
       
   999     float v;
       
  1000     CHECK(RE("(.*)").FullMatch("100", &v));
       
  1001     CHECK(RE("(.*)").FullMatch("-100.", &v));
       
  1002     CHECK(RE("(.*)").FullMatch("1e23", &v));
       
  1003   }
       
  1004   {
       
  1005     double v;
       
  1006     CHECK(RE("(.*)").FullMatch("100", &v));
       
  1007     CHECK(RE("(.*)").FullMatch("-100.", &v));
       
  1008     CHECK(RE("(.*)").FullMatch("1e23", &v));
       
  1009   }
       
  1010 
       
  1011   // Check that matching is fully anchored
       
  1012   CHECK(!RE("(\\d+)").FullMatch("x1001",  &i));
       
  1013   CHECK(!RE("(\\d+)").FullMatch("1001x",  &i));
       
  1014   CHECK(RE("x(\\d+)").FullMatch("x1001", &i)); CHECK_EQ(i, 1001);
       
  1015   CHECK(RE("(\\d+)x").FullMatch("1001x", &i)); CHECK_EQ(i, 1001);
       
  1016 
       
  1017   // Braces
       
  1018   CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcd"));
       
  1019   CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcde"));
       
  1020   CHECK(!RE("[0-9a-f+.-]{5,}").FullMatch("0abc"));
       
  1021 
       
  1022   // Complicated RE
       
  1023   CHECK(RE("foo|bar|[A-Z]").FullMatch("foo"));
       
  1024   CHECK(RE("foo|bar|[A-Z]").FullMatch("bar"));
       
  1025   CHECK(RE("foo|bar|[A-Z]").FullMatch("X"));
       
  1026   CHECK(!RE("foo|bar|[A-Z]").FullMatch("XY"));
       
  1027 
       
  1028   // Check full-match handling (needs '$' tacked on internally)
       
  1029   CHECK(RE("fo|foo").FullMatch("fo"));
       
  1030   CHECK(RE("fo|foo").FullMatch("foo"));
       
  1031   CHECK(RE("fo|foo$").FullMatch("fo"));
       
  1032   CHECK(RE("fo|foo$").FullMatch("foo"));
       
  1033   CHECK(RE("foo$").FullMatch("foo"));
       
  1034   CHECK(!RE("foo\\$").FullMatch("foo$bar"));
       
  1035   CHECK(!RE("fo|bar").FullMatch("fox"));
       
  1036 
       
  1037   // Uncomment the following if we change the handling of '$' to
       
  1038   // prevent it from matching a trailing newline
       
  1039   if (false) {
       
  1040     // Check that we don't get bitten by pcre's special handling of a
       
  1041     // '\n' at the end of the string matching '$'
       
  1042     CHECK(!RE("foo$").PartialMatch("foo\n"));
       
  1043   }
       
  1044 
       
  1045   // Number of args
       
  1046   int a[16];
       
  1047   CHECK(RE("").FullMatch(""));
       
  1048 
       
  1049   memset(a, 0, sizeof(0));
       
  1050   CHECK(RE("(\\d){1}").FullMatch("1",
       
  1051                                  &a[0]));
       
  1052   CHECK_EQ(a[0], 1);
       
  1053 
       
  1054   memset(a, 0, sizeof(0));
       
  1055   CHECK(RE("(\\d)(\\d)").FullMatch("12",
       
  1056                                    &a[0],  &a[1]));
       
  1057   CHECK_EQ(a[0], 1);
       
  1058   CHECK_EQ(a[1], 2);
       
  1059 
       
  1060   memset(a, 0, sizeof(0));
       
  1061   CHECK(RE("(\\d)(\\d)(\\d)").FullMatch("123",
       
  1062                                         &a[0],  &a[1],  &a[2]));
       
  1063   CHECK_EQ(a[0], 1);
       
  1064   CHECK_EQ(a[1], 2);
       
  1065   CHECK_EQ(a[2], 3);
       
  1066 
       
  1067   memset(a, 0, sizeof(0));
       
  1068   CHECK(RE("(\\d)(\\d)(\\d)(\\d)").FullMatch("1234",
       
  1069                                              &a[0],  &a[1],  &a[2],  &a[3]));
       
  1070   CHECK_EQ(a[0], 1);
       
  1071   CHECK_EQ(a[1], 2);
       
  1072   CHECK_EQ(a[2], 3);
       
  1073   CHECK_EQ(a[3], 4);
       
  1074 
       
  1075   memset(a, 0, sizeof(0));
       
  1076   CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("12345",
       
  1077                                                   &a[0],  &a[1],  &a[2],
       
  1078                                                   &a[3],  &a[4]));
       
  1079   CHECK_EQ(a[0], 1);
       
  1080   CHECK_EQ(a[1], 2);
       
  1081   CHECK_EQ(a[2], 3);
       
  1082   CHECK_EQ(a[3], 4);
       
  1083   CHECK_EQ(a[4], 5);
       
  1084 
       
  1085   memset(a, 0, sizeof(0));
       
  1086   CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("123456",
       
  1087                                                        &a[0],  &a[1],  &a[2],
       
  1088                                                        &a[3],  &a[4],  &a[5]));
       
  1089   CHECK_EQ(a[0], 1);
       
  1090   CHECK_EQ(a[1], 2);
       
  1091   CHECK_EQ(a[2], 3);
       
  1092   CHECK_EQ(a[3], 4);
       
  1093   CHECK_EQ(a[4], 5);
       
  1094   CHECK_EQ(a[5], 6);
       
  1095 
       
  1096   memset(a, 0, sizeof(0));
       
  1097   CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("1234567",
       
  1098                                                             &a[0],  &a[1],  &a[2],  &a[3],
       
  1099                                                             &a[4],  &a[5],  &a[6]));
       
  1100   CHECK_EQ(a[0], 1);
       
  1101   CHECK_EQ(a[1], 2);
       
  1102   CHECK_EQ(a[2], 3);
       
  1103   CHECK_EQ(a[3], 4);
       
  1104   CHECK_EQ(a[4], 5);
       
  1105   CHECK_EQ(a[5], 6);
       
  1106   CHECK_EQ(a[6], 7);
       
  1107 
       
  1108   memset(a, 0, sizeof(0));
       
  1109   CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)"
       
  1110            "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch(
       
  1111                "1234567890123456",
       
  1112                &a[0],  &a[1],  &a[2],  &a[3],
       
  1113                &a[4],  &a[5],  &a[6],  &a[7],
       
  1114                &a[8],  &a[9],  &a[10], &a[11],
       
  1115                &a[12], &a[13], &a[14], &a[15]));
       
  1116   CHECK_EQ(a[0], 1);
       
  1117   CHECK_EQ(a[1], 2);
       
  1118   CHECK_EQ(a[2], 3);
       
  1119   CHECK_EQ(a[3], 4);
       
  1120   CHECK_EQ(a[4], 5);
       
  1121   CHECK_EQ(a[5], 6);
       
  1122   CHECK_EQ(a[6], 7);
       
  1123   CHECK_EQ(a[7], 8);
       
  1124   CHECK_EQ(a[8], 9);
       
  1125   CHECK_EQ(a[9], 0);
       
  1126   CHECK_EQ(a[10], 1);
       
  1127   CHECK_EQ(a[11], 2);
       
  1128   CHECK_EQ(a[12], 3);
       
  1129   CHECK_EQ(a[13], 4);
       
  1130   CHECK_EQ(a[14], 5);
       
  1131   CHECK_EQ(a[15], 6);
       
  1132 
       
  1133   /***** PartialMatch *****/
       
  1134 
       
  1135   printf("Testing PartialMatch\n");
       
  1136 
       
  1137   CHECK(RE("h.*o").PartialMatch("hello"));
       
  1138   CHECK(RE("h.*o").PartialMatch("othello"));
       
  1139   CHECK(RE("h.*o").PartialMatch("hello!"));
       
  1140   CHECK(RE("((((((((((((((((((((x))))))))))))))))))))").PartialMatch("x"));
       
  1141 
       
  1142   /***** other tests *****/
       
  1143 
       
  1144   RadixTests();
       
  1145   TestReplace();
       
  1146   TestExtract();
       
  1147   TestConsume();
       
  1148   TestFindAndConsume();
       
  1149   TestQuoteMetaAll();
       
  1150   TestMatchNumberPeculiarity();
       
  1151 
       
  1152   // Check the pattern() accessor
       
  1153   {
       
  1154     const string kPattern = "http://([^/]+)/.*";
       
  1155     const RE re(kPattern);
       
  1156     CHECK_EQ(kPattern, re.pattern());
       
  1157   }
       
  1158 
       
  1159   // Check RE error field.
       
  1160   {
       
  1161     RE re("foo");
       
  1162     CHECK(re.error().empty());  // Must have no error
       
  1163   }
       
  1164 
       
  1165 #ifdef SUPPORT_UTF8
       
  1166   // Check UTF-8 handling
       
  1167   {
       
  1168     printf("Testing UTF-8 handling\n");
       
  1169 
       
  1170     // Three Japanese characters (nihongo)
       
  1171     const unsigned char utf8_string[] = {
       
  1172          0xe6, 0x97, 0xa5, // 65e5
       
  1173          0xe6, 0x9c, 0xac, // 627c
       
  1174          0xe8, 0xaa, 0x9e, // 8a9e
       
  1175          0
       
  1176     };
       
  1177     const unsigned char utf8_pattern[] = {
       
  1178          '.',
       
  1179          0xe6, 0x9c, 0xac, // 627c
       
  1180          '.',
       
  1181          0
       
  1182     };
       
  1183 
       
  1184     // Both should match in either mode, bytes or UTF-8
       
  1185     RE re_test1(".........");
       
  1186     CHECK(re_test1.FullMatch(utf8_string));
       
  1187     RE re_test2("...", pcrecpp::UTF8());
       
  1188     CHECK(re_test2.FullMatch(utf8_string));
       
  1189 
       
  1190     // Check that '.' matches one byte or UTF-8 character
       
  1191     // according to the mode.
       
  1192     string ss;
       
  1193     RE re_test3("(.)");
       
  1194     CHECK(re_test3.PartialMatch(utf8_string, &ss));
       
  1195     CHECK_EQ(ss, string("\xe6"));
       
  1196     RE re_test4("(.)", pcrecpp::UTF8());
       
  1197     CHECK(re_test4.PartialMatch(utf8_string, &ss));
       
  1198     CHECK_EQ(ss, string("\xe6\x97\xa5"));
       
  1199 
       
  1200     // Check that string matches itself in either mode
       
  1201     RE re_test5(utf8_string);
       
  1202     CHECK(re_test5.FullMatch(utf8_string));
       
  1203     RE re_test6(utf8_string, pcrecpp::UTF8());
       
  1204     CHECK(re_test6.FullMatch(utf8_string));
       
  1205 
       
  1206     // Check that pattern matches string only in UTF8 mode
       
  1207     RE re_test7(utf8_pattern);
       
  1208     CHECK(!re_test7.FullMatch(utf8_string));
       
  1209     RE re_test8(utf8_pattern, pcrecpp::UTF8());
       
  1210     CHECK(re_test8.FullMatch(utf8_string));
       
  1211   }
       
  1212 
       
  1213   // Check that ungreedy, UTF8 regular expressions don't match when they
       
  1214   // oughtn't -- see bug 82246.
       
  1215   {
       
  1216     // This code always worked.
       
  1217     const char* pattern = "\\w+X";
       
  1218     const string target = "a aX";
       
  1219     RE match_sentence(pattern);
       
  1220     RE match_sentence_re(pattern, pcrecpp::UTF8());
       
  1221 
       
  1222     CHECK(!match_sentence.FullMatch(target));
       
  1223     CHECK(!match_sentence_re.FullMatch(target));
       
  1224   }
       
  1225 
       
  1226   {
       
  1227     const char* pattern = "(?U)\\w+X";
       
  1228     const string target = "a aX";
       
  1229     RE match_sentence(pattern);
       
  1230     RE match_sentence_re(pattern, pcrecpp::UTF8());
       
  1231 
       
  1232     CHECK(!match_sentence.FullMatch(target));
       
  1233     CHECK(!match_sentence_re.FullMatch(target));
       
  1234   }
       
  1235 #endif  /* def SUPPORT_UTF8 */
       
  1236 
       
  1237   printf("Testing error reporting\n");
       
  1238 
       
  1239   { RE re("a\\1"); CHECK(!re.error().empty()); }
       
  1240   {
       
  1241     RE re("a[x");
       
  1242     CHECK(!re.error().empty());
       
  1243   }
       
  1244   {
       
  1245     RE re("a[z-a]");
       
  1246     CHECK(!re.error().empty());
       
  1247   }
       
  1248   {
       
  1249     RE re("a[[:foobar:]]");
       
  1250     CHECK(!re.error().empty());
       
  1251   }
       
  1252   {
       
  1253     RE re("a(b");
       
  1254     CHECK(!re.error().empty());
       
  1255   }
       
  1256   {
       
  1257     RE re("a\\");
       
  1258     CHECK(!re.error().empty());
       
  1259   }
       
  1260 
       
  1261   // Test that recursion is stopped
       
  1262   TestRecursion();
       
  1263 
       
  1264   // Test Options
       
  1265   if (getenv("VERBOSE_TEST") != NULL)
       
  1266     VERBOSE_TEST  = true;
       
  1267   TestOptions();
       
  1268 
       
  1269   // Test the constructors
       
  1270   TestConstructors();
       
  1271 
       
  1272   // Done
       
  1273   printf("OK\n");
       
  1274 
       
  1275   return 0;
       
  1276 }