libraries/spcre/libpcre/pcre/pcrecpp.h
changeset 0 7f656887cf89
equal deleted inserted replaced
-1:000000000000 0:7f656887cf89
       
     1 // Copyright (c) 2005, Google Inc.
       
     2 // All rights reserved.
       
     3 //
       
     4 // Redistribution and use in source and binary forms, with or without
       
     5 // modification, are permitted provided that the following conditions are
       
     6 // met:
       
     7 //
       
     8 //     * Redistributions of source code must retain the above copyright
       
     9 // notice, this list of conditions and the following disclaimer.
       
    10 //     * Redistributions in binary form must reproduce the above
       
    11 // copyright notice, this list of conditions and the following disclaimer
       
    12 // in the documentation and/or other materials provided with the
       
    13 // distribution.
       
    14 //     * Neither the name of Google Inc. nor the names of its
       
    15 // contributors may be used to endorse or promote products derived from
       
    16 // this software without specific prior written permission.
       
    17 //
       
    18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
       
    19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
       
    20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
       
    21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
       
    22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
       
    23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
       
    24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
       
    25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
       
    26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
       
    27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
       
    28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
       
    29 //
       
    30 // Author: Sanjay Ghemawat
       
    31 // Support for PCRE_XXX modifiers added by Giuseppe Maxia, July 2005
       
    32 
       
    33 #ifndef _PCRECPP_H
       
    34 #define _PCRECPP_H
       
    35 
       
    36 // C++ interface to the pcre regular-expression library.  RE supports
       
    37 // Perl-style regular expressions (with extensions like \d, \w, \s,
       
    38 // ...).
       
    39 //
       
    40 // -----------------------------------------------------------------------
       
    41 // REGEXP SYNTAX:
       
    42 //
       
    43 // This module is part of the pcre library and hence supports its syntax
       
    44 // for regular expressions.
       
    45 //
       
    46 // The syntax is pretty similar to Perl's.  For those not familiar
       
    47 // with Perl's regular expressions, here are some examples of the most
       
    48 // commonly used extensions:
       
    49 //
       
    50 //   "hello (\\w+) world"  -- \w matches a "word" character
       
    51 //   "version (\\d+)"      -- \d matches a digit
       
    52 //   "hello\\s+world"      -- \s matches any whitespace character
       
    53 //   "\\b(\\w+)\\b"        -- \b matches empty string at a word boundary
       
    54 //   "(?i)hello"           -- (?i) turns on case-insensitive matching
       
    55 //   "/\\*(.*?)\\*/"       -- .*? matches . minimum no. of times possible
       
    56 //
       
    57 // -----------------------------------------------------------------------
       
    58 // MATCHING INTERFACE:
       
    59 //
       
    60 // The "FullMatch" operation checks that supplied text matches a
       
    61 // supplied pattern exactly.
       
    62 //
       
    63 // Example: successful match
       
    64 //    pcrecpp::RE re("h.*o");
       
    65 //    re.FullMatch("hello");
       
    66 //
       
    67 // Example: unsuccessful match (requires full match):
       
    68 //    pcrecpp::RE re("e");
       
    69 //    !re.FullMatch("hello");
       
    70 //
       
    71 // Example: creating a temporary RE object:
       
    72 //    pcrecpp::RE("h.*o").FullMatch("hello");
       
    73 //
       
    74 // You can pass in a "const char*" or a "string" for "text".  The
       
    75 // examples below tend to use a const char*.
       
    76 //
       
    77 // You can, as in the different examples above, store the RE object
       
    78 // explicitly in a variable or use a temporary RE object.  The
       
    79 // examples below use one mode or the other arbitrarily.  Either
       
    80 // could correctly be used for any of these examples.
       
    81 //
       
    82 // -----------------------------------------------------------------------
       
    83 // MATCHING WITH SUB-STRING EXTRACTION:
       
    84 //
       
    85 // You can supply extra pointer arguments to extract matched subpieces.
       
    86 //
       
    87 // Example: extracts "ruby" into "s" and 1234 into "i"
       
    88 //    int i;
       
    89 //    string s;
       
    90 //    pcrecpp::RE re("(\\w+):(\\d+)");
       
    91 //    re.FullMatch("ruby:1234", &s, &i);
       
    92 //
       
    93 // Example: does not try to extract any extra sub-patterns
       
    94 //    re.FullMatch("ruby:1234", &s);
       
    95 //
       
    96 // Example: does not try to extract into NULL
       
    97 //    re.FullMatch("ruby:1234", NULL, &i);
       
    98 //
       
    99 // Example: integer overflow causes failure
       
   100 //    !re.FullMatch("ruby:1234567891234", NULL, &i);
       
   101 //
       
   102 // Example: fails because there aren't enough sub-patterns:
       
   103 //    !pcrecpp::RE("\\w+:\\d+").FullMatch("ruby:1234", &s);
       
   104 //
       
   105 // Example: fails because string cannot be stored in integer
       
   106 //    !pcrecpp::RE("(.*)").FullMatch("ruby", &i);
       
   107 //
       
   108 // The provided pointer arguments can be pointers to any scalar numeric
       
   109 // type, or one of
       
   110 //    string        (matched piece is copied to string)
       
   111 //    StringPiece   (StringPiece is mutated to point to matched piece)
       
   112 //    T             (where "bool T::ParseFrom(const char*, int)" exists)
       
   113 //    NULL          (the corresponding matched sub-pattern is not copied)
       
   114 //
       
   115 // CAVEAT: An optional sub-pattern that does not exist in the matched
       
   116 // string is assigned the empty string.  Therefore, the following will
       
   117 // return false (because the empty string is not a valid number):
       
   118 //    int number;
       
   119 //    pcrecpp::RE("[a-z]+(\\d+)?").FullMatch("abc", &number);
       
   120 //
       
   121 // -----------------------------------------------------------------------
       
   122 // DO_MATCH
       
   123 //
       
   124 // The matching interface supports at most 16 arguments per call.
       
   125 // If you need more, consider using the more general interface
       
   126 // pcrecpp::RE::DoMatch().  See the declaration below for the signature of DoMatch.
       
   127 //
       
   128 // -----------------------------------------------------------------------
       
   129 // PARTIAL MATCHES
       
   130 //
       
   131 // You can use the "PartialMatch" operation when you want the pattern
       
   132 // to match any substring of the text.
       
   133 //
       
   134 // Example: simple search for a string:
       
   135 //    pcrecpp::RE("ell").PartialMatch("hello");
       
   136 //
       
   137 // Example: find first number in a string:
       
   138 //    int number;
       
   139 //    pcrecpp::RE re("(\\d+)");
       
   140 //    re.PartialMatch("x*100 + 20", &number);
       
   141 //    assert(number == 100);
       
   142 //
       
   143 // -----------------------------------------------------------------------
       
   144 // UTF-8 AND THE MATCHING INTERFACE:
       
   145 //
       
   146 // By default, pattern and text are plain text, one byte per character.
       
   147 // The UTF8 flag, passed to the constructor, causes both pattern
       
   148 // and string to be treated as UTF-8 text, still a byte stream but
       
   149 // potentially multiple bytes per character. In practice, the text
       
   150 // is likelier to be UTF-8 than the pattern, but the match returned
       
   151 // may depend on the UTF8 flag, so always use it when matching
       
   152 // UTF8 text.  E.g., "." will match one byte normally but with UTF8
       
   153 // set may match up to three bytes of a multi-byte character.
       
   154 //
       
   155 // Example:
       
   156 //    pcrecpp::RE_Options options;
       
   157 //    options.set_utf8(true);
       
   158 //    pcrecpp::RE re(utf8_pattern, options);
       
   159 //    re.FullMatch(utf8_string);
       
   160 //
       
   161 // Example: using the convenience function UTF8():
       
   162 //    pcrecpp::RE re(utf8_pattern, pcrecpp::UTF8());
       
   163 //    re.FullMatch(utf8_string);
       
   164 //
       
   165 // NOTE: The UTF8 option is ignored if pcre was not configured with the
       
   166 //       --enable-utf8 flag.
       
   167 //
       
   168 // -----------------------------------------------------------------------
       
   169 // PASSING MODIFIERS TO THE REGULAR EXPRESSION ENGINE
       
   170 //
       
   171 // PCRE defines some modifiers to change the behavior of the regular
       
   172 // expression engine.
       
   173 // The C++ wrapper defines an auxiliary class, RE_Options, as a vehicle
       
   174 // to pass such modifiers to a RE class.
       
   175 //
       
   176 // Currently, the following modifiers are supported
       
   177 //
       
   178 //    modifier              description               Perl corresponding
       
   179 //
       
   180 //    PCRE_CASELESS         case insensitive match    /i
       
   181 //    PCRE_MULTILINE        multiple lines match      /m
       
   182 //    PCRE_DOTALL           dot matches newlines      /s
       
   183 //    PCRE_DOLLAR_ENDONLY   $ matches only at end     N/A
       
   184 //    PCRE_EXTRA            strict escape parsing     N/A
       
   185 //    PCRE_EXTENDED         ignore whitespaces        /x
       
   186 //    PCRE_UTF8             handles UTF8 chars        built-in
       
   187 //    PCRE_UNGREEDY         reverses * and *?         N/A
       
   188 //    PCRE_NO_AUTO_CAPTURE  disables matching parens  N/A (*)
       
   189 //
       
   190 // (For a full account on how each modifier works, please check the
       
   191 // PCRE API reference manual).
       
   192 //
       
   193 // (*) Both Perl and PCRE allow non-capturing parentheses by means of the
       
   194 // "?:" modifier within the pattern itself. e.g. (?:ab|cd) does not
       
   195 // capture, while (ab|cd) does.
       
   196 //
       
   197 // For each modifier, there are two member functions whose name is made
       
   198 // out of the modifier in lowercase, without the "PCRE_" prefix. For
       
   199 // instance, PCRE_CASELESS is handled by
       
   200 //    bool caseless(),
       
   201 // which returns true if the modifier is set, and
       
   202 //    RE_Options & set_caseless(bool),
       
   203 // which sets or unsets the modifier.
       
   204 //
       
   205 // Moreover, PCRE_EXTRA_MATCH_LIMIT can be accessed through the
       
   206 // set_match_limit() and match_limit() member functions.
       
   207 // Setting match_limit to a non-zero value will limit the execution of
       
   208 // pcre to keep it from doing bad things like blowing the stack or taking
       
   209 // an eternity to return a result.  A value of 5000 is good enough to stop
       
   210 // stack blowup in a 2MB thread stack.  Setting match_limit to zero will
       
   211 // disable match limiting.  Alternately, you can set match_limit_recursion()
       
   212 // which uses PCRE_EXTRA_MATCH_LIMIT_RECURSION to limit how much pcre
       
   213 // recurses.  match_limit() caps the number of matches pcre does;
       
   214 // match_limit_recursion() caps the depth of recursion.
       
   215 //
       
   216 // Normally, to pass one or more modifiers to a RE class, you declare
       
   217 // a RE_Options object, set the appropriate options, and pass this
       
   218 // object to a RE constructor. Example:
       
   219 //
       
   220 //    RE_Options opt;
       
   221 //    opt.set_caseless(true);
       
   222 //
       
   223 //    if (RE("HELLO", opt).PartialMatch("hello world")) ...
       
   224 //
       
   225 // RE_Options has two constructors. The default constructor takes no
       
   226 // arguments and creates a set of flags that are off by default.
       
   227 //
       
   228 // The optional parameter 'option_flags' is to facilitate transfer
       
   229 // of legacy code from C programs.  This lets you do
       
   230 //    RE(pattern, RE_Options(PCRE_CASELESS|PCRE_MULTILINE)).PartialMatch(str);
       
   231 //
       
   232 // But new code is better off doing
       
   233 //    RE(pattern,
       
   234 //      RE_Options().set_caseless(true).set_multiline(true)).PartialMatch(str);
       
   235 // (See below)
       
   236 //
       
   237 // If you are going to pass one of the most used modifiers, there are some
       
   238 // convenience functions that return a RE_Options class with the
       
   239 // appropriate modifier already set:
       
   240 // CASELESS(), UTF8(), MULTILINE(), DOTALL(), EXTENDED()
       
   241 //
       
   242 // If you need to set several options at once, and you don't want to go
       
   243 // through the pains of declaring a RE_Options object and setting several
       
   244 // options, there is a parallel method that gives you such ability on the
       
   245 // fly. You can concatenate several set_xxxxx member functions, since each
       
   246 // of them returns a reference to its class object.  e.g.: to pass
       
   247 // PCRE_CASELESS, PCRE_EXTENDED, and PCRE_MULTILINE to a RE with one
       
   248 // statement, you may write
       
   249 //
       
   250 //    RE(" ^ xyz \\s+ .* blah$", RE_Options()
       
   251 //                            .set_caseless(true)
       
   252 //                            .set_extended(true)
       
   253 //                            .set_multiline(true)).PartialMatch(sometext);
       
   254 //
       
   255 // -----------------------------------------------------------------------
       
   256 // SCANNING TEXT INCREMENTALLY
       
   257 //
       
   258 // The "Consume" operation may be useful if you want to repeatedly
       
   259 // match regular expressions at the front of a string and skip over
       
   260 // them as they match.  This requires use of the "StringPiece" type,
       
   261 // which represents a sub-range of a real string.  Like RE, StringPiece
       
   262 // is defined in the pcrecpp namespace.
       
   263 //
       
   264 // Example: read lines of the form "var = value" from a string.
       
   265 //    string contents = ...;                 // Fill string somehow
       
   266 //    pcrecpp::StringPiece input(contents);  // Wrap in a StringPiece
       
   267 //
       
   268 //    string var;
       
   269 //    int value;
       
   270 //    pcrecpp::RE re("(\\w+) = (\\d+)\n");
       
   271 //    while (re.Consume(&input, &var, &value)) {
       
   272 //      ...;
       
   273 //    }
       
   274 //
       
   275 // Each successful call to "Consume" will set "var/value", and also
       
   276 // advance "input" so it points past the matched text.
       
   277 //
       
   278 // The "FindAndConsume" operation is similar to "Consume" but does not
       
   279 // anchor your match at the beginning of the string.  For example, you
       
   280 // could extract all words from a string by repeatedly calling
       
   281 //     pcrecpp::RE("(\\w+)").FindAndConsume(&input, &word)
       
   282 //
       
   283 // -----------------------------------------------------------------------
       
   284 // PARSING HEX/OCTAL/C-RADIX NUMBERS
       
   285 //
       
   286 // By default, if you pass a pointer to a numeric value, the
       
   287 // corresponding text is interpreted as a base-10 number.  You can
       
   288 // instead wrap the pointer with a call to one of the operators Hex(),
       
   289 // Octal(), or CRadix() to interpret the text in another base.  The
       
   290 // CRadix operator interprets C-style "0" (base-8) and "0x" (base-16)
       
   291 // prefixes, but defaults to base-10.
       
   292 //
       
   293 // Example:
       
   294 //   int a, b, c, d;
       
   295 //   pcrecpp::RE re("(.*) (.*) (.*) (.*)");
       
   296 //   re.FullMatch("100 40 0100 0x40",
       
   297 //                pcrecpp::Octal(&a), pcrecpp::Hex(&b),
       
   298 //                pcrecpp::CRadix(&c), pcrecpp::CRadix(&d));
       
   299 // will leave 64 in a, b, c, and d.
       
   300 //
       
   301 // -----------------------------------------------------------------------
       
   302 // REPLACING PARTS OF STRINGS
       
   303 //
       
   304 // You can replace the first match of "pattern" in "str" with
       
   305 // "rewrite".  Within "rewrite", backslash-escaped digits (\1 to \9)
       
   306 // can be used to insert text matching corresponding parenthesized
       
   307 // group from the pattern.  \0 in "rewrite" refers to the entire
       
   308 // matching text.  E.g.,
       
   309 //
       
   310 //   string s = "yabba dabba doo";
       
   311 //   pcrecpp::RE("b+").Replace("d", &s);
       
   312 //
       
   313 // will leave "s" containing "yada dabba doo".  The result is true if
       
   314 // the pattern matches and a replacement occurs, or false otherwise.
       
   315 //
       
   316 // GlobalReplace() is like Replace(), except that it replaces all
       
   317 // occurrences of the pattern in the string with the rewrite.
       
   318 // Replacements are not subject to re-matching.  E.g.,
       
   319 //
       
   320 //   string s = "yabba dabba doo";
       
   321 //   pcrecpp::RE("b+").GlobalReplace("d", &s);
       
   322 //
       
   323 // will leave "s" containing "yada dada doo".  It returns the number
       
   324 // of replacements made.
       
   325 //
       
   326 // Extract() is like Replace(), except that if the pattern matches,
       
   327 // "rewrite" is copied into "out" (an additional argument) with
       
   328 // substitutions.  The non-matching portions of "text" are ignored.
       
   329 // Returns true iff a match occurred and the extraction happened
       
   330 // successfully.  If no match occurs, the string is left unaffected.
       
   331 
       
   332 
       
   333 #include <string>
       
   334 #include <pcre.h>
       
   335 #include <pcrecpparg.h>   // defines the Arg class
       
   336 // This isn't technically needed here, but we include it
       
   337 // anyway so folks who include pcrecpp.h don't have to.
       
   338 #include <pcre_stringpiece.h>
       
   339 
       
   340 namespace pcrecpp {
       
   341 
       
   // Body of the RE_Options::set_xxx() mutators: switches option bit(s) `o` on
   // when `b` is true and off otherwise, then returns *this so that calls can
   // be chained.  Must be expanded inside a non-static member function of a
   // class that has an integral `all_options_` member.
   #define PCRE_SET_OR_CLEAR(b, o) \
       all_options_ = (b) ? (all_options_ | (o)) : (all_options_ & ~(o)); \
       return *this
       
   345 
       
   // True when every bit of `o` is set in `all_options_`.  Both the argument
   // and the whole expansion are parenthesized so the macro composes correctly
   // with surrounding operators (e.g. !PCRE_IS_SET(x)) and with compound
   // arguments (e.g. PCRE_IS_SET(A | B)); `==` binds tighter than `&` and `|`,
   // so the unparenthesized form silently misparses in both cases.
   #define PCRE_IS_SET(o)  \
           (((all_options_) & (o)) == (o))
       
   348 
       
   349 /***** Compiling regular expressions: the RE class *****/
       
   350 
       
   351 // RE_Options allow you to set options to be passed along to pcre,
       
   352 // along with other options we put on top of pcre.
       
   353 // Only 9 modifiers, plus match_limit and match_limit_recursion,
       
   354 // are supported now.
       
   355 class PCRECPP_EXP_DEFN RE_Options {
       
   356  public:
       
   357   // constructor
       
   358   RE_Options() : match_limit_(0), match_limit_recursion_(0), all_options_(0) {}
       
   359 
       
   360   // alternative constructor.
       
   361   // To facilitate transfer of legacy code from C programs
       
   362   //
       
   363   // This lets you do
       
   364   //    RE(pattern, RE_Options(PCRE_CASELESS|PCRE_MULTILINE)).PartialMatch(str);
       
   365   // But new code is better off doing
       
   366   //    RE(pattern,
       
   367   //      RE_Options().set_caseless(true).set_multiline(true)).PartialMatch(str);
       
   368   RE_Options(int option_flags) : match_limit_(0), match_limit_recursion_(0),
       
   369                                  all_options_(option_flags) {}
       
   370   // we're fine with the default destructor, copy constructor, etc.
       
   371 
       
   372   // accessors and mutators
       
   373   int match_limit() const { return match_limit_; };
       
   374   RE_Options &set_match_limit(int limit) {
       
   375     match_limit_ = limit;
       
   376     return *this;
       
   377   }
       
   378 
       
   379   int match_limit_recursion() const { return match_limit_recursion_; };
       
   380   RE_Options &set_match_limit_recursion(int limit) {
       
   381     match_limit_recursion_ = limit;
       
   382     return *this;
       
   383   }
       
   384 
       
   385   bool caseless() const {
       
   386     return PCRE_IS_SET(PCRE_CASELESS);
       
   387   }
       
   388   RE_Options &set_caseless(bool x) {
       
   389     PCRE_SET_OR_CLEAR(x, PCRE_CASELESS);
       
   390   }
       
   391 
       
   392   bool multiline() const {
       
   393     return PCRE_IS_SET(PCRE_MULTILINE);
       
   394   }
       
   395   RE_Options &set_multiline(bool x) {
       
   396     PCRE_SET_OR_CLEAR(x, PCRE_MULTILINE);
       
   397   }
       
   398 
       
   399   bool dotall() const {
       
   400     return PCRE_IS_SET(PCRE_DOTALL);
       
   401   }
       
   402   RE_Options &set_dotall(bool x) {
       
   403     PCRE_SET_OR_CLEAR(x, PCRE_DOTALL);
       
   404   }
       
   405 
       
   406   bool extended() const {
       
   407     return PCRE_IS_SET(PCRE_EXTENDED);
       
   408   }
       
   409   RE_Options &set_extended(bool x) {
       
   410     PCRE_SET_OR_CLEAR(x, PCRE_EXTENDED);
       
   411   }
       
   412 
       
   413   bool dollar_endonly() const {
       
   414     return PCRE_IS_SET(PCRE_DOLLAR_ENDONLY);
       
   415   }
       
   416   RE_Options &set_dollar_endonly(bool x) {
       
   417     PCRE_SET_OR_CLEAR(x, PCRE_DOLLAR_ENDONLY);
       
   418   }
       
   419 
       
   420   bool extra() const {
       
   421     return PCRE_IS_SET(PCRE_EXTRA);
       
   422   }
       
   423   RE_Options &set_extra(bool x) {
       
   424     PCRE_SET_OR_CLEAR(x, PCRE_EXTRA);
       
   425   }
       
   426 
       
   427   bool ungreedy() const {
       
   428     return PCRE_IS_SET(PCRE_UNGREEDY);
       
   429   }
       
   430   RE_Options &set_ungreedy(bool x) {
       
   431     PCRE_SET_OR_CLEAR(x, PCRE_UNGREEDY);
       
   432   }
       
   433 
       
   434   bool utf8() const {
       
   435     return PCRE_IS_SET(PCRE_UTF8);
       
   436   }
       
   437   RE_Options &set_utf8(bool x) {
       
   438     PCRE_SET_OR_CLEAR(x, PCRE_UTF8);
       
   439   }
       
   440 
       
   441   bool no_auto_capture() const {
       
   442     return PCRE_IS_SET(PCRE_NO_AUTO_CAPTURE);
       
   443   }
       
   444   RE_Options &set_no_auto_capture(bool x) {
       
   445     PCRE_SET_OR_CLEAR(x, PCRE_NO_AUTO_CAPTURE);
       
   446   }
       
   447 
       
   448   RE_Options &set_all_options(int opt) {
       
   449     all_options_ = opt;
       
   450     return *this;
       
   451   }
       
   452   int all_options() const {
       
   453     return all_options_ ;
       
   454   }
       
   455 
       
   456   // TODO: add other pcre flags
       
   457 
       
   458  private:
       
   459   int match_limit_;
       
   460   int match_limit_recursion_;
       
   461   int all_options_;
       
   462 };
       
   463 
       
   464 // These functions return some common RE_Options
       
   465 static inline RE_Options UTF8() {
       
   466   return RE_Options().set_utf8(true);
       
   467 }
       
   468 
       
   469 static inline RE_Options CASELESS() {
       
   470   return RE_Options().set_caseless(true);
       
   471 }
       
   472 static inline RE_Options MULTILINE() {
       
   473   return RE_Options().set_multiline(true);
       
   474 }
       
   475 
       
   476 static inline RE_Options DOTALL() {
       
   477   return RE_Options().set_dotall(true);
       
   478 }
       
   479 
       
   480 static inline RE_Options EXTENDED() {
       
   481   return RE_Options().set_extended(true);
       
   482 }
       
   483 
       
   484 // Interface for regular expression matching.  Also corresponds to a
       
   485 // pre-compiled regular expression.  An "RE" object is safe for
       
   486 // concurrent use by multiple threads.
       
   487 class PCRECPP_EXP_DEFN RE {
       
   488  public:
       
   489   // We provide implicit conversions from strings so that users can
       
   490   // pass in a string or a "const char*" wherever an "RE" is expected.
       
   491   RE(const string& pat) { Init(pat, NULL); }
       
   492   RE(const string& pat, const RE_Options& option) { Init(pat, &option); }
       
   493   RE(const char* pat) { Init(pat, NULL); }
       
   494   RE(const char* pat, const RE_Options& option) { Init(pat, &option); }
       
   495   RE(const unsigned char* pat) {
       
   496     Init(reinterpret_cast<const char*>(pat), NULL);
       
   497   }
       
   498   RE(const unsigned char* pat, const RE_Options& option) {
       
   499     Init(reinterpret_cast<const char*>(pat), &option);
       
   500   }
       
   501 
       
   502   // Copy constructor & assignment - note that these are expensive
       
   503   // because they recompile the expression.
       
   504   RE(const RE& re) { Init(re.pattern_, &re.options_); }
       
   505   const RE& operator=(const RE& re) {
       
   506     if (this != &re) {
       
   507       Cleanup();
       
   508 
       
   509       // This is the code that originally came from Google
       
   510       // Init(re.pattern_.c_str(), &re.options_);
       
   511 
       
   512       // This is the replacement from Ari Pollak
       
   513       Init(re.pattern_, &re.options_);
       
   514     }
       
   515     return *this;
       
   516   }
       
   517 
       
   518 
       
   519   ~RE();
       
   520 
       
   521   // The string specification for this RE.  E.g.
       
   522   //   RE re("ab*c?d+");
       
   523   //   re.pattern();    // "ab*c?d+"
       
   524   const string& pattern() const { return pattern_; }
       
   525 
       
   526   // If RE could not be created properly, returns an error string.
       
   527   // Else returns the empty string.
       
   528   const string& error() const { return *error_; }
       
   529 
       
   530   /***** The useful part: the matching interface *****/
       
   531 
       
   532   // This is provided so one can do pattern.ReplaceAll() just as
       
   533   // easily as ReplaceAll(pattern-text, ....)
       
   534 
       
   535   bool FullMatch(const StringPiece& text,
       
   536                  const Arg& ptr1 = no_arg,
       
   537                  const Arg& ptr2 = no_arg,
       
   538                  const Arg& ptr3 = no_arg,
       
   539                  const Arg& ptr4 = no_arg,
       
   540                  const Arg& ptr5 = no_arg,
       
   541                  const Arg& ptr6 = no_arg,
       
   542                  const Arg& ptr7 = no_arg,
       
   543                  const Arg& ptr8 = no_arg,
       
   544                  const Arg& ptr9 = no_arg,
       
   545                  const Arg& ptr10 = no_arg,
       
   546                  const Arg& ptr11 = no_arg,
       
   547                  const Arg& ptr12 = no_arg,
       
   548                  const Arg& ptr13 = no_arg,
       
   549                  const Arg& ptr14 = no_arg,
       
   550                  const Arg& ptr15 = no_arg,
       
   551                  const Arg& ptr16 = no_arg) const;
       
   552 
       
   553   bool PartialMatch(const StringPiece& text,
       
   554                     const Arg& ptr1 = no_arg,
       
   555                     const Arg& ptr2 = no_arg,
       
   556                     const Arg& ptr3 = no_arg,
       
   557                     const Arg& ptr4 = no_arg,
       
   558                     const Arg& ptr5 = no_arg,
       
   559                     const Arg& ptr6 = no_arg,
       
   560                     const Arg& ptr7 = no_arg,
       
   561                     const Arg& ptr8 = no_arg,
       
   562                     const Arg& ptr9 = no_arg,
       
   563                     const Arg& ptr10 = no_arg,
       
   564                     const Arg& ptr11 = no_arg,
       
   565                     const Arg& ptr12 = no_arg,
       
   566                     const Arg& ptr13 = no_arg,
       
   567                     const Arg& ptr14 = no_arg,
       
   568                     const Arg& ptr15 = no_arg,
       
   569                     const Arg& ptr16 = no_arg) const;
       
   570 
       
   571   bool Consume(StringPiece* input,
       
   572                const Arg& ptr1 = no_arg,
       
   573                const Arg& ptr2 = no_arg,
       
   574                const Arg& ptr3 = no_arg,
       
   575                const Arg& ptr4 = no_arg,
       
   576                const Arg& ptr5 = no_arg,
       
   577                const Arg& ptr6 = no_arg,
       
   578                const Arg& ptr7 = no_arg,
       
   579                const Arg& ptr8 = no_arg,
       
   580                const Arg& ptr9 = no_arg,
       
   581                const Arg& ptr10 = no_arg,
       
   582                const Arg& ptr11 = no_arg,
       
   583                const Arg& ptr12 = no_arg,
       
   584                const Arg& ptr13 = no_arg,
       
   585                const Arg& ptr14 = no_arg,
       
   586                const Arg& ptr15 = no_arg,
       
   587                const Arg& ptr16 = no_arg) const;
       
   588 
       
   589   bool FindAndConsume(StringPiece* input,
       
   590                       const Arg& ptr1 = no_arg,
       
   591                       const Arg& ptr2 = no_arg,
       
   592                       const Arg& ptr3 = no_arg,
       
   593                       const Arg& ptr4 = no_arg,
       
   594                       const Arg& ptr5 = no_arg,
       
   595                       const Arg& ptr6 = no_arg,
       
   596                       const Arg& ptr7 = no_arg,
       
   597                       const Arg& ptr8 = no_arg,
       
   598                       const Arg& ptr9 = no_arg,
       
   599                       const Arg& ptr10 = no_arg,
       
   600                       const Arg& ptr11 = no_arg,
       
   601                       const Arg& ptr12 = no_arg,
       
   602                       const Arg& ptr13 = no_arg,
       
   603                       const Arg& ptr14 = no_arg,
       
   604                       const Arg& ptr15 = no_arg,
       
   605                       const Arg& ptr16 = no_arg) const;
       
   606 
       
         // Replace: const member function returning bool.
         //   rewrite - the rewrite text, passed as a StringPiece.
         //   str     - pointer to a non-const string, so the callee is able
         //             to modify the caller's string in place.
         // NOTE(review): presumably replaces the first match in *str with
         // "rewrite" and returns whether a replacement happened -- confirm
         // against the definition.
   607   bool Replace(const StringPiece& rewrite,
       
   608                string *str) const;
       
   609 
       
         // GlobalReplace: takes the same parameters as Replace but returns
         // an int instead of a bool.
         //   rewrite - the rewrite text, passed as a StringPiece.
         //   str     - pointer to a non-const string that may be modified.
         // NOTE(review): presumably replaces every match in *str and returns
         // the number of replacements made -- confirm against the definition.
   610   int GlobalReplace(const StringPiece& rewrite,
       
   611                     string *str) const;
       
   612 
       
         // Extract: const member function returning bool.
         //   rewrite - the rewrite text, passed as a StringPiece.
         //   text    - the input text; taken by const reference, so it is
         //             not modified.
         //   out     - pointer to a non-const string that receives output.
         // NOTE(review): presumably matches against "text" and stores the
         // expanded "rewrite" in *out, leaving "text" untouched -- confirm
         // against the definition.
   613   bool Extract(const StringPiece &rewrite,
       
   614                const StringPiece &text,
       
   615                string *out) const;
       
   616 
       
   617   // Escapes all potentially meaningful regexp characters in
       
   618   // 'unquoted'.  The returned string, used as a regular expression,
       
   619   // will exactly match the original string.  For example,
       
   620   //           1.5-2.0?
       
   621   // may become:
       
   622   //           1\.5\-2\.0\?
       
   623   // Note QuoteMeta behaves the same as Perl's quotemeta function,
       
   624   // *except* that it escapes the NUL character (\0) as backslash + 0,
       
   625   // rather than backslash + NUL.
         // Declared static, so no object is needed to call it; the escaped
         // text is returned by value.
       
   626   static string QuoteMeta(const StringPiece& unquoted);
       
   627 
       
   628 
       
   629   /***** Generic matching interface *****/
       
   630 
       
   631   // Type of match (TODO: Should be restructured as part of RE_Options).
         // This is the type of the "anchor" parameter taken by DoMatch,
         // TryMatch, DoMatchImpl and Compile.
       
   632   enum Anchor {
       
   633     UNANCHORED,         // No anchoring
       
   634     ANCHOR_START,       // Anchor at start only
       
   635     ANCHOR_BOTH         // Anchor at start and end
       
   636   };
       
   637 
       
   638   // General matching routine.  Stores the length of the match in
       
   639   // "*consumed" if successful.
         //   text     - the text to match against.
         //   anchor   - anchoring mode (see enum Anchor).
         //   consumed - output: receives the length of the match on success.
         //   args, n  - array of pointers to const Arg, plus an int count.
         //              NOTE(review): presumably n is the number of entries
         //              in args and each Arg receives one captured
         //              substring -- confirm against the definition.
       
   640   bool DoMatch(const StringPiece& text,
       
   641                Anchor anchor,
       
   642                int* consumed,
       
   643                const Arg* const* args, int n) const;
       
   644 
       
   645   // Return the number of capturing subpatterns, or -1 if the
       
   646   // regexp wasn't valid on construction.
         // Callers must therefore treat a negative result as "invalid
         // pattern" rather than as a count.
       
   647   int NumberOfCapturingGroups() const;
       
   648 
       
   649   // The default value for an argument, to indicate no arg was passed in.
         // Every optional "ptrN" parameter of FindAndConsume defaults to this
         // object.  This line only declares the static member; its
         // definition is not part of this header section.
       
   650   static Arg no_arg;
       
   651 
       
   652  private:
       
   653 
       
         // Init: private helper taking the pattern text and a pointer to the
         // RE_Options to use.
         // NOTE(review): presumably called during construction to fill in
         // pattern_, options_ and the compiled regexps; whether "options"
         // may be null is not visible here -- confirm against the definition.
   654   void Init(const string& pattern, const RE_Options* options);
       
         // Cleanup: private helper with no parameters and no return value.
         // NOTE(review): presumably releases what Init set up (re_full_,
         // re_partial_, error_) -- confirm against the definition.
   655   void Cleanup();
       
   656 
       
   657   // Match against "text", filling in "vec" (up to "vecsize" * 2/3) with
       
   658   // pairs of integers for the beginning and end positions of matched
       
   659   // text.  The first pair corresponds to the entire matched text;
       
   660   // subsequent pairs correspond, in order, to parentheses-captured
       
   661   // matches.  Returns the number of pairs (one more than the number of
       
   662   // the last subpattern with a match) if matching was successful
       
   663   // and zero if the match failed.
       
   664   // I.e. for RE("(foo)|(bar)|(baz)") it will return 2, 3, and 4 when matching
       
   665   // against "foo", "bar", and "baz" respectively.
       
   666   // When matching RE("(foo)|hello") against "hello", it will return 1.
       
   667   // The values for all subpatterns are nevertheless filled into "vec".
         //   startpos - NOTE(review): presumably the offset within "text" at
         //              which matching begins -- confirm.
         //   anchor   - anchoring mode (see enum Anchor).
       
   668   int TryMatch(const StringPiece& text,
       
   669                int startpos,
       
   670                Anchor anchor,
       
   671                int *vec,
       
   672                int vecsize) const;
       
   673 
       
   674   // Append the "rewrite" string, with backslash substitutions from "text"
       
   675   // and "vec", to string "out".
         //   vec, veclen - match positions and their count.  NOTE(review):
         //                 presumably the pairs produced by TryMatch --
         //                 confirm against the definition.
         // Returns bool.  NOTE(review): presumably false when "rewrite"
         // cannot be expanded from the given match data -- confirm.
       
   676   bool Rewrite(string *out,
       
   677                const StringPiece& rewrite,
       
   678                const StringPiece& text,
       
   679                int *vec,
       
   680                int veclen) const;
       
   681 
       
   682   // Internal implementation for DoMatch.  Takes the same five leading
         // parameters as DoMatch, plus a caller-supplied int buffer "vec"
         // and its size "vecsize".
         // NOTE(review): presumably "vec" is the scratch space handed on to
         // TryMatch -- confirm against the definition.
       
   683   bool DoMatchImpl(const StringPiece& text,
       
   684                    Anchor anchor,
       
   685                    int* consumed,
       
   686                    const Arg* const args[],
       
   687                    int n,
       
   688                    int* vec,
       
   689                    int vecsize) const;
       
   690 
       
   691   // Compile the regexp for the specified anchoring mode.
         // Returns a pcre*.  Unlike the matching functions in this class,
         // this member function is not declared const.
         // NOTE(review): presumably the result is what gets stored in
         // re_full_ / re_partial_, and a null pointer signals a compile
         // error -- confirm against the definition.
       
   692   pcre* Compile(Anchor anchor);
       
   693 
       
         // Per-object state.
         // NOTE(review): pattern_ and options_ presumably hold the values
         // handed to Init, and the two pcre pointers the results of Compile
         // -- confirm against the definitions.
   694   string        pattern_;
       
   695   RE_Options    options_;
       
   696   pcre*         re_full_;       // For full matches
       
   697   pcre*         re_partial_;    // For partial matches
       
   698   const string* error_;         // Error indicator (or points to empty string)
       
   699 };
       
   700 
       
   701 }   // namespace pcrecpp
       
   702 
       
   703 #endif /* _PCRECPP_H */