diff -r 000000000000 -r 7f656887cf89 libraries/spcre/inc/cregex.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libraries/spcre/inc/cregex.h Wed Jun 23 15:52:26 2010 +0100 @@ -0,0 +1,792 @@ +// Copyright (c) 2005 - 2006, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: Sanjay Ghemawat +// + +// Heavily refactored for Symbian OS by Accenture. 
+ +#ifndef CREGEX_H_ +#define CREGXX_H_ + +#include +#include "tregexoptions.h" + +// Forward declarations +class TRegExArg; +struct real_pcre; +typedef struct real_pcre pcre; +struct pcre_extra; + +// Errors + +/** Base offset for CRegEx Errors */ +const TInt KErrRegExBase = -20000; + +/** Base offset for RegEx pattern compile errors */ +const TInt KErrRegExCompileBase = KErrRegExBase; + +/** \ at end of pattern */ +const TInt KErrRegExCmpBackslashAtEOP = KErrRegExCompileBase - 1; +/** \c at end of pattern */ +const TInt KErrRegExCmpBackslashCAtEOP = KErrRegExCompileBase - 2; +/** unrecognized character follows \ */ +const TInt KErrRegExCmpUnrecCharAftBackslash = KErrRegExCompileBase - 3; +/** numbers out of order in {} quantifier */ +const TInt KErrRegExCmpNumsOutOfOrderInBraceQuantifier = KErrRegExCompileBase - 4; +/** number too big in {} quantifier */ +const TInt KErrRegExCmpNumTooBigInBraceQuantifier = KErrRegExCompileBase - 5; +/** missing terminating ] for character class */ +const TInt KErrRegExCmpMissingTermBracketInCharClass = KErrRegExCompileBase - 6; +/** invalid escape sequence in character class */ +const TInt KErrRegExCmpInvalidEscapeSeqInCharClass = KErrRegExCompileBase - 7; +/** range out of order in character class */ +const TInt KErrRegExCmpRangeOutOfOrderInCharClass = KErrRegExCompileBase - 8; +/** nothing to repeat */ +const TInt KErrRegExCmpNothingToRepeat = KErrRegExCompileBase - 9; +/** operand of unlimited repeat could match the empty string - no longer used */ +const TInt KErrRegExCmpUnused01 = KErrRegExCompileBase - 10; +/** internal error: unexpected repeat */ +const TInt KErrRegExCmpUnexpectedRepeat = KErrRegExCompileBase - 11; +/** unrecognized character after (? 
or (?-\0t */ +const TInt KErrRegExCmpUnexpectedCharAftParenthQuest = KErrRegExCompileBase - 12; +/** POSIX named classes are supported only within a class */ +const TInt KErrRegExCmpPosixNamedSupportedWithinClass = KErrRegExCompileBase - 13; +/** missing ) */ +const TInt KErrRegExCmpMissingCloseParenth = KErrRegExCompileBase - 14; +/** reference to non-existent subpattern */ +const TInt KErrRegExCmpRefNonExistSubpattern = KErrRegExCompileBase - 15; +/** internal error: erroffset passed as NULL */ +const TInt KErrRegExCmpErrOffsetNull = KErrRegExCompileBase - 16; +/** unknown option bit(s) set */ +const TInt KErrRegExCmpUnknownOptionBitsSet = KErrRegExCompileBase - 17; +/** missing ) after comment */ +const TInt KErrRegExCmpMissingCloseParenthAftComment = KErrRegExCompileBase - 18; +/** parentheses nested too deeply - no longer used */ +const TInt KErrRegExCmpUnused02 = KErrRegExCompileBase - 19; +/** regular expression is too large */ +const TInt KErrRegExCmpExprTooLarge = KErrRegExCompileBase - 20; +/** failed to get memory */ +const TInt KErrRegExCmpFailedGetMemory = KErrRegExCompileBase - 21; +/** unmatched parentheses */ +const TInt KErrRegExCmpUnmatchedParenth = KErrRegExCompileBase - 22; +/** internal error: code overflow */ +const TInt KErrRegExCmpCodeOverflow = KErrRegExCompileBase - 23; +/** unrecognized character after (?< */ +const TInt KErrRegExCmpUnRecogCharAftParenthQuestAngle = KErrRegExCompileBase - 24; +/** lookbehind assertion is not fixed length */ +const TInt KErrRegExCmpLookbehindAssertNotFixedLen = KErrRegExCompileBase - 25; +/** malformed number or name after (?( */ +const TInt KErrRegExCmpMalformedAftParenthQuestParenth = KErrRegExCompileBase - 26; +/** conditional group contains more than two branches */ +const TInt KErrRegExCmpCondGroupMoreThanTwoBranches = KErrRegExCompileBase - 27; +/** assertion expected after (?( */ +const TInt KErrRegExCmpAssertExpAftParenthQuestParent = KErrRegExCompileBase - 28; +/** (?R or (?[+-]digits must be 
followed by ) */ +const TInt KErrRegExCmpMustFollowedByCloseParenth = KErrRegExCompileBase - 29; +/** unknown POSIX class name */ +const TInt KErrRegExCmpUnknownPosixClassName = KErrRegExCompileBase - 30; +/** POSIX collating elements are not supported */ +const TInt KErrRegExCmpPosixCollElemsNotSupported = KErrRegExCompileBase - 31; +/** this version of PCRE is not compiled with PCRE_UTF8 support */ +const TInt KErrRegExCmpNotCompiledWithUtf8Support = KErrRegExCompileBase - 32; +/** spare error - no longer used */ +const TInt KErrRegExCmpUnused03 = KErrRegExCompileBase - 33; +/** character value in \x{...} sequence is too large */ +const TInt KErrRegExCmpCharValueInBackslashXSeqTooLarge = KErrRegExCompileBase - 34; +/** invalid condition (?(0) */ +const TInt KErrRegExCmpInvalidCondition = KErrRegExCompileBase - 35; +/** \C not allowed in lookbehind assertion */ +const TInt KErrRegExCmpBackslashCNotAllowedinLookbehind = KErrRegExCompileBase - 36; +/** PCRE does not support \L, \l, \N, \U, or \u */ +const TInt KErrRegExCmpLNUEscapeSeqNotSupported = KErrRegExCompileBase - 37; +/** number after (?C is > 255 */ +const TInt KErrRegExCmpNumAftParenthQuestCIsGreatherThan = KErrRegExCompileBase - 38; +/** closing ) for (?C expected */ +const TInt KErrRegExCmpCloseParenthAftParenthQuestCExp = KErrRegExCompileBase - 39; +/** recursive call could loop indefinitely */ +const TInt KErrRegExCmpRecuriveCallLoopIndef = KErrRegExCompileBase - 40; +/** unrecognized character after (?P */ +const TInt KErrRegExCmpUnrecCharaftParenthQuestP = KErrRegExCompileBase - 41; +/** syntax error in subpattern name (missing terminator) */ +const TInt KErrRegExCmpSyntaxInSubpatternName = KErrRegExCompileBase - 42; +/** two named subpatterns have the same name */ +const TInt KErrRegExCmpTwoSubpatternsHaveSameName = KErrRegExCompileBase - 43; +/** invalid UTF-8 string */ +const TInt KErrRegExCmpInvalidUtf8String = KErrRegExCompileBase - 44; +/** support for \P, \p, and \X has not been compiled */ 
+const TInt KErrRegExCmpSupportForEscapeSeqNotCompiled = KErrRegExCompileBase - 45; +/** malformed \P or \p sequence */ +const TInt KErrRegExCmpMalformedBackslashPSeq = KErrRegExCompileBase - 46; +/** unknown property name after \P or \p */ +const TInt KErrRegExCmpUnknownPropNameAftBackslashPSeq = KErrRegExCompileBase - 47; +/** subpattern name is too long. Default max = 32 chars. See MAX_NAME_SIZE */ +const TInt KErrRegExCmpSubpatternNameTooLong = KErrRegExCompileBase - 48; +/** too many named subpatterns. Default max = 10000. See MAX_NAME_COUNT */ +const TInt KErrRegExCmpTooManyNamesSubpatterns = KErrRegExCompileBase - 49; +/** repeated subpattern is too long - no longer used */ +const TInt KErrRegExCmpUnused04 = KErrRegExCompileBase - 50; +/** octal value is greater than \377 (not in UTF-8 mode) */ +const TInt KErrRegExCmpOctalValueGreatherThan377 = KErrRegExCompileBase - 51; +/** internal error: overran compiling workspace */ +const TInt KErrRegExCmpOverranCompilingSpace = KErrRegExCompileBase - 52; +/** internal error: previously-checked referenced subpattern not found */ +const TInt KErrRegExCmpCheckedSubpatternNotFound = KErrRegExCompileBase - 53; +/** DEFINE group contains more than one branch */ +const TInt KErrRegExCmpDefineGroupMoreThanOneBranch = KErrRegExCompileBase - 54; +/** repeating a DEFINE group is not allowed */ +const TInt KErrRegExCmpRepeatingDefineGroupNotAllowed = KErrRegExCompileBase - 55; +/** inconsistent NEWLINE options */ +const TInt KErrRegExCmpInconsistantNewlineOpts = KErrRegExCompileBase - 56; +/** \g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number */ +const TInt KErrRegExCmpBackslashGNotFollowed = KErrRegExCompileBase - 57; +/** a numbered reference must not be zero */ +const TInt KErrRegExCmpNumberedRefIsZero = KErrRegExCompileBase - 58; +/** (*VERB) with an argument is not supported */ +const TInt KErrRegExCmpAsteriskVerbWithArgNotSupported = KErrRegExCompileBase - 59; +/** (*VERB) not 
recognized */ +const TInt KErrRegExCmpAsteriskVerbNotRecog = KErrRegExCompileBase - 60; +/** number is too big */ +const TInt KErrRegExCmpNumTooBig = KErrRegExCompileBase - 61; +/** subpattern name expected */ +const TInt KErrRegExCmpSubpatternNameExp = KErrRegExCompileBase - 62; +/** digit expected after (?+ */ +const TInt KErrRegExCmpDigitExpAftParenthQuestPlus = KErrRegExCompileBase - 63; +/** ] is an invalid data character in JavaScript compatibility mode */ +const TInt KErrRegExCloseBracketInvalidInJSCompatMode = KErrRegExCompileBase - 64; + +/** Base offset for RegEx pattern general errors */ +const TInt KErrRegExGeneralBase = KErrRegExCompileBase - 512; + +/** Zero Matches */ +const TInt KErrRegExZeroMatches = KErrRegExGeneralBase - 1; + +/** Regular expression has fewer capturing groups than number of args passed in */ +const TInt KErrRegExFewerCaptureGroupsThanArgs = KErrRegExGeneralBase - 2; + +/** Failed to parse argument, e.g. the supplied argument does not have enough capacity */ +const TInt KErrRegExFailedToParseArg = KErrRegExGeneralBase - 3; + +/** The modifiable descriptor supplied to receive output does not have a large enough maximum length */ +const TInt KErrRegExOutputTooBig = KErrRegExGeneralBase - 4; + +/** Invalid rewrite pattern */ +const TInt KErrRegExInvalidRewritePattern = KErrRegExGeneralBase - 5; + +/** Error with one of the backslash substitutions in the supplied rewrite string */ +const TInt KErrRegExBadBackslashSubsitution = KErrRegExGeneralBase - 6; + +// Panic Codes +_LIT(KRegExPanic, "CRegEx"); + +/** + * Internal CRegEx panics (Debug only) + */ +enum TRegExPanic + { + EInvalidMatchResults, + EUnexpectedRetValFromPcre, + EVectorTooSmall, + EInvalidNumArgs + }; + +/** + * Newlines are indicated by a single LF character. + * @see KNewLineCr + * @see KNewLineCrLf + * @see KNewLineAnyCrLf + * @see KNewLineAny + */ +static const TInt KNewLineLf = 10; +/** + * Newlines are indicated by a single CR character. 
+ * @see KNewLineLf + * @see KNewLineCrLf + * @see KNewLineAnyCrLf + * @see KNewLineAny + */ +static const TInt KNewLineCr = 13; + +/** + * Newlines are indicated by the two-character CRLF sequence. + * The value packs both characters: (CR << 8) | LF = (13 << 8) | 10 = 3338. + * @see KNewLineCr + * @see KNewLineLf + * @see KNewLineAnyCrLf + * @see KNewLineAny + */ +static const TInt KNewLineCrLf = 3338; + +/** + * Newlines are indicated by any of the following: + * - A single CR character. + * - A single LF character. + * - The two-character CRLF sequence. + * @see KNewLineLf + * @see KNewLineCr + * @see KNewLineCrLf + * @see KNewLineAny + */ +static const TInt KNewLineAnyCrLf = -2; + +/** + * Newlines are indicated by any Unicode newline sequence: + * - A single CR character. + * - A single LF character. + * - The two-character CRLF sequence. + * - A single VT character (vertical tab, U+000B). + * - A single FF character (formfeed, U+000C). + * - A single NEL character (next line, U+0085). + * - A single LS character (line separator, U+2028). + * - A single PS character (paragraph separator, U+2029). + * The last two are recognized only in UTF-8 mode. + * @see KNewLineLf + * @see KNewLineCr + * @see KNewLineCrLf + * @see KNewLineAnyCrLf + */ +static const TInt KNewLineAny = -1; + +/** + * Symbian C++ interface to the pcre regular-expression library. This class, its + * supporting classes and most of the following documentation is largely based + * on or taken from the C++ wrapper included with source distributions of PCRE + * to which all credit should be given. + * + * CRegEx supports + * Perl-style regular expressions (with extensions like \d, \w, \s, + * ...). + * + * NOTE: The following examples make liberal use of _L8() purely for clarity + * and not because it is recommended. In fact, it is strongly discouraged + * in favour of _LIT8() as per the standard Symbian coding conventions. 
+ * ----------------------------------------------------------------------- + * REGEXP SYNTAX: + * + * This module is part of the pcre library and hence supports its syntax + * for regular expressions. + * + * The syntax is pretty similar to Perl's. For those not familiar + * with Perl's regular expressions, here are some examples of the most + * commonly used extensions: + * + * "hello (\\w+) world" -- \w matches a "word" character + * "version (\\d+)" -- \d matches a digit + * "hello\\s+world" -- \s matches any whitespace character + * "\\b(\\w+)\\b" -- \b matches empty string at a word boundary + * "(?i)hello" -- (?i) turns on case-insensitive matching + * + * ----------------------------------------------------------------------- + * MATCHING INTERFACE: + * + * The FullMatchL() operation checks that supplied text matches a + * supplied pattern exactly. + * + * Example: successful match + * @code + * CRegEx* re = CRegEx::NewLC(_L8("h.*o")); + * re->FullMatchL(_L8("hello")); + * CleanupStack::PopAndDestroy(re); + * @endcode + * + * Example: unsuccessful match (requires full match): + * @code + * CRegEx* re = CRegEx::NewLC(_L8("e")); + * !re->FullMatchL(_L8("hello")); + * CleanupStack::PopAndDestroy(re); + * @endcode + * + * ----------------------------------------------------------------------- + * MATCHING WITH SUB-STRING EXTRACTION: + * + * You can supply extra pointer arguments to extract matched subpieces. 
+ * + * Example: extracts "ruby" into "s" and 1234 into "i" + * @code + * TInt i; + * TBuf8<4> s; + * CRegEx* re = CRegEx::NewLC(_L8("(\\w+):(\\d+)")); + * re->FullMatchL(_L8("ruby:1234"), &s, &i); + * CleanupStack::PopAndDestroy(re); + * @endcode + * + * Example: does not try to extract any extra sub-patterns + * @code + * re->FullMatchL(_L8("ruby:1234"), &s); + * @endcode + * + * Example: does not try to extract into NULL + * @code + * re->FullMatchL(_L8("ruby:1234"), NULL, &i); + * @endcode + * + * Example: integer overflow causes failure + * @code + * !re->FullMatchL(_L8("ruby:1234567891234"), NULL, &i); + * @endcode + * + * Example: fails because there aren't enough sub-patterns: + * @code + * TBuf8<4> s; + * CRegEx* re = CRegEx::NewLC(_L8("\\w+:\\d+")); + * !re->FullMatchL(_L8("ruby:1234"), &s); + * CleanupStack::PopAndDestroy(re); + * @endcode + * + * Example: fails because string cannot be stored in integer + * @code + * TInt i; + * CRegEx* re = CRegEx::NewLC(_L8("(.*)")); + * !re->FullMatchL(_L8("ruby"), &i); + * CleanupStack::PopAndDestroy(re); + * @endcode + * + * The provided pointer arguments can be pointers to any scalar numeric + * type, or one of + * TDes8 (matched piece is copied to descriptor) + * TPtrC8 (matched piece is pointed to by the descriptor) + * T (where "TBool T::ParseFrom(const TDesC8&)" exists) + * NULL (the corresponding matched sub-pattern is not copied) + * + * CAVEAT: An optional sub-pattern that does not exist in the matched + * string is assigned the empty string. Therefore, the following will + * return false (because the empty string is not a valid number): + * @code + * TInt number; + * CRegEx* re = CRegEx::NewLC(_L8("[a-z]+(\\d+)?")); + * re->FullMatchL(_L8("abc"), &number); + * CleanupStack::PopAndDestroy(re); + * @endcode + * + * ----------------------------------------------------------------------- + * DO_MATCH + * + * The matching interface supports at most 4 arguments per call. 
+ * If you need more, consider using the more general interface + * CRegEx::DoMatchL(). + * + * ----------------------------------------------------------------------- + * PARTIAL MATCHES + * + * You can use the PartialMatchL() operation when you want the pattern + * to match any substring of the text. + * + * Example: simple search for a string: + * @code + * CRegEx* re = CRegEx::NewLC(_L8("ell")); + * re->PartialMatchL(_L8("hello")); + * CleanupStack::PopAndDestroy(re); + * @endcode + * + * Example: find first number in a string: + * @code + * TInt number; + * CRegEx* re = CRegEx::NewLC(_L8("(\\d+)")); + * re->PartialMatchL(_L8("x*100 + 20"), &number); + * ASSERT(number == 100); + * CleanupStack::PopAndDestroy(re); + * @endcode + * + * ----------------------------------------------------------------------- + * UTF-8 AND THE MATCHING INTERFACE: + * + * By default, pattern and text are plain text, one byte per character. + * The UTF8 flag, passed to the constructor, causes both pattern + * and string to be treated as UTF-8 text, still a byte stream but + * potentially multiple bytes per character. In practice, the text + * is likelier to be UTF-8 than the pattern, but the match returned + * may depend on the UTF8 flag, so always use it when matching + * UTF8 text. E.g., "." will match one byte normally but with UTF8 + * set may match up to three bytes of a multi-byte character. + * + * Example: + * @code + * TRegExOptions options; + * options.SetUtf8(ETrue); + * CRegEx* re = CRegEx::NewLC(utf8Pattern, options); + * re->FullMatchL(utf8String); + * CleanupStack::PopAndDestroy(re); + * @endcode + * NOTE: The UTF8 option is ignored if libpcre was not compiled with the + * SUPPORT_UTF8 macro. + * + * ----------------------------------------------------------------------- + * PASSING MODIFIERS TO THE REGULAR EXPRESSION ENGINE + * + * SPCRE defines some modifiers to change the behavior of the regular + * expression engine. 
+ * The C++ wrapper defines an auxiliary class, TRegExOptions, as a vehicle + * to pass such modifiers to a CRegEx class. + * + * Currently, the following modifiers are supported + * + * modifier description Perl corresponding + * + * EPcreCaseless case insensitive match /i + * EPcreMultiline multiple lines match /m + * EPcreDotAll dot matches newlines /s + * EPcreDollarEndOnly $ matches only at end N/A + * EPcreExtra strict escape parsing N/A + * EPcreExtended ignore whitespaces /x + * EPcreUtf8 handles UTF8 chars built-in + * EPcreUngreedy reverses * and *? N/A + * EPcreNoAutoCapture disables capturing parens N/A (*) + * + * (For a full account on how each modifier works, please check the + * PCRE API reference manual). + * + * (*) Both Perl and PCRE allow non-capturing parentheses by means of the + * "?:" modifier within the pattern itself. e.g. (?:ab|cd) does not + * capture, while (ab|cd) does. + * + * For each modifier, there are two member functions whose name is made + * out of the modifier name, without the "EPcre" prefix. For + * instance, EPcreCaseless is handled by + * TBool Caseless(), + * which returns ETrue if the modifier is set, and + * TRegExOptions SetCaseless(TBool), + * which sets or unsets the modifier. + * + * Moreover, PCRE_EXTRA_MATCH_LIMIT can be accessed through the + * SetMatchLimit() and MatchLimit() member functions. + * Setting the match limit to a non-zero value will limit the execution of + * SPCRE to keep it from doing bad things like blowing the stack or taking + * an eternity to return a result. A value of 5000 is good enough to stop + * stack blowup in a 2MB thread stack. Setting MatchLimit to zero will + * disable match limiting. Alternately, you can set MatchLimitRecursion() + * which uses PCRE_EXTRA_MATCH_LIMIT_RECURSION to limit how much SPCRE + * recurses. MatchLimit() caps the number of matches pcre does; + * MatchLimitRecursion() caps the depth of recursion. 
+ * + * Normally, to pass one or more modifiers to a CRegEx class, you declare + * a TRegExOptions object, set the appropriate options, and pass this + * object to a CRegEx constructor. Example: + * + * @code + * TRegExOptions opt; + * opt.SetCaseless(ETrue); + * CRegEx* re = CRegEx::NewLC(_L8("HELLO"), opt); + * if(re->PartialMatchL(_L8("hello world"))) ... + * @endcode + * + * ----------------------------------------------------------------------- + * SCANNING TEXT INCREMENTALLY + * + * The ConsumeL() operation may be useful if you want to repeatedly + * match regular expressions at the front of a string and skip over + * them as they match. In this port the text is passed as a modifiable + * 8-bit descriptor (TDes8) rather than the "StringPiece" type used by + * the original pcrecpp wrapper. + * + * Example: read lines of the form "var = value" from a string. + * @code + * TBuf8 contents; + * // fill contents somehow + * TBuf8 var; + * TInt value; + * CRegEx* re = CRegEx::NewLC(_L8("(\\w+) = (\\d+)\n")); + * while(re->ConsumeL(contents, &var, &value)) + * { + * ... + * } + * CleanupStack::PopAndDestroy(re); + * @endcode + * + * Each successful call to ConsumeL will set "var/value", and also + * advance "contents" so it points past the matched text. + * + * The FindAndConsumeL() operation is similar to ConsumeL() but does not + * anchor your match at the beginning of the string. For example, you + * could extract all words from a string by repeatedly calling + * @code + * TBuf8 contents; + * // fill contents somehow + * TBuf8 word; + * TInt value; + * CRegEx* re = CRegEx::NewLC(_L8("(\\w+)")); + * while(re->FindAndConsumeL(contents, &word)) + * { + * ... + * } + * CleanupStack::PopAndDestroy(re); + * @endcode + * ----------------------------------------------------------------------- + * PARSING HEX/OCTAL NUMBERS + * + * By default, if you pass a pointer to a numeric value, the + * corresponding text is interpreted as a base-10 number. 
You can + * instead wrap the pointer with a call to one of the operators Hex() + * or Octal() to interpret the text in another base. + * + * Example: + * @code + * TInt a, b; + * CRegEx* re = CRegEx::NewLC(_L8("(.*) (.*)")); + * re->FullMatchL(_L8("100 40"), Octal(&a), Hex(&b)); + * CleanupStack::PopAndDestroy(re); + * @endcode + * will leave 64 in a and b. + * + * ----------------------------------------------------------------------- + * REPLACING PARTS OF STRINGS + * + * You can replace the first match of aPattern in aString with + * aRewrite. Within aRewrite, backslash-escaped digits (\1 to \9) + * can be used to insert text matching corresponding parenthesized + * group from the pattern. \0 in aRewrite refers to the entire + * matching text. E.g., + * @code + * _LIT8(KYabbaDabbaDoo, "yabba dabba doo"); + * TBuf8<20> s(KYabbaDabbaDoo); + * CRegEx* re = CRegEx::NewLC(_L8("b+")); + * re->ReplaceL(_L8("d"), s); + * CleanupStack::PopAndDestroy(re); + * @endcode + * will leave "s" containing "yada dabba doo". The result is ETrue if + * the pattern matches and a replacement occurs, or EFalse otherwise. + * + * GlobalReplaceL() is like ReplaceL(), except that it replaces all + * occurrences of the pattern in the string with the rewrite. + * Replacements are not subject to re-matching. E.g., + * @code + * _LIT8(KYabbaDabbaDoo, "yabba dabba doo"); + * TBuf8<20> s(KYabbaDabbaDoo); + * CRegEx* re = CRegEx::NewLC(_L8("b+")); + * re->GlobalReplaceL(_L8("d"), s); + * CleanupStack::PopAndDestroy(re); + * @endcode + * will leave "s" containing "yada dada doo". It returns the number + * of replacements made. + * + * ExtractL() is like ReplaceL(), except that if the pattern matches, + * aRewrite is copied into aOut (an additional argument) with + * substitutions. The non-matching portions of aText are ignored. + * Returns ETrue if a match occurred and the extraction happened + * successfully. If no match occurs, the string is left unaffected. 
+ */ +class CRegEx : public CBase + { +public: + // Type of match (TODO: Should be restructured as part of TRegExOptions) + enum TAnchor + { + EUnanchored, /** No anchoring */ + EAnchorStart, /** Anchor at start only */ + EAnchorBoth /** Anchor at start and end */ + }; + +public: + IMPORT_C static CRegEx* NewL(const TDesC8& aPattern); + IMPORT_C static CRegEx* NewL(const TDesC8& aPattern, const TRegExOptions& aOptions); + IMPORT_C static CRegEx* NewLC(const TDesC8& aPattern); + IMPORT_C static CRegEx* NewLC(const TDesC8& aPattern, const TRegExOptions& aOptions); + + IMPORT_C static CRegEx* NewL(const TDesC16& aPattern, const TRegExOptions& aOptions); + + IMPORT_C ~CRegEx(); + + inline const TDesC8& Pattern() const; + inline TInt Error() const; + + IMPORT_C TBool FullMatchL(const TDesC8& aText) const; + + IMPORT_C TBool FullMatchL(const TDesC8& aText, + const TRegExArg& aArg1) const; + + IMPORT_C TBool FullMatchL(const TDesC8& aText, + const TRegExArg& aArg1, + const TRegExArg& aArg2) const; + + IMPORT_C TBool FullMatchL(const TDesC8& aText, + const TRegExArg& aArg1, + const TRegExArg& aArg2, + const TRegExArg& aArg3) const; + + IMPORT_C TBool FullMatchL(const TDesC8& aText, + const TRegExArg& aArg1, + const TRegExArg& aArg2, + const TRegExArg& aArg3, + const TRegExArg& aArg4) const; + + IMPORT_C TBool PartialMatchL(const TDesC8& aText) const; + + IMPORT_C TBool PartialMatchL(const TDesC8& aText, + const TRegExArg& aArg1) const; + + IMPORT_C TBool PartialMatchL(const TDesC8& aText, + const TRegExArg& aArg1, + const TRegExArg& aArg2) const; + + IMPORT_C TBool PartialMatchL(const TDesC8& aText, + const TRegExArg& aArg1, + const TRegExArg& aArg2, + const TRegExArg& aArg3) const; + + IMPORT_C TBool PartialMatchL(const TDesC8& aText, + const TRegExArg& aArg1, + const TRegExArg& aArg2, + const TRegExArg& aArg3, + const TRegExArg& aArg4) const; + + IMPORT_C TBool DoMatchL(const TDesC8& aText, + TAnchor aAnchor, + TInt& aConsumed) const; + + IMPORT_C TBool DoMatchL(const 
TDesC8& aText, + TAnchor aAnchor, + TInt& aConsumed, + const TRegExArg& aArg1) const; + + IMPORT_C TBool DoMatchL(const TDesC8& aText, + TAnchor aAnchor, + TInt& aConsumed, + const TRegExArg& aArg1, + const TRegExArg& aArg2) const; + + IMPORT_C TBool DoMatchL(const TDesC8& aText, + TAnchor aAnchor, + TInt& aConsumed, + const TRegExArg& aArg1, + const TRegExArg& aArg2, + const TRegExArg& aArg3) const; + + IMPORT_C TBool DoMatchL(const TDesC8& aText, + TAnchor aAnchor, + TInt& aConsumed, + const TRegExArg& aArg1, + const TRegExArg& aArg2, + const TRegExArg& aArg3, + const TRegExArg& aArg4) const; + + IMPORT_C TBool DoMatchL(const TDesC8& aText, + TAnchor aAnchor, + TInt& aConsumed, + const RPointerArray& aArgs) const; + + + IMPORT_C TBool ConsumeL(TDes8& aText) const; + + IMPORT_C TBool ConsumeL(TDes8& aText, + const TRegExArg& aArg1) const; + + IMPORT_C TBool ConsumeL(TDes8& aText, + const TRegExArg& aArg1, + const TRegExArg& aArg2) const; + + IMPORT_C TBool ConsumeL(TDes8& aText, + const TRegExArg& aArg1, + const TRegExArg& aArg2, + const TRegExArg& aArg3) const; + + IMPORT_C TBool ConsumeL(TDes8& aText, + const TRegExArg& aArg1, + const TRegExArg& aArg2, + const TRegExArg& aArg3, + const TRegExArg& aArg4) const; + + IMPORT_C TBool FindAndConsumeL(TDes8& aText) const; + + IMPORT_C TBool FindAndConsumeL(TDes8& aText, + const TRegExArg& aArg1) const; + + IMPORT_C TBool FindAndConsumeL(TDes8& aText, + const TRegExArg& aArg1, + const TRegExArg& aArg2) const; + + IMPORT_C TBool FindAndConsumeL(TDes8& aText, + const TRegExArg& aArg1, + const TRegExArg& aArg2, + const TRegExArg& aArg3) const; + + IMPORT_C TBool FindAndConsumeL(TDes8& aText, + const TRegExArg& aArg1, + const TRegExArg& aArg2, + const TRegExArg& aArg3, + const TRegExArg& aArg4) const; + + IMPORT_C TBool ReplaceL(const TDesC8& aRewrite, TDes8& aString) const; + + IMPORT_C TInt GlobalReplaceL(const TDesC8& aRewrite, TDes8& aString) const; + + IMPORT_C TBool ExtractL(const TDesC8& aRewrite, const TDesC8& 
aText, TDes8& aOut) const; + + IMPORT_C static TInt NewlineMode(TInt aOptions); + + IMPORT_C static HBufC8* QuoteMetaL(const TDesC8& aUnquoted); + + IMPORT_C TInt NumberOfCapturingGroups() const; + + IMPORT_C void Study(); + +private: + CRegEx(); + CRegEx(const TRegExOptions& aOptions); + + void ConstructL(const TDesC8& aPattern); + void ConstructL(const TDesC16& aPattern); + void CommonConstructL(); + + pcre* CompileL(TAnchor anchor); + void Cleanup(); + + TInt TryMatch(const TDesC8& aText, + TInt aStartPos, + TAnchor aAnchor, + TInt* aVector, + TInt aVectorSize) const; + + + TBool Rewrite(TDes8& aOut, + const TDesC8& aRewrite, + const TDesC8& aText, + TInt* aVector, + TInt aVectorSize, + TInt aMatches) const; + + TBool DoMatchImpl(const TDesC8& aText, + TAnchor aAnchor, + TInt& aConsumed, + const RPointerArray& aArgs, + TInt* aVector, + TInt aVectorSize) const; + + + static void Panic(TRegExPanic aPanic); + + mutable TInt iErrorCode; // Error code for the alst unsuccessful operation. + TInt iErrorOffset; // Offset in pattern where error was detected + HBufC8* iPattern; // Regular expression pattern + TRegExOptions iOptions; // Options used to compile RE pattern. + pcre* iReFull; // For full matches + pcre* iRePartial; // For partial matches + pcre_extra* iExtraPartial; // Study Data for iRePartial + TRegExArg* iNoArg; // Default argument + }; + +#include +#endif /* CREGEX_H_ */