// Copyright (c) 2005 - 2006, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Author: Sanjay Ghemawat
//
// Heavily refactored for Symbian OS by Accenture.

#define PCRE_EXP_DECL IMPORT_C // We need to define this because the config.h that defines it when building libpcre isn't exported.

#include <fshell/descriptorutils.h>
#include "pcre.h"
#include "cregex.h"
#include "tregexarg.h"

/**
 * Maximum number of optional arguments supported by the matching interface.
 * If more arguments are required then use the more generic DoMatchL() function.
*/static const TInt KMaxArgs = 4;/** * Multiplier used to calculate size of vector that stores results from PCRE. * @see KVecSize */static const TInt KPcreWorkspaceMultiplier = 3;/*** "The first two-thirds of the vector is used to pass back captured subpatterns,* each subpattern using a pair of integers. The remaining third of the vector* is used as workspace by pcre_exec() while matching capturing subpatterns,* and is not available for passing back information. The number passed in * vecsize should always be a multiple of three. If it is not, it is rounded* down."*/ static const TInt KVecSize = (1 + KMaxArgs) * KPcreWorkspaceMultiplier;/** * Number of characters in a triplet escape sequence. */static const TInt KEscapeTripletLength = 3;/** * Number of characters to reserve for escape sequences * Reserves enough room for several. */static const TInt KReserveForEscapeChars = KEscapeTripletLength * 4;// Public Functions ///** * 2-Phase constructor for CRegEx objects. * @param aPattern regular expression pattern * @return a pre-compiled regular expression object ready to perform matching. */EXPORT_C CRegEx* CRegEx::NewL(const TDesC8& aPattern) { CRegEx* self = NewLC(aPattern); CleanupStack::Pop(self); return self; }/** * 2-Phase constructor for CRegEx objects. * @param aPattern regular expression pattern. * @param aOptions options to use when compiling regular expression. * @return a pre-compiled regular expression object ready to perform matching. */EXPORT_C CRegEx* CRegEx::NewL(const TDesC8& aPattern, const TRegExOptions& aOptions) { CRegEx* self = NewLC(aPattern, aOptions); CleanupStack::Pop(self); return self; }/** * 2-Phase constructor for CRegEx objects. Object is left on the cleanup stack. * @param aPattern regular expression pattern. * @return a pre-compiled regular expression object ready to perform matching. 
*/EXPORT_C CRegEx* CRegEx::NewLC(const TDesC8& aPattern) { CRegEx* self = new(ELeave) CRegEx(); CleanupStack::PushL(self); self->ConstructL(aPattern); return self; }/** * 2-Phase constructor for CRegEx objects. Object is left on the cleanup stack. * @param aPattern regular expression pattern. * @param aOptions options to use when compiling regular expression. * @return a pre-compiled regular expression object ready to perform matching. */EXPORT_C CRegEx* CRegEx::NewLC(const TDesC8& aPattern, const TRegExOptions& aOptions) { CRegEx* self = new(ELeave) CRegEx(aOptions); CleanupStack::PushL(self); self->ConstructL(aPattern); return self; }EXPORT_C CRegEx* CRegEx::NewL(const TDesC16& aPattern, const TRegExOptions& aOptions) { CRegEx* self = new(ELeave) CRegEx(aOptions); CleanupStack::PushL(self); self->ConstructL(aPattern); CleanupStack::Pop(self); return self; }/** * Standard destructor to free resources. */EXPORT_C CRegEx::~CRegEx() { delete iNoArg; delete iPattern; Cleanup(); }// Public matching interface ///** * Checks if the regular expression (RE) matches the supplied text entirely. * @param aText the text to match against the regular expression. * @return ETrue if the RE matches the supplied text entirely, EFalse otherwise. * @see PartialMatchL() */EXPORT_C TBool CRegEx::FullMatchL(const TDesC8& aText) const { return FullMatchL(aText, *iNoArg, *iNoArg, *iNoArg, *iNoArg); }/** * Checks if the regular expression (RE) matches the supplied text entirely. * @param aText the text to match against the regular expression. * @param aArg1 contains the first extracted subpattern. * @return ETrue if the RE matches the supplied text entirely, EFalse otherwise. * @see PartialMatchL() */EXPORT_C TBool CRegEx::FullMatchL(const TDesC8& aText, const TRegExArg& aArg1) const { return FullMatchL(aText, aArg1, *iNoArg, *iNoArg, *iNoArg); }/** * Checks if the regular expression (RE) matches the supplied text entirely. * @param aText the text to match against the regular expression. 
* @param aArg1 contains the first extracted subpattern. * @param aArg2 contains the second extracted subpattern. * @return ETrue if the RE matches the supplied text entirely, EFalse otherwise. * @see PartialMatchL() */EXPORT_C TBool CRegEx::FullMatchL(const TDesC8& aText, const TRegExArg& aArg1, const TRegExArg& aArg2) const { return FullMatchL(aText, aArg1, aArg2, *iNoArg, *iNoArg); }/** * Checks if the regular expression (RE) matches the supplied text entirely. * @param aText the text to match against the regular expression. * @param aArg1 contains the first extracted subpattern. * @param aArg2 contains the second extracted subpattern. * @param aArg3 contains the third extracted subpattern. * @return ETrue if the RE matches the supplied text entirely, EFalse otherwise. * @see PartialMatchL() */EXPORT_C TBool CRegEx::FullMatchL(const TDesC8& aText, const TRegExArg& aArg1, const TRegExArg& aArg2, const TRegExArg& aArg3) const { return FullMatchL(aText, aArg1, aArg2, aArg3, *iNoArg); }/** * Checks if the regular expression (RE) matches the supplied text entirely. * @param aText the text to match against the regular expression. * @param aArg1 contains the first extracted subpattern. * @param aArg2 contains the second extracted subpattern. * @param aArg3 contains the third extracted subpattern. * @param aArg4 contains the fourth extracted subpattern. * @return ETrue if the RE matches the supplied text entirely, EFalse otherwise. 
* @see PartialMatchL() */EXPORT_C TBool CRegEx::FullMatchL(const TDesC8& aText, const TRegExArg& aArg1, const TRegExArg& aArg2, const TRegExArg& aArg3, const TRegExArg& aArg4) const { RPointerArray<const TRegExArg> args; CleanupClosePushL(args); args.ReserveL(KMaxArgs); if (&aArg1 != iNoArg) { args.AppendL(&aArg1); } if (&aArg2 != iNoArg) { args.AppendL(&aArg2); } if (&aArg3 != iNoArg) { args.AppendL(&aArg3); } if (&aArg4 != iNoArg) { args.AppendL(&aArg4); } TInt consumed = 0; TInt vector[KVecSize]; TBool r = DoMatchImpl(aText, EAnchorBoth, consumed, args, vector, KVecSize); CleanupStack::PopAndDestroy(&args); return r; }/** * Checks if the regular expression (RE) matches any substring of the text. * @param aText the text to match against the regular expression. * @return ETrue if the RE matches any substring of the supplied text, * EFalse otherwise. * @see FullMatchL() */EXPORT_C TBool CRegEx::PartialMatchL(const TDesC8& aText) const { return PartialMatchL(aText, *iNoArg, *iNoArg, *iNoArg, *iNoArg); }/** * Checks if the regular expression (RE) matches any substring of the text. * @param aText the text to match against the regular expression. * @param aArg1 contains the first extracted subpattern. * @return ETrue if the RE matches any substring of the supplied text, * EFalse otherwise. * @see FullMatchL() */EXPORT_C TBool CRegEx::PartialMatchL(const TDesC8& aText, const TRegExArg& aArg1) const { return PartialMatchL(aText, aArg1, *iNoArg, *iNoArg, *iNoArg); }/** * Checks if the regular expression (RE) matches any substring of the text. * @param aText the text to match against the regular expression. * @param aArg1 contains the first extracted subpattern. * @param aArg2 contains the second extracted subpattern. * @return ETrue if the RE matches any substring of the supplied text, * EFalse otherwise. 
* @see FullMatchL() */EXPORT_C TBool CRegEx::PartialMatchL(const TDesC8& aText, const TRegExArg& aArg1, const TRegExArg& aArg2) const { return PartialMatchL(aText, aArg1, aArg2, *iNoArg, *iNoArg); }/** * Checks if the regular expression (RE) matches any substring of the text. * @param aText the text to match against the regular expression. * @param aArg1 contains the first extracted subpattern. * @param aArg2 contains the second extracted subpattern. * @param aArg3 contains the third extracted subpattern. * @return ETrue if the RE matches any substring of the supplied text, * EFalse otherwise. * @see FullMatchL() */EXPORT_C TBool CRegEx::PartialMatchL(const TDesC8& aText, const TRegExArg& aArg1, const TRegExArg& aArg2, const TRegExArg& aArg3) const { return PartialMatchL(aText, aArg1, aArg2, aArg3, *iNoArg); }/** * Checks if the regular expression (RE) matches any substring of the text. * @param aText the text to match against the regular expression. * @param aArg1 contains the first extracted subpattern. * @param aArg2 contains the second extracted subpattern. * @param aArg3 contains the third extracted subpattern. * @param aArg4 contains the fourth extracted subpattern. * @return ETrue if the RE matches any substring of the supplied text, * EFalse otherwise. 
* @see FullMatchL() */EXPORT_C TBool CRegEx::PartialMatchL(const TDesC8& aText, const TRegExArg& aArg1, const TRegExArg& aArg2, const TRegExArg& aArg3, const TRegExArg& aArg4) const { RPointerArray<const TRegExArg> args; CleanupClosePushL(args); args.ReserveL(KMaxArgs); if (&aArg1 != iNoArg) { args.AppendL(&aArg1); } if (&aArg2 != iNoArg) { args.AppendL(&aArg2); } if (&aArg3 != iNoArg) { args.AppendL(&aArg3); } if (&aArg4 != iNoArg) { args.AppendL(&aArg4); } TInt consumed = 0; TInt vector[KVecSize]; TBool r = DoMatchImpl(aText, EUnanchored, consumed, args, vector, KVecSize); CleanupStack::PopAndDestroy(&args); return r; }/** * General function to perform a regular expression (RE) match on a substring * of the text. * @param aText the text to match against the regular expression. * @param aAnchor the type of match to perform * @return ETrue if the RE matches a substring of the supplied text, * EFalse otherwise. */EXPORT_C TBool CRegEx::DoMatchL(const TDesC8& aText, TAnchor aAnchor, TInt& aConsumed) const { return DoMatchL(aText, aAnchor, aConsumed, *iNoArg, *iNoArg, *iNoArg, *iNoArg); }/** * General function to perform a regular expression (RE) match on a substring * of the text. * @param aText the text to match against the regular expression. * @param aAnchor the type of match to perform * @param aArg1 contains the first extracted subpattern. * @return ETrue if the RE matches a substring of the supplied text, * EFalse otherwise. */EXPORT_C TBool CRegEx::DoMatchL(const TDesC8& aText, TAnchor aAnchor, TInt& aConsumed, const TRegExArg& aArg1) const { return DoMatchL(aText, aAnchor, aConsumed, aArg1, *iNoArg, *iNoArg, *iNoArg); }/** * General function to perform a regular expression (RE) match on a substring * of the text. * @param aText the text to match against the regular expression. * @param aAnchor the type of match to perform * @param aArg1 contains the first extracted subpattern. * @param aArg2 contains the second extracted subpattern. 
* @return ETrue if the RE matches a substring of the supplied text, * EFalse otherwise. */EXPORT_C TBool CRegEx::DoMatchL(const TDesC8& aText, TAnchor aAnchor, TInt& aConsumed, const TRegExArg& aArg1, const TRegExArg& aArg2) const { return DoMatchL(aText, aAnchor, aConsumed, aArg1, aArg2, *iNoArg, *iNoArg); }/** * General function to perform a regular expression (RE) match on a substring * of the text. * @param aText the text to match against the regular expression. * @param aAnchor the type of match to perform * @param aArg1 contains the first extracted subpattern. * @param aArg2 contains the second extracted subpattern. * @param aArg3 contains the third extracted subpattern. * @return ETrue if the RE matches a substring of the supplied text, * EFalse otherwise. */EXPORT_C TBool CRegEx::DoMatchL(const TDesC8& aText, TAnchor aAnchor, TInt& aConsumed, const TRegExArg& aArg1, const TRegExArg& aArg2, const TRegExArg& aArg3) const { return DoMatchL(aText, aAnchor, aConsumed, aArg1, aArg2, aArg3, *iNoArg); }/** * General function to perform a regular expression (RE) match on a substring * of the text. * @param aText the text to match against the regular expression. * @param aAnchor the type of match to perform * @param aArg1 contains the first extracted subpattern. * @param aArg2 contains the second extracted subpattern. * @param aArg3 contains the third extracted subpattern. * @param aArg4 contains the fourth extracted subpattern. * @return ETrue if the RE matches a substring of the supplied text, * EFalse otherwise. 
*/EXPORT_C TBool CRegEx::DoMatchL(const TDesC8& aText, TAnchor aAnchor, TInt& aConsumed, const TRegExArg& aArg1, const TRegExArg& aArg2, const TRegExArg& aArg3, const TRegExArg& aArg4) const { RPointerArray<const TRegExArg> args; CleanupClosePushL(args); args.ReserveL(KMaxArgs); if (&aArg1 != iNoArg) { args.AppendL(&aArg1); } if (&aArg2 != iNoArg) { args.AppendL(&aArg2); } if (&aArg3 != iNoArg) { args.AppendL(&aArg3); } if (&aArg4 != iNoArg) { args.AppendL(&aArg4); } TInt r = DoMatchL(aText, aAnchor, aConsumed, args); CleanupStack::PopAndDestroy(&args); return r; }/** * General function to perform a regular expression (RE) match on a substring * of the text. * @param aText the text to match against the regular expression. * @param aAnchor the type of match to perform * @param array of arguments that will contain the extracted subpatterns. * @return ETrue if the RE matches a substring of the supplied text, * EFalse otherwise. */EXPORT_C TBool CRegEx::DoMatchL(const TDesC8& aText, TAnchor aAnchor, TInt& aConsumed, const RPointerArray<const TRegExArg>& aArgs) const { const TInt vectorSize = (1 + aArgs.Count()) * KPcreWorkspaceMultiplier; TInt* vector = new( ELeave ) TInt[ vectorSize ]; CleanupArrayDeletePushL( vector ); TBool r = DoMatchImpl(aText, aAnchor, aConsumed, aArgs, vector, vectorSize); CleanupStack::PopAndDestroy(vector); return r; }/** * Allows text to be scanned incrementally. Call this function repeatidly to * match regular expressions at the front of a string and skip over them as * they match. * @param aText the text to match against the regular expression. * @return ETrue if the RE matched and a substring was consumed, * EFalse otherwise. * @see FindAndConsumeL() */EXPORT_C TBool CRegEx::ConsumeL(TDes8& aText) const { return ConsumeL(aText, *iNoArg, *iNoArg, *iNoArg, *iNoArg); }/** * Allows text to be scanned incrementally. Repeatidly call this function to * match regular expressions at the front of a string and skip over them as * they match. 
* @param aText the text to match against the regular expression. * @param aArg1 contains the first extracted subpattern. * @return ETrue if the RE matched and a substring was consumed, * EFalse otherwise. * @see FindAndConsumeL() */EXPORT_C TBool CRegEx::ConsumeL(TDes8& aText, const TRegExArg& aArg1) const { return ConsumeL(aText, aArg1, *iNoArg, *iNoArg, *iNoArg); }/** * Allows text to be scanned incrementally. Repeatidly call this function to * match regular expressions at the front of a string and skip over them as * they match. * @param aText the text to match against the regular expression. * @param aArg1 contains the first extracted subpattern. * @param aArg2 contains the second extracted subpattern. * @return ETrue if the RE matched and a substring was consumed, * EFalse otherwise. * @see FindAndConsumeL() */EXPORT_C TBool CRegEx::ConsumeL(TDes8& aText, const TRegExArg& aArg1, const TRegExArg& aArg2) const { return ConsumeL(aText, aArg1, aArg2, *iNoArg, *iNoArg); }/** * Allows text to be scanned incrementally. Repeatidly call this function to * match regular expressions at the front of a string and skip over them as * they match. * @param aText the text to match against the regular expression. * @param aArg1 contains the first extracted subpattern. * @param aArg2 contains the second extracted subpattern. * @param aArg3 contains the third extracted subpattern. * @return ETrue if the RE matched and a substring was consumed, * EFalse otherwise. * @see FindAndConsumeL() */EXPORT_C TBool CRegEx::ConsumeL(TDes8& aText, const TRegExArg& aArg1, const TRegExArg& aArg2, const TRegExArg& aArg3) const { return ConsumeL(aText, aArg1, aArg2, aArg3, *iNoArg); }/** * Allows text to be scanned incrementally. Repeatidly call this function to * match regular expressions at the front of a string and skip over them as * they match. * @param aText the text to match against the regular expression. * @param aArg1 contains the first extracted subpattern. 
* @param aArg2 contains the second extracted subpattern. * @param aArg3 contains the third extracted subpattern. * @param aArg4 contains the fourth extracted subpattern. * @return ETrue if the RE matched and a substring was consumed, * EFalse otherwise. * @see FindAndConsumeL() */EXPORT_C TBool CRegEx::ConsumeL(TDes8& aText, const TRegExArg& aArg1, const TRegExArg& aArg2, const TRegExArg& aArg3, const TRegExArg& aArg4) const { RPointerArray<const TRegExArg> args; CleanupClosePushL(args); args.ReserveL(KMaxArgs); if (&aArg1 != iNoArg) { args.AppendL(&aArg1); } if (&aArg2 != iNoArg) { args.AppendL(&aArg2); } if (&aArg3 != iNoArg) { args.AppendL(&aArg3); } if (&aArg4 != iNoArg) { args.AppendL(&aArg4); } TInt consumed = 0; TInt vector[KVecSize]; TBool r = DoMatchImpl(aText, EAnchorStart, consumed, args, vector, KVecSize); if (r) { // Remove prefix aText.Delete(0, consumed); } CleanupStack::PopAndDestroy(&args); return r; }/** * Allows text to be scanned incrementally. Repeatidly call this function to * match regular expression in a string and extract them as they match. * Unlike ConsumeL, your match is not anchored to the start of the string. * @param aText the text to match against the regular expression. * @return ETrue if the RE matched and a substring was consumed, * EFalse otherwise. * @see ConsumeL() */EXPORT_C TBool CRegEx::FindAndConsumeL(TDes8& aText) const { return FindAndConsumeL(aText, *iNoArg, *iNoArg, *iNoArg, *iNoArg); }/** * Allows text to be scanned incrementally. Repeatidly call this function to * match regular expression in a string and extract them as they match. * Unlike ConsumeL, your match is not anchored to the start of the string. * @param aText the text to match against the regular expression. * @param aArg1 contains the first extracted subpattern. * @return ETrue if the RE matched and a substring was consumed, * EFalse otherwise. 
* @see ConsumeL() */EXPORT_C TBool CRegEx::FindAndConsumeL(TDes8& aText, const TRegExArg& aArg1) const { return FindAndConsumeL(aText, aArg1, *iNoArg, *iNoArg, *iNoArg); }/** * Allows text to be scanned incrementally. Repeatidly call this function to * match regular expression in a string and extract them as they match. * Unlike ConsumeL, your match is not anchored to the start of the string. * @param aText the text to match against the regular expression. * @param aArg1 contains the first extracted subpattern. * @param aArg2 contains the second extracted subpattern. * @return ETrue if the RE matched and a substring was consumed, * EFalse otherwise. * @see ConsumeL() */EXPORT_C TBool CRegEx::FindAndConsumeL(TDes8& aText, const TRegExArg& aArg1, const TRegExArg& aArg2) const { return FindAndConsumeL(aText, aArg1, aArg2, *iNoArg, *iNoArg); }/** * Allows text to be scanned incrementally. Repeatidly call this function to * match regular expression in a string and extract them as they match. * Unlike ConsumeL, your match is not anchored to the start of the string. * @param aText the text to match against the regular expression. * @param aArg1 contains the first extracted subpattern. * @param aArg2 contains the second extracted subpattern. * @param aArg3 contains the third extracted subpattern. * @return ETrue if the RE matched and a substring was consumed, * EFalse otherwise. * @see ConsumeL() */EXPORT_C TBool CRegEx::FindAndConsumeL(TDes8& aText, const TRegExArg& aArg1, const TRegExArg& aArg2, const TRegExArg& aArg3) const { return FindAndConsumeL(aText, aArg1, aArg2, aArg3, *iNoArg); }/** * Allows text to be scanned incrementally. Repeatidly call this function to * match regular expression in a string and extract them as they match. * Unlike ConsumeL, your match is not anchored to the start of the string. * @param aText the text to match against the regular expression. * @param aArg1 contains the first extracted subpattern. 
* @param aArg2 contains the second extracted subpattern. * @param aArg3 contains the third extracted subpattern. * @param aArg4 contains the fourth extracted subpattern. * @return ETrue if the RE matched and a substring was consumed, * EFalse otherwise. * @see ConsumeL() */EXPORT_C TBool CRegEx::FindAndConsumeL(TDes8& aText, const TRegExArg& aArg1, const TRegExArg& aArg2, const TRegExArg& aArg3, const TRegExArg& aArg4) const { RPointerArray<const TRegExArg> args; CleanupClosePushL(args); args.ReserveL(KMaxArgs); if (&aArg1 != iNoArg) { args.AppendL(&aArg1); } if (&aArg2 != iNoArg) { args.AppendL(&aArg2); } if (&aArg3 != iNoArg) { args.AppendL(&aArg3); } if (&aArg4 != iNoArg) { args.AppendL(&aArg4); } TInt consumed = 0; TInt vector[KVecSize]; TBool r = DoMatchImpl(aText, EUnanchored, consumed, args, vector, KVecSize); CleanupStack::PopAndDestroy(&args); if (r) { // Remove prefix aText.Delete(0, consumed); } return r; }/** * The first match of the regular expression in aString is * replaced by aRewrite. * * Within aRewrite, backslash-escaped digits (\1 to \9) can be * used to insert text matching a corresponding parenthesized group from * the pattern. \0 in aRewrite refers to the entire matching text. * @param aRewrite the text to replace the matching substring with. * @param aText the text to match against the regular expression. * @return ETrue if match occurred and replace was succsessful, * EFalse otherwise. 
* @see GlobalReplaceL() */EXPORT_C TBool CRegEx::ReplaceL(const TDesC8& aRewrite, TDes8& aString) const { TInt r = EFalse; TInt vector[KVecSize]; TInt matches = TryMatch(aString, 0, EUnanchored, vector, KVecSize); if (matches == 0) { return r; } HBufC8* s = HBufC8::NewLC(aString.MaxLength()); TPtr8 pS = s->Des(); if (!Rewrite(pS, aRewrite, aString, vector, KVecSize, matches)) { CleanupStack::PopAndDestroy(s); return r; } __ASSERT_DEBUG(vector[0] >= 0, Panic(EInvalidMatchResults)); __ASSERT_DEBUG(vector[1] >= 0, Panic(EInvalidMatchResults)); TInt replacementLength = vector[1] - vector[0]; if((aString.Length() + s->Length() - replacementLength) <= aString.MaxLength()) { aString.Replace(vector[0], replacementLength, *s); } else { CleanupStack::PopAndDestroy(s); iErrorCode = KErrRegExOutputTooBig; return r; } CleanupStack::PopAndDestroy(s); r = ETrue; return r; }/** * All matches of the regular expression in aString are * replaced by aRewrite. * * Within the rewrite string, backslash-escaped digits (\1 to \9) can be * used to insert text matching a corresponding parenthesized group from * the pattern. \0 in "aRewrite" refers to the entire matching text. * @param aRewrite the text to replace the matching substring with. * @param aText the text to match against the regular expression. * @return ETrue if matches occurred and replace was succsessful, * EFalse otherwise. 
* @see ReplaceL() */EXPORT_C TInt CRegEx::GlobalReplaceL(const TDesC8& aRewrite, TDes8& aString) const { TInt count = 0; TInt vector[KVecSize]; HBufC8* out = HBufC8::NewLC(aString.MaxLength()); TPtr8 pOut = out->Des(); TInt start = 0; TInt lastend = -1; while (start <= aString.Length()) { TInt matches = TryMatch(aString, start, EUnanchored, vector, KVecSize); if (matches <= 0) { break; } TInt matchstart = vector[0]; TInt matchend = vector[1]; __ASSERT_DEBUG(matchstart >= start, EInvalidMatchResults); __ASSERT_DEBUG(matchend >= matchstart, EInvalidMatchResults); if (matchstart == matchend && matchstart == lastend) { // advance one character if we matched an empty string at the same // place as the last match occurred matchend = start + 1; // If the current char is CR and we're in CRLF mode, skip LF too. // Note it's better to call pcre_fullinfo() than to examine // all_options(), since options_ could have changed bewteen // compile-time and now, but this is simpler and safe enough. // Modified by PH to add ANY and ANYCRLF. if ((start + 1 < aString.Length()) && aString[start] == '\r' && aString[start+1] == '\n' && (NewlineMode(iOptions.AllOptions()) == PCRE_NEWLINE_CRLF || NewlineMode(iOptions.AllOptions()) == PCRE_NEWLINE_ANY || NewlineMode(iOptions.AllOptions()) == PCRE_NEWLINE_ANYCRLF)) { matchend++; } // We also need to advance more than one char if we're in utf8 mode. 
#ifdef SUPPORT_UTF8 if (iOptions.Utf8()) { while ((matchend < aString.Length()) && (aString[matchend] & 0xc0) == 0x80) { matchend++; } } #endif if (matchend <= aString.Length()) { if(pOut.Length() + (matchend - start) <= pOut.MaxLength()) { pOut.Append(aString.Mid(start, matchend - start)); } else { iErrorCode = KErrRegExOutputTooBig; return KErrRegExOutputTooBig; } } start = matchend; } else { if(pOut.Length() + (matchstart - start) <= pOut.MaxLength()) { pOut.Append(aString.Mid(start, matchstart - start)); } else { iErrorCode = KErrRegExOutputTooBig; return KErrRegExOutputTooBig; } Rewrite(pOut, aRewrite, aString, vector, KVecSize, matches); start = matchend; lastend = matchend; count++; } } if (count == 0) { return count; } if (start < aString.Length()) { if((aString.Length() - start) + pOut.Length() <= pOut.MaxLength()) { pOut.Append(aString.Mid(start, aString.Length() - start)); } else { iErrorCode = KErrRegExOutputTooBig; return KErrRegExOutputTooBig; } } aString.Swap(pOut); CleanupStack::PopAndDestroy(out); return count; }/** * The first match of the regular expression in the supplied string is * replaced by another supplied string and copied into aOut with substitutions. * The non-matching portions of aString are ignored. * * Within the rewrite string, backslash-escaped digits (\1 to \9) can be * used to insert text matching a corresponding parenthesized group from * the pattern. \0 in "aRewrite" refers to the entire matching text. * @param aRewrite the text to replace the matching substring with. * @param aText the text to match against the regular expression. * @return ETrue if match occurred and extraction was succsessful, * EFalse otherwise. 
* @see ReplaceL() */EXPORT_C TBool CRegEx::ExtractL(const TDesC8& aRewrite, const TDesC8& aText, TDes8& aOut) const { TInt vector[KVecSize]; TInt matches = TryMatch(aText, 0, EUnanchored, vector, KVecSize); if (matches == 0) { iErrorCode = KErrRegExZeroMatches; return EFalse; } aOut.Zero(); TBool r = Rewrite(aOut, aRewrite, aText, vector, KVecSize, matches); return r; }/** * Returns EPcreNewlineAnyCrLf, EPcreNewlineAny, EPcreNewlineCrLf, * EPcreNewlineLf or EPcreNewlineCr * Note that EPcreNewlineCrLf is defined to be EPcreNewlineCr | EPcreNewlineLf. * @param aOptions * @return */EXPORT_C TInt CRegEx::NewlineMode(TInt aOptions) { // TODO: if we can make it threadsafe, cache this var TInt newlineMode = 0; /* if (newlineMode) return newlineMode; */ // do this once it's cached if (aOptions & (EPcreNewlineCrLf|EPcreNewlineCr|EPcreNewlineLf| EPcreNewlineAny|EPcreNewlineAnyCrLf)) { newlineMode = (aOptions & (EPcreNewlineCrLf|EPcreNewlineCr|EPcreNewlineLf| EPcreNewlineAny|EPcreNewlineAnyCrLf)); } else { TInt newline; pcre_config(PCRE_CONFIG_NEWLINE, &newline); switch(newline) { case KNewLineAnyCrLf: { newlineMode = EPcreNewlineAnyCrLf; break; } case KNewLineAny: { newlineMode = EPcreNewlineAny; break; } case KNewLineLf: { newlineMode = EPcreNewlineLf; break; } case KNewLineCr: { newlineMode = EPcreNewlineCr; break; } case KNewLineCrLf: { newlineMode = EPcreNewlineCrLf; break; } default: { __ASSERT_DEBUG(EFalse, EUnexpectedRetValFromPcre); } } } return newlineMode; }/** * Escapes all potentially meaningful regular expression (RE) characters in * aUnquoted. The returned string, used as a regular expression, * will exactly match the original string. For example, * 1.5-2.0? * may become: * 1\.5\-2\.0\? * Note QuoteMeta behaves the same as perl's QuoteMeta function, * *except* that it escapes the NUL character (\0) as backslash + 0, * rather than backslash + NUL. * @param aUnquoted unescaped string. * @return string with all meaningful RE characters escaped. 
*/EXPORT_C HBufC8* CRegEx::QuoteMetaL(const TDesC8& aUnquoted) { HBufC8* result = HBufC8::NewLC(aUnquoted.Length() + KReserveForEscapeChars); TPtr8 pResult = result->Des(); _LIT8(KEscapeNull, "\\0"); _LIT8(KEscapeOther, "\\%c"); // Escape any ascii character not in [A-Za-z_0-9]. // // Note that it's legal to escape a character even if it has no // special meaning in a regular expression -- so this function does // that. (This also makes it identical to the perl function of the // same name; see `perldoc -f quotemeta`.) The one exception is // escaping NUL: rather than doing backslash + NUL, like perl does, // we do '\0', because pcre itself doesn't take embedded NUL chars. for (TInt ii = 0; ii < aUnquoted.Length(); ++ii) { // Always make sure we have enough room to escape at least one character if(pResult.MaxLength() <= pResult.Length() + KEscapeTripletLength) { result = result->ReAllocL(pResult.Length() + KReserveForEscapeChars); CleanupStack::Pop(); CleanupStack::PushL(result); pResult.Set(result->Des()); } if (aUnquoted[ii] == TChar('\0')) { pResult.Append(KEscapeNull()); } else if ((aUnquoted[ii] < TChar('a') || aUnquoted[ii] > TChar('z')) && (aUnquoted[ii] < TChar('A') || aUnquoted[ii] > TChar('Z')) && (aUnquoted[ii] < TChar('0') || aUnquoted[ii] > TChar('9')) && aUnquoted[ii] != TChar('_') && // If this is the part of a UTF8 or Latin1 character, we need // to copy this byte without escaping. Experimentally this is // what works correctly with the regexp library. !(aUnquoted[ii] & TChar(0x80))) { pResult.AppendFormat(KEscapeOther, aUnquoted[ii]); } else { pResult.Append(aUnquoted[ii]); } } CleanupStack::Pop(result); return result;}/** * Returns the number of capturing subpatterns, or -1 if the * regular expressions wasn't valid on construction. * @return the number of capturing subpatterns or or -1 if the regular * expressions wasn't valid on construction. 
*/EXPORT_C TInt CRegEx::NumberOfCapturingGroups() const { if (iRePartial == NULL) return KErrNotFound; TInt result; TInt pcreRetVal = pcre_fullinfo(iRePartial, // The regular expression object iExtraPartial, // Study data PCRE_INFO_CAPTURECOUNT, &result); __ASSERT_DEBUG(pcreRetVal == 0, Panic(EUnexpectedRetValFromPcre)); return result; }/** * Analyzes a regular expression (RE) pattern further. This is especially useful * if an RE is going to be used several times to reduce the time taken for * matching. * * "At present, studying a pattern is useful only for non-anchored patterns that * do not have a single fixed starting character." */EXPORT_C void CRegEx::Study() { // "At present, studying a pattern is useful only for non-anchored // patterns that do not have a single fixed starting character." if(iRePartial) { const char* compileError; // ignored iExtraPartial = pcre_study( iRePartial, /* result of pcre_compile() */ 0, /* no options exist */ &compileError); /* set to NULL or points to a message */ } }// Private Functions ///** * Standard constructor */CRegEx::CRegEx() { }/** * Standard constructor * @param aOptions options used when compiling regular expression. */CRegEx::CRegEx(const TRegExOptions& aOptions) : iOptions(aOptions) { }/** * Second phase constructor. */void CRegEx::ConstructL(const TDesC8& aPattern) { iPattern = HBufC8::NewL(aPattern.Length() + 1); // Leave room for \0 TPtr8 pPattern = iPattern->Des(); pPattern.Copy(aPattern); pPattern.ZeroTerminate(); CommonConstructL(); }void CRegEx::CommonConstructL() { // The default value for an argument, to indicate no arg was passed in iNoArg = new(ELeave) TRegExArg((TAny*)NULL); // Compile patterns used for partial and full matches. 
iReFull = NULL; iRePartial = NULL; iRePartial = CompileL(EUnanchored); if(iRePartial) { iReFull = CompileL(EAnchorBoth); } User::LeaveIfError(iErrorCode); } void CRegEx::ConstructL(const TDesC16& aPattern) { LtkUtils::RLtkBuf8 narrowBuf; narrowBuf.CreateLC(aPattern.Length() + 1); if (iOptions.Utf8()) { narrowBuf.CopyAsUtf8L(aPattern); } else { narrowBuf.Copy(aPattern); } narrowBuf.AppendL(0); iPattern = narrowBuf.ToHBuf(); CleanupStack::Pop(&narrowBuf); CommonConstructL(); }/** * Compile the regular expression (RE) pattern. * @param aAnchor anchoring to use for the RE pattern. * @return pointer to PCRE object with compiled RE data. */pcre* CRegEx::CompileL(TAnchor aAnchor) { // First, convert TRegExOptions into pcre options TInt pcreOptions = iOptions.AllOptions(); // Special treatment for anchoring. This is needed because at // runtime pcre only provides an option for anchoring at the // beginning of a string (unless you use offset). // // There are three types of anchoring we want: // EUnanchored Compile the original pattern, and use // a pcre unanchored match. // EAnchorStart Compile the original pattern, and use // a pcre anchored match. // EAnchorBoth Tack a "\z" to the end of the original pattern // and use a pcre anchored match. pcre* re; TInt errCode = 0; TInt errOffset = 0; const char* compileError; // ignored if (aAnchor != EAnchorBoth) { re = pcre_compile2((const char *)iPattern->Ptr(), pcreOptions, &errCode,&compileError, &errOffset, NULL); } else { // Tack a '\z' at the end of RE. Parenthesize it first so that // the '\z' applies to all top-level alternatives in the regexp. 
_LIT8(KWrapped, "(?:%S)\\z\x0"); HBufC8* wrapped = HBufC8::NewLC(KWrapped().Length() + iPattern->Length()); TPtr8 pWrapped = wrapped->Des(); pWrapped.Format(KWrapped(), iPattern); re = pcre_compile2((const char *)pWrapped.Ptr(), pcreOptions, &errCode, &compileError, &errOffset, NULL); CleanupStack::PopAndDestroy(wrapped); } if (!re && (iErrorCode == KErrNone)) { iErrorCode = KErrRegExCompileBase - errCode; iErrorOffset = errOffset; } return re; }/** * Cleanup the compiled regular expression and study data. * Separated out from destructor in case support for recompiling * is introduced. */void CRegEx::Cleanup() { if (iReFull) { (*pcre_free)(iReFull); } if (iRePartial) { (*pcre_free)(iRePartial); } if(iExtraPartial) { (*pcre_free)(iExtraPartial); } }// Internal matching and rewrite implementations ///** * Match against aText, filling in aVector (up to aVector.Count() * 2/3) with * pairs of integers for the beginning and end positions of matched * text. The first pair corresponds to the entire matched text; * subsequent pairs correspond, in order, to parentheses-captured * matches. Returns the number of pairs (one more than the number of * the last subpattern with a match) if matching was successful * and zero if the match failed. * I.e. for RE("(foo)|(bar)|(baz)") it will return 2, 3, and 4 when matching * against "foo", "bar", and "baz" respectively. * When matching RE("(foo)|hello") against "hello", it will return 1. * But the values for all subpattern are filled in into "aVector". * @param aText the text to match against the regular expression. * @param aStartPos position in aText to start matching from * @param aAnchor the type of match to perform. * @param aVector vector that stores pairs of integers for the start and end * positions of matched substrings. * @param aVectorSize length of aVector * @return the number of matched subpatterns. 
*/TInt CRegEx::TryMatch(const TDesC8& aText, TInt aStartPos, TAnchor aAnchor, TInt* aVector, TInt aVectorSize) const { pcre* re = (aAnchor == EAnchorBoth) ? iReFull : iRePartial; if (!re) { // Matching against invalid re return 0; } pcre_extra extra = { 0, 0, 0, 0, 0, 0 }; if(iExtraPartial) { extra = *iExtraPartial; } if (iOptions.MatchLimit() > 0) { extra.flags |= PCRE_EXTRA_MATCH_LIMIT; extra.match_limit = iOptions.MatchLimit(); } if (iOptions.MatchLimitRecursion() > 0) { extra.flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION; extra.match_limit_recursion = iOptions.MatchLimitRecursion(); } TInt rc = pcre_exec(re, // The regular expression object &extra, (const char *)aText.Ptr(), aText.Length(), aStartPos, (aAnchor == EUnanchored) ? 0 : PCRE_ANCHORED, aVector, aVectorSize); // Handle errors if (rc == PCRE_ERROR_NOMATCH) { return 0; } else if (rc < 0) { // Unexpected return code return 0; } else if (rc == 0) { // pcre_exec() returns 0 as a special case when the number of // capturing subpatterns exceeds the size of the vector. // When this happens, there is a match and the output vector // is filled, but we miss out on the positions of the extra subpatterns. rc = aVectorSize / 2; } return rc; }/** * Internal implementation of rewrite functionality used by the replace & * extract functions. * Appends the aRewrite string, with backslash subsitutions from aText * and aVector, to string aOut. * @param aOut output descriptor * @param aRewrite descriptor backslash subsitutions to append to aOut. * @param aText descriptor containing substitutions. * @param aVector vector that stores pairs of integers for the start and end * positions of matched substrings. * @param aVectorSize length of aVector. * @param aMatches number of matches. * @return ETrue if the operation was successfull, EFalse otherwise. 
* @see ReplaceL() * @see GlobalReplaceL() * @see ExtractL() */TBool CRegEx::Rewrite(TDes8& aOut, const TDesC8& aRewrite, const TDesC8& aText, TInt* aVector, TInt aVectorSize, TInt aMatches) const { for(TInt i = 0; i < aRewrite.Length(); i++) { TChar c = aRewrite[i]; if (c == '\\') { c = aRewrite[++i]; if (c.IsDigit()) { TUint n = c - TChar('0'); if (n >= aMatches) { iErrorCode = KErrRegExBadBackslashSubsitution; return EFalse; } __ASSERT_DEBUG(aVectorSize >= 2 * n + 1, Panic(EVectorTooSmall)); TInt start = aVector[2 * n]; if (start >= 0) { TInt requiredLength = aVector[2 * n + 1] - start; if((aOut.Length() + requiredLength) <= aOut.MaxLength()) { aOut.Append(aText.Mid(start, requiredLength)); } else { iErrorCode = KErrRegExOutputTooBig; return EFalse; } } } else if (c == '\\') { if((aOut.Length() + 1) <= aOut.MaxLength()) { aOut.Append(c); } else { iErrorCode = KErrRegExOutputTooBig; return EFalse; } } else { // Invalid rewrite pattern iErrorCode = KErrRegExInvalidRewritePattern; return EFalse; } } else { if((aOut.Length() + 1) <= aOut.MaxLength()) { aOut.Append(c); } else { iErrorCode = KErrRegExOutputTooBig; return EFalse; } } } return ETrue; }/** * Internal implementation of the match functionality. * @param aText the text to match against the regular expression. * @param aAnchor the type of match to perform. * @param aConsumed the length of the matched substring. * @param aArgs array of arguments that will contain the extracted subpatterns. * @param aVector output vector that stores pairs of integers for the start and * end positions of matched substrings. 
* @param aVectorSize length of aVector * @return */TBool CRegEx::DoMatchImpl(const TDesC8& aText, TAnchor aAnchor, TInt& aConsumed, const RPointerArray<const TRegExArg>& aArgs, TInt* aVector, TInt aVectorSize) const { // results + PCRE workspace __ASSERT_DEBUG((1 + aArgs.Count()) * KPcreWorkspaceMultiplier <= aVectorSize,Panic(EVectorTooSmall) ); TInt matches = TryMatch(aText, 0, aAnchor, aVector, aVectorSize); // TryMatch never returns negatives __ASSERT_DEBUG(matches >= 0, Panic(EInvalidMatchResults)); if (matches == 0) { iErrorCode = KErrRegExZeroMatches; return EFalse; } aConsumed = aVector[1]; if (aArgs.Count() == 0) { // We are not interested in results return ETrue; } if (NumberOfCapturingGroups() < aArgs.Count()) { // RE has fewer capturing groups than number of arg pointers passed in iErrorCode = KErrRegExFewerCaptureGroupsThanArgs; return EFalse; } // If we got here, we must have matched the whole pattern. // We do not need (can not do) any more checks on the value of 'matches' // here -- see the comment for TryMatch. for (TInt i = 0; i < aArgs.Count(); i++) { const TInt start = aVector[2*(i+1)]; const TInt limit = aVector[2*(i+1)+1]; TBool r; if(start == -1 || limit == -1) { r = aArgs[i]->Parse(KNullDesC8()); } else { r = aArgs[i]->Parse(aText.Mid(start, limit - start)); } if(!r) { iErrorCode = KErrRegExFailedToParseArg; return EFalse; } } return ETrue; }/** * Panic the current thread. * @param aPanic panic code. */void CRegEx::Panic(TRegExPanic aPanic) { User::Panic(KRegExPanic(), aPanic); }