libraries/spcre/src/cregex.cpp
changeset 0 7f656887cf89
equal deleted inserted replaced
-1:000000000000 0:7f656887cf89
       
     1 // Copyright (c) 2005 - 2006, Google Inc.
       
     2 // All rights reserved.
       
     3 //
       
     4 // Redistribution and use in source and binary forms, with or without
       
     5 // modification, are permitted provided that the following conditions are
       
     6 // met:
       
     7 //
       
     8 //     * Redistributions of source code must retain the above copyright
       
     9 // notice, this list of conditions and the following disclaimer.
       
    10 //     * Redistributions in binary form must reproduce the above
       
    11 // copyright notice, this list of conditions and the following disclaimer
       
    12 // in the documentation and/or other materials provided with the
       
    13 // distribution.
       
    14 //     * Neither the name of Google Inc. nor the names of its
       
    15 // contributors may be used to endorse or promote products derived from
       
    16 // this software without specific prior written permission.
       
    17 //
       
    18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
       
    19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
       
    20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
       
    21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
       
    22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
       
    23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
       
    24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
       
    25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
       
    26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
       
    27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
       
    28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
       
    29 //
       
    30 // Author: Sanjay Ghemawat
       
    31 //
       
    32 
       
    33 // Heavily refactored for Symbian OS by Accenture.
       
    34 
       
    35 #define PCRE_EXP_DECL IMPORT_C // We need to define this because the config.h that defines it when building libpcre isn't exported.
       
    36 
       
    37 #include <fshell/descriptorutils.h>
       
    38 #include "pcre.h"
       
    39 #include "cregex.h"
       
    40 #include "tregexarg.h"
       
    41 
       
/**
 * Maximum number of optional arguments supported by the matching interface.
 * If more arguments are required then use the more generic DoMatchL() function
 * that accepts an array of arguments.
 */
static const TInt KMaxArgs = 4;

/**
 * Multiplier used to calculate the size of the vector that stores results
 * from PCRE.
 * @see KVecSize
 */
static const TInt KPcreWorkspaceMultiplier = 3;

/**
 * Size of the fixed result vector used by the fixed-argument matching
 * functions: (1 + KMaxArgs) entries scaled by KPcreWorkspaceMultiplier.
 * NOTE(review): the extra entry is presumably for the overall match - confirm.
 *
 * Quoted description of the vector layout:
 * "The first two-thirds of the vector is used to pass back captured subpatterns,
 * each subpattern using a pair of integers. The remaining third of the vector
 * is used as workspace by pcre_exec() while matching capturing subpatterns,
 * and is not available for passing back information. The number passed in
 * vecsize should always be a multiple of three. If it is not, it is rounded
 * down."
 */
static const TInt KVecSize = (1 + KMaxArgs) * KPcreWorkspaceMultiplier;

/**
 * Number of characters in a triplet escape sequence.
 */
static const TInt KEscapeTripletLength = 3;

/**
 * Number of characters to reserve for escape sequences.
 * Reserves enough room for four triplet escape sequences.
 */
static const TInt KReserveForEscapeChars = KEscapeTripletLength * 4;
       
    74 
       
    75 
       
    76 // Public Functions //
       
    77 
       
    78 /**
       
    79  * 2-Phase constructor for CRegEx objects.
       
    80  * @param aPattern regular expression pattern
       
    81  * @return a pre-compiled regular expression object ready to perform matching.
       
    82  */
       
    83 EXPORT_C CRegEx* CRegEx::NewL(const TDesC8& aPattern)
       
    84 	{
       
    85 	CRegEx* self = NewLC(aPattern);
       
    86 	CleanupStack::Pop(self);
       
    87 	return self;
       
    88 	}
       
    89 
       
    90 /**
       
    91  * 2-Phase constructor for CRegEx objects.
       
    92  * @param aPattern regular expression pattern.
       
    93  * @param aOptions options to use when compiling regular expression.
       
    94  * @return a pre-compiled regular expression object ready to perform matching.
       
    95  */
       
    96 EXPORT_C CRegEx* CRegEx::NewL(const TDesC8& aPattern,
       
    97 	const TRegExOptions& aOptions)
       
    98 	{
       
    99 	CRegEx* self = NewLC(aPattern, aOptions);
       
   100 	CleanupStack::Pop(self);
       
   101 	return self;	
       
   102 	}
       
   103 
       
   104 /**
       
   105  * 2-Phase constructor for CRegEx objects. Object is left on the cleanup stack.
       
   106  * @param aPattern regular expression pattern.
       
   107  * @return a pre-compiled regular expression object ready to perform matching.
       
   108  */
       
   109 EXPORT_C CRegEx* CRegEx::NewLC(const TDesC8& aPattern)
       
   110 	{
       
   111 	CRegEx* self = new(ELeave) CRegEx();
       
   112 	CleanupStack::PushL(self);
       
   113 	self->ConstructL(aPattern);
       
   114 	return self;
       
   115 	}
       
   116 
       
   117 /**
       
   118  * 2-Phase constructor for CRegEx objects. Object is left on the cleanup stack.
       
   119  * @param aPattern regular expression pattern.
       
   120  * @param aOptions options to use when compiling regular expression.
       
   121  * @return a pre-compiled regular expression object ready to perform matching.
       
   122  */
       
   123 EXPORT_C CRegEx* CRegEx::NewLC(const TDesC8& aPattern,
       
   124 	const TRegExOptions& aOptions)
       
   125 	{
       
   126 	CRegEx* self = new(ELeave) CRegEx(aOptions);
       
   127 	CleanupStack::PushL(self);
       
   128 	self->ConstructL(aPattern);
       
   129 	return self;	
       
   130 	}
       
   131 
       
   132 EXPORT_C CRegEx* CRegEx::NewL(const TDesC16& aPattern, const TRegExOptions& aOptions)
       
   133 	{
       
   134 	CRegEx* self = new(ELeave) CRegEx(aOptions);
       
   135 	CleanupStack::PushL(self);
       
   136 	self->ConstructL(aPattern);
       
   137 	CleanupStack::Pop(self);
       
   138 	return self;
       
   139 	}
       
   140 
       
/**
 * Standard destructor to free resources.
 * Deletes the iNoArg and iPattern members and then calls Cleanup().
 */
EXPORT_C CRegEx::~CRegEx()
	{
	delete iNoArg;
	delete iPattern;
	Cleanup();
	}
       
   150 
       
   151 // Public matching interface //
       
   152 
       
   153 /**
       
   154  * Checks if the regular expression (RE) matches the supplied text entirely.
       
   155  * @param aText the text to match against the regular expression.
       
   156  * @return ETrue if the RE matches the supplied text entirely, EFalse otherwise.
       
   157  * @see PartialMatchL()
       
   158  */
       
   159 EXPORT_C TBool CRegEx::FullMatchL(const TDesC8& aText) const
       
   160 	{
       
   161 	return FullMatchL(aText, *iNoArg, *iNoArg, *iNoArg, *iNoArg);
       
   162 	}
       
   163 
       
   164 /**
       
   165  * Checks if the regular expression (RE) matches the supplied text entirely.
       
   166  * @param aText the text to match against the regular expression.
       
   167  * @param aArg1 contains the first extracted subpattern.
       
   168  * @return ETrue if the RE matches the supplied text entirely, EFalse otherwise.
       
   169  * @see PartialMatchL()
       
   170  */
       
   171 EXPORT_C TBool CRegEx::FullMatchL(const TDesC8& aText,
       
   172 		  const TRegExArg& aArg1) const
       
   173 	{
       
   174 	return FullMatchL(aText, aArg1, *iNoArg, *iNoArg, *iNoArg);
       
   175 	}
       
   176 
       
   177 /**
       
   178  * Checks if the regular expression (RE) matches the supplied text entirely.
       
   179  * @param aText the text to match against the regular expression.
       
   180  * @param aArg1 contains the first extracted subpattern.
       
   181  * @param aArg2 contains the second extracted subpattern.
       
   182  * @return ETrue if the RE matches the supplied text entirely, EFalse otherwise.
       
   183  * @see PartialMatchL()
       
   184  */
       
   185 EXPORT_C TBool CRegEx::FullMatchL(const TDesC8& aText,
       
   186 		  const TRegExArg& aArg1,
       
   187 		  const TRegExArg& aArg2) const
       
   188 	{
       
   189 	return FullMatchL(aText, aArg1, aArg2, *iNoArg, *iNoArg);
       
   190 	}
       
   191 
       
   192 /**
       
   193  * Checks if the regular expression (RE) matches the supplied text entirely.
       
   194  * @param aText the text to match against the regular expression.
       
   195  * @param aArg1 contains the first extracted subpattern.
       
   196  * @param aArg2 contains the second extracted subpattern.
       
   197  * @param aArg3 contains the third extracted subpattern.
       
   198  * @return ETrue if the RE matches the supplied text entirely, EFalse otherwise.
       
   199  * @see PartialMatchL()
       
   200  */
       
   201 EXPORT_C TBool CRegEx::FullMatchL(const TDesC8& aText,
       
   202 		  const TRegExArg& aArg1,
       
   203 		  const TRegExArg& aArg2,
       
   204 		  const TRegExArg& aArg3) const
       
   205 	{
       
   206 	return FullMatchL(aText, aArg1, aArg2, aArg3, *iNoArg);
       
   207 	}
       
   208 
       
   209 /**
       
   210  * Checks if the regular expression (RE) matches the supplied text entirely.
       
   211  * @param aText the text to match against the regular expression.
       
   212  * @param aArg1 contains the first extracted subpattern.
       
   213  * @param aArg2 contains the second extracted subpattern.
       
   214  * @param aArg3 contains the third extracted subpattern.
       
   215  * @param aArg4 contains the fourth extracted subpattern.
       
   216  * @return ETrue if the RE matches the supplied text entirely, EFalse otherwise.
       
   217  * @see PartialMatchL()
       
   218  */
       
   219 EXPORT_C TBool CRegEx::FullMatchL(const TDesC8& aText,
       
   220 		  const TRegExArg& aArg1,
       
   221 		  const TRegExArg& aArg2,
       
   222 		  const TRegExArg& aArg3,
       
   223 		  const TRegExArg& aArg4) const
       
   224 	{
       
   225 	RPointerArray<const TRegExArg> args;
       
   226 	CleanupClosePushL(args);
       
   227 	args.ReserveL(KMaxArgs);
       
   228 	
       
   229 	if (&aArg1 != iNoArg)
       
   230 		{
       
   231 		args.AppendL(&aArg1);
       
   232 		}
       
   233 	if (&aArg2 != iNoArg)
       
   234 		{
       
   235 		args.AppendL(&aArg2);		
       
   236 		}
       
   237 	if (&aArg3 != iNoArg)
       
   238 		{
       
   239 		args.AppendL(&aArg3);		
       
   240 		}
       
   241 	if (&aArg4 != iNoArg)
       
   242 		{
       
   243 		args.AppendL(&aArg4);		
       
   244 		}
       
   245 	
       
   246 	TInt consumed = 0;
       
   247 	TInt vector[KVecSize];
       
   248 	
       
   249 	TBool r =  DoMatchImpl(aText, EAnchorBoth, consumed, args, vector, KVecSize);
       
   250 	CleanupStack::PopAndDestroy(&args);
       
   251 	return r;
       
   252 	}
       
   253 
       
   254 /**
       
   255  * Checks if the regular expression (RE) matches any substring of the text.
       
   256  * @param aText the text to match against the regular expression.
       
   257  * @return ETrue if the RE  matches any substring of the supplied text,
       
   258  * EFalse otherwise.
       
   259  * @see FullMatchL()
       
   260  */
       
   261 EXPORT_C TBool CRegEx::PartialMatchL(const TDesC8& aText) const
       
   262 	{
       
   263 	return PartialMatchL(aText, *iNoArg, *iNoArg, *iNoArg, *iNoArg);
       
   264 	}
       
   265 
       
   266 /**
       
   267  * Checks if the regular expression (RE) matches any substring of the text.
       
   268  * @param aText the text to match against the regular expression.
       
   269  * @param aArg1 contains the first extracted subpattern.
       
   270  * @return ETrue if the RE  matches any substring of the supplied text,
       
   271  * EFalse otherwise.
       
   272  * @see FullMatchL()
       
   273  */
       
   274 EXPORT_C TBool CRegEx::PartialMatchL(const TDesC8& aText,
       
   275 		  const TRegExArg& aArg1) const
       
   276 	{
       
   277 	return PartialMatchL(aText, aArg1, *iNoArg, *iNoArg, *iNoArg);
       
   278 	}
       
   279 
       
   280 /**
       
   281  * Checks if the regular expression (RE) matches any substring of the text.
       
   282  * @param aText the text to match against the regular expression.
       
   283  * @param aArg1 contains the first extracted subpattern.
       
   284  * @param aArg2 contains the second extracted subpattern.
       
   285  * @return ETrue if the RE  matches any substring of the supplied text,
       
   286  * EFalse otherwise.
       
   287  * @see FullMatchL()
       
   288  */
       
   289 EXPORT_C TBool CRegEx::PartialMatchL(const TDesC8& aText,
       
   290 		  const TRegExArg& aArg1,
       
   291 		  const TRegExArg& aArg2) const
       
   292 	{
       
   293 	return PartialMatchL(aText, aArg1, aArg2, *iNoArg, *iNoArg);
       
   294 	}
       
   295 
       
   296 /**
       
   297  * Checks if the regular expression (RE) matches any substring of the text.
       
   298  * @param aText the text to match against the regular expression.
       
   299  * @param aArg1 contains the first extracted subpattern.
       
   300  * @param aArg2 contains the second extracted subpattern.
       
   301  * @param aArg3 contains the third extracted subpattern.
       
   302  * @return ETrue if the RE  matches any substring of the supplied text,
       
   303  * EFalse otherwise.
       
   304  * @see FullMatchL()
       
   305  */
       
   306 EXPORT_C TBool CRegEx::PartialMatchL(const TDesC8& aText,
       
   307 		  const TRegExArg& aArg1,
       
   308 		  const TRegExArg& aArg2,
       
   309 		  const TRegExArg& aArg3) const
       
   310 	{
       
   311 	return PartialMatchL(aText, aArg1, aArg2, aArg3, *iNoArg);
       
   312 	}
       
   313 
       
   314 /**
       
   315  * Checks if the regular expression (RE) matches any substring of the text.
       
   316  * @param aText the text to match against the regular expression.
       
   317  * @param aArg1 contains the first extracted subpattern.
       
   318  * @param aArg2 contains the second extracted subpattern.
       
   319  * @param aArg3 contains the third extracted subpattern.
       
   320  * @param aArg4 contains the fourth extracted subpattern. 
       
   321  * @return ETrue if the RE  matches any substring of the supplied text,
       
   322  * EFalse otherwise.
       
   323  * @see FullMatchL()
       
   324  */
       
   325 EXPORT_C TBool CRegEx::PartialMatchL(const TDesC8& aText,
       
   326 		  const TRegExArg& aArg1,
       
   327 		  const TRegExArg& aArg2,
       
   328 		  const TRegExArg& aArg3,
       
   329 		  const TRegExArg& aArg4) const
       
   330 	{
       
   331 	RPointerArray<const TRegExArg> args;
       
   332 	CleanupClosePushL(args);
       
   333 	args.ReserveL(KMaxArgs);
       
   334 
       
   335 	if (&aArg1 != iNoArg)
       
   336 		{
       
   337 		args.AppendL(&aArg1);
       
   338 		}
       
   339 	if (&aArg2 != iNoArg)
       
   340 		{
       
   341 		args.AppendL(&aArg2);		
       
   342 		}
       
   343 	if (&aArg3 != iNoArg)
       
   344 		{
       
   345 		args.AppendL(&aArg3);		
       
   346 		}
       
   347 	if (&aArg4 != iNoArg)
       
   348 		{
       
   349 		args.AppendL(&aArg4);		
       
   350 		}
       
   351 	
       
   352 	TInt consumed = 0;
       
   353 	TInt vector[KVecSize];
       
   354 	
       
   355 	TBool r =  DoMatchImpl(aText, EUnanchored, consumed, args, vector,
       
   356 					KVecSize);
       
   357 	CleanupStack::PopAndDestroy(&args);
       
   358 	return r;
       
   359 	}
       
   360 
       
   361 
       
   362 /**
       
   363  * General function to perform a regular expression (RE) match on a substring
       
   364  * of the text.
       
   365  * @param aText the text to match against the regular expression.
       
   366  * @param aAnchor the type of match to perform
       
   367  * @return ETrue if the RE  matches a substring of the supplied text,
       
   368  * EFalse otherwise.
       
   369  */
       
   370 EXPORT_C TBool CRegEx::DoMatchL(const TDesC8& aText,
       
   371 		  TAnchor aAnchor,
       
   372 		  TInt&	aConsumed) const
       
   373 	{
       
   374 	return DoMatchL(aText, aAnchor, aConsumed, *iNoArg, *iNoArg, *iNoArg,
       
   375 				*iNoArg);
       
   376 	}
       
   377 
       
   378 /**
       
   379  * General function to perform a regular expression (RE) match on a substring
       
   380  * of the text.
       
   381  * @param aText the text to match against the regular expression.
       
   382  * @param aAnchor the type of match to perform
       
   383  * @param aArg1 contains the first extracted subpattern.
       
   384  * @return ETrue if the RE  matches a substring of the supplied text,
       
   385  * EFalse otherwise.
       
   386  */
       
   387 EXPORT_C TBool CRegEx::DoMatchL(const TDesC8& aText,
       
   388 		  TAnchor aAnchor,
       
   389 		  TInt&	aConsumed,
       
   390 		  const TRegExArg& aArg1) const
       
   391 	{
       
   392 	return DoMatchL(aText, aAnchor, aConsumed, aArg1, *iNoArg, *iNoArg,
       
   393 				*iNoArg);
       
   394 	}
       
   395 
       
   396 /**
       
   397  * General function to perform a regular expression (RE) match on a substring
       
   398  * of the text.
       
   399  * @param aText the text to match against the regular expression.
       
   400  * @param aAnchor the type of match to perform
       
   401  * @param aArg1 contains the first extracted subpattern.
       
   402  * @param aArg2 contains the second extracted subpattern.
       
   403  * @return ETrue if the RE  matches a substring of the supplied text,
       
   404  * EFalse otherwise.
       
   405  */
       
   406 EXPORT_C TBool CRegEx::DoMatchL(const TDesC8& aText,
       
   407 		  TAnchor aAnchor,
       
   408 		  TInt&	aConsumed,
       
   409 		  const TRegExArg& aArg1,
       
   410 		  const TRegExArg& aArg2) const
       
   411 	{
       
   412 	return DoMatchL(aText, aAnchor, aConsumed, aArg1, aArg2, *iNoArg, *iNoArg);
       
   413 	}
       
   414 
       
   415 /**
       
   416  * General function to perform a regular expression (RE) match on a substring
       
   417  * of the text.
       
   418  * @param aText the text to match against the regular expression.
       
   419  * @param aAnchor the type of match to perform
       
   420  * @param aArg1 contains the first extracted subpattern.
       
   421  * @param aArg2 contains the second extracted subpattern.
       
   422  * @param aArg3 contains the third extracted subpattern.
       
   423  * @return ETrue if the RE  matches a substring of the supplied text,
       
   424  * EFalse otherwise.
       
   425  */
       
   426 EXPORT_C TBool CRegEx::DoMatchL(const TDesC8& aText,
       
   427 		  TAnchor aAnchor,
       
   428 		  TInt&	aConsumed,
       
   429 		  const TRegExArg& aArg1,
       
   430 		  const TRegExArg& aArg2,
       
   431 		  const TRegExArg& aArg3) const
       
   432 	{
       
   433 	return DoMatchL(aText, aAnchor, aConsumed, aArg1, aArg2, aArg3, *iNoArg);
       
   434 	}
       
   435 
       
   436 /**
       
   437  * General function to perform a regular expression (RE) match on a substring
       
   438  * of the text.
       
   439  * @param aText the text to match against the regular expression.
       
   440  * @param aAnchor the type of match to perform
       
   441  * @param aArg1 contains the first extracted subpattern.
       
   442  * @param aArg2 contains the second extracted subpattern.
       
   443  * @param aArg3 contains the third extracted subpattern.
       
   444  * @param aArg4 contains the fourth extracted subpattern. 
       
   445  * @return ETrue if the RE  matches a substring of the supplied text,
       
   446  * EFalse otherwise.
       
   447  */
       
   448 EXPORT_C TBool CRegEx::DoMatchL(const TDesC8& aText,
       
   449 		  TAnchor aAnchor,
       
   450 		  TInt&	aConsumed,
       
   451 		  const TRegExArg& aArg1,
       
   452 		  const TRegExArg& aArg2,
       
   453 		  const TRegExArg& aArg3,
       
   454 		  const TRegExArg& aArg4) const
       
   455 	{
       
   456 	RPointerArray<const TRegExArg> args;
       
   457 	CleanupClosePushL(args);
       
   458 	args.ReserveL(KMaxArgs);
       
   459 	
       
   460 	if (&aArg1 != iNoArg)
       
   461 		{
       
   462 		args.AppendL(&aArg1);
       
   463 		}
       
   464 	if (&aArg2 != iNoArg)
       
   465 		{
       
   466 		args.AppendL(&aArg2);		
       
   467 		}
       
   468 	if (&aArg3 != iNoArg)
       
   469 		{
       
   470 		args.AppendL(&aArg3);		
       
   471 		}
       
   472 	if (&aArg4 != iNoArg)
       
   473 		{
       
   474 		args.AppendL(&aArg4);		
       
   475 		}	
       
   476 
       
   477 	TInt r = DoMatchL(aText, aAnchor, aConsumed, args);
       
   478 	CleanupStack::PopAndDestroy(&args);
       
   479 	return r;
       
   480 	}
       
   481 
       
   482 /**
       
   483  * General function to perform a regular expression (RE) match on a substring
       
   484  * of the text.
       
   485  * @param aText the text to match against the regular expression.
       
   486  * @param aAnchor the type of match to perform
       
   487  * @param array of arguments that will contain the extracted subpatterns.
       
   488  * @return ETrue if the RE  matches a substring of the supplied text,
       
   489  * EFalse otherwise.
       
   490  */
       
   491 EXPORT_C TBool CRegEx::DoMatchL(const TDesC8& aText,
       
   492 		  TAnchor aAnchor,
       
   493 		  TInt&	aConsumed,			
       
   494 		  const RPointerArray<const TRegExArg>& aArgs) const
       
   495 	{
       
   496 	const TInt vectorSize = (1 + aArgs.Count()) * KPcreWorkspaceMultiplier;
       
   497 	TInt* vector = new( ELeave ) TInt[ vectorSize ];
       
   498 	CleanupArrayDeletePushL( vector );	
       
   499 	
       
   500 	TBool r =  DoMatchImpl(aText, aAnchor, aConsumed, aArgs, vector,
       
   501 					vectorSize);
       
   502 	CleanupStack::PopAndDestroy(vector);
       
   503 	return r;	
       
   504 	}
       
   505 
       
   506 /**
       
   507  * Allows text to be scanned incrementally. Call this function repeatidly to
       
   508  * match regular expressions at the front of a string and skip over them as
       
   509  * they match. 
       
   510  * @param aText the text to match against the regular expression.
       
   511  * @return ETrue if the RE matched and a substring was consumed,
       
   512  * EFalse otherwise.
       
   513  * @see FindAndConsumeL()
       
   514  */
       
   515 EXPORT_C TBool CRegEx::ConsumeL(TDes8& aText) const
       
   516 	{
       
   517 	return ConsumeL(aText, *iNoArg, *iNoArg, *iNoArg, *iNoArg);
       
   518 	}
       
   519 
       
   520 /**
       
   521  * Allows text to be scanned incrementally. Repeatidly call this function to
       
   522  * match regular expressions at the front of a string and skip over them as
       
   523  * they match. 
       
   524  * @param aText the text to match against the regular expression.
       
   525  * @param aArg1 contains the first extracted subpattern.
       
   526  * @return ETrue if the RE matched and a substring was consumed,
       
   527  * EFalse otherwise.
       
   528  * @see FindAndConsumeL()
       
   529  */
       
   530 EXPORT_C TBool CRegEx::ConsumeL(TDes8& aText,
       
   531 		  const TRegExArg& aArg1) const
       
   532 	{
       
   533 	return ConsumeL(aText, aArg1, *iNoArg, *iNoArg, *iNoArg);
       
   534 	}
       
   535 
       
   536 /**
       
   537  * Allows text to be scanned incrementally. Repeatidly call this function to
       
   538  * match regular expressions at the front of a string and skip over them as
       
   539  * they match. 
       
   540  * @param aText the text to match against the regular expression.
       
   541  * @param aArg1 contains the first extracted subpattern.
       
   542  * @param aArg2 contains the second extracted subpattern.
       
   543  * @return ETrue if the RE matched and a substring was consumed,
       
   544  * EFalse otherwise.
       
   545  * @see FindAndConsumeL()
       
   546  */
       
   547 EXPORT_C TBool CRegEx::ConsumeL(TDes8& aText,
       
   548 		  const TRegExArg& aArg1,
       
   549 		  const TRegExArg& aArg2) const
       
   550 	{
       
   551 	return ConsumeL(aText, aArg1, aArg2, *iNoArg, *iNoArg);
       
   552 	}
       
   553 
       
   554 /**
       
   555  * Allows text to be scanned incrementally. Repeatidly call this function to
       
   556  * match regular expressions at the front of a string and skip over them as
       
   557  * they match. 
       
   558  * @param aText the text to match against the regular expression.
       
   559  * @param aArg1 contains the first extracted subpattern.
       
   560  * @param aArg2 contains the second extracted subpattern.
       
   561  * @param aArg3 contains the third extracted subpattern.
       
   562  * @return ETrue if the RE matched and a substring was consumed,
       
   563  * EFalse otherwise.
       
   564  * @see FindAndConsumeL()
       
   565  */
       
   566 EXPORT_C TBool CRegEx::ConsumeL(TDes8& aText,
       
   567 		  const TRegExArg& aArg1,
       
   568 		  const TRegExArg& aArg2,
       
   569 		  const TRegExArg& aArg3) const
       
   570 	{
       
   571 	return ConsumeL(aText, aArg1, aArg2, aArg3, *iNoArg);
       
   572 	}
       
   573 
       
   574 /**
       
   575  * Allows text to be scanned incrementally. Repeatidly call this function to
       
   576  * match regular expressions at the front of a string and skip over them as
       
   577  * they match. 
       
   578  * @param aText the text to match against the regular expression.
       
   579  * @param aArg1 contains the first extracted subpattern.
       
   580  * @param aArg2 contains the second extracted subpattern.
       
   581  * @param aArg3 contains the third extracted subpattern.
       
   582  * @param aArg4 contains the fourth extracted subpattern. 
       
   583  * @return ETrue if the RE matched and a substring was consumed,
       
   584  * EFalse otherwise.
       
   585  * @see FindAndConsumeL()
       
   586  */
       
   587 EXPORT_C TBool CRegEx::ConsumeL(TDes8& aText,
       
   588 		  const TRegExArg& aArg1,
       
   589 		  const TRegExArg& aArg2,
       
   590 		  const TRegExArg& aArg3,
       
   591 		  const TRegExArg& aArg4) const
       
   592 	{
       
   593 	RPointerArray<const TRegExArg> args;
       
   594 	CleanupClosePushL(args);
       
   595 	args.ReserveL(KMaxArgs);
       
   596 
       
   597 	if (&aArg1 != iNoArg)
       
   598 		{
       
   599 		args.AppendL(&aArg1);
       
   600 		}
       
   601 	if (&aArg2 != iNoArg)
       
   602 		{
       
   603 		args.AppendL(&aArg2);		
       
   604 		}
       
   605 	if (&aArg3 != iNoArg)
       
   606 		{
       
   607 		args.AppendL(&aArg3);		
       
   608 		}
       
   609 	if (&aArg4 != iNoArg)
       
   610 		{
       
   611 		args.AppendL(&aArg4);		
       
   612 		}
       
   613 	
       
   614 	TInt consumed = 0;
       
   615 	TInt vector[KVecSize];
       
   616 	
       
   617 	TBool r =  DoMatchImpl(aText, EAnchorStart, consumed, args, vector,
       
   618 					KVecSize);
       
   619 	if (r)
       
   620 		{
       
   621 		// Remove prefix
       
   622 		aText.Delete(0, consumed);		
       
   623 		}
       
   624 	CleanupStack::PopAndDestroy(&args);
       
   625 	return r;	
       
   626 	}
       
   627 
       
   628 /**
       
   629  * Allows text to be scanned incrementally. Repeatidly call this function to
       
   630  * match  regular expression in a string and extract them as they match.
       
   631  * Unlike ConsumeL, your match is not anchored to the start of the string.
       
   632  * @param aText the text to match against the regular expression.
       
   633  * @return ETrue if the RE matched and a substring was consumed,
       
   634  * EFalse otherwise.
       
   635  * @see ConsumeL()
       
   636  */
       
   637 EXPORT_C TBool CRegEx::FindAndConsumeL(TDes8& aText) const
       
   638 	{
       
   639 	return FindAndConsumeL(aText, *iNoArg, *iNoArg, *iNoArg, *iNoArg);
       
   640 	}
       
   641 
       
   642 /**
       
   643  * Allows text to be scanned incrementally. Repeatidly call this function to
       
   644  * match  regular expression in a string and extract them as they match.
       
   645  * Unlike ConsumeL, your match is not anchored to the start of the string.
       
   646  * @param aText the text to match against the regular expression.
       
   647  * @param aArg1 contains the first extracted subpattern.
       
   648  * @return ETrue if the RE matched and a substring was consumed,
       
   649  * EFalse otherwise.
       
   650  * @see ConsumeL()
       
   651  */
       
   652 EXPORT_C TBool CRegEx::FindAndConsumeL(TDes8& aText,
       
   653 		  const TRegExArg& aArg1) const
       
   654 	{
       
   655 	return FindAndConsumeL(aText, aArg1, *iNoArg, *iNoArg, *iNoArg);
       
   656 	}
       
   657 
       
   658 /**
       
   659  * Allows text to be scanned incrementally. Repeatidly call this function to
       
   660  * match  regular expression in a string and extract them as they match.
       
   661  * Unlike ConsumeL, your match is not anchored to the start of the string.
       
   662  * @param aText the text to match against the regular expression.
       
   663  * @param aArg1 contains the first extracted subpattern.
       
   664  * @param aArg2 contains the second extracted subpattern.
       
   665  * @return ETrue if the RE matched and a substring was consumed,
       
   666  * EFalse otherwise.
       
   667  * @see ConsumeL()
       
   668  */
       
   669 EXPORT_C TBool CRegEx::FindAndConsumeL(TDes8& aText,
       
   670 		  const TRegExArg& aArg1,
       
   671 		  const TRegExArg& aArg2) const
       
   672 	{
       
   673 	return FindAndConsumeL(aText, aArg1, aArg2, *iNoArg, *iNoArg);
       
   674 	}
       
   675 
       
   676 /**
       
   677  * Allows text to be scanned incrementally. Repeatidly call this function to
       
   678  * match  regular expression in a string and extract them as they match.
       
   679  * Unlike ConsumeL, your match is not anchored to the start of the string.
       
   680  * @param aText the text to match against the regular expression.
       
   681  * @param aArg1 contains the first extracted subpattern.
       
   682  * @param aArg2 contains the second extracted subpattern.
       
   683  * @param aArg3 contains the third extracted subpattern.
       
   684  * @return ETrue if the RE matched and a substring was consumed,
       
   685  * EFalse otherwise.
       
   686  * @see ConsumeL()
       
   687  */
       
   688 EXPORT_C TBool CRegEx::FindAndConsumeL(TDes8& aText,
       
   689 		  const TRegExArg& aArg1,
       
   690 		  const TRegExArg& aArg2,
       
   691 		  const TRegExArg& aArg3) const
       
   692 	{
       
   693 	return FindAndConsumeL(aText, aArg1, aArg2, aArg3, *iNoArg);
       
   694 	}
       
   695 
       
   696 /**
       
   697  * Allows text to be scanned incrementally. Repeatidly call this function to
       
   698  * match  regular expression in a string and extract them as they match.
       
   699  * Unlike ConsumeL, your match is not anchored to the start of the string.
       
   700  * @param aText the text to match against the regular expression.
       
   701  * @param aArg1 contains the first extracted subpattern.
       
   702  * @param aArg2 contains the second extracted subpattern.
       
   703  * @param aArg3 contains the third extracted subpattern.
       
   704  * @param aArg4 contains the fourth extracted subpattern. 
       
   705  * @return ETrue if the RE matched and a substring was consumed,
       
   706  * EFalse otherwise.
       
   707  * @see ConsumeL()
       
   708  */
       
   709 EXPORT_C TBool CRegEx::FindAndConsumeL(TDes8& aText,
       
   710 		  const TRegExArg& aArg1,
       
   711 		  const TRegExArg& aArg2,
       
   712 		  const TRegExArg& aArg3,
       
   713 		  const TRegExArg& aArg4) const
       
   714 	{
       
   715 	RPointerArray<const TRegExArg> args;
       
   716 	CleanupClosePushL(args);
       
   717 	args.ReserveL(KMaxArgs);
       
   718 
       
   719 	if (&aArg1 != iNoArg)
       
   720 		{
       
   721 		args.AppendL(&aArg1);
       
   722 		}
       
   723 	if (&aArg2 != iNoArg)
       
   724 		{
       
   725 		args.AppendL(&aArg2);		
       
   726 		}
       
   727 	if (&aArg3 != iNoArg)
       
   728 		{
       
   729 		args.AppendL(&aArg3);		
       
   730 		}
       
   731 	if (&aArg4 != iNoArg)
       
   732 		{
       
   733 		args.AppendL(&aArg4);		
       
   734 		}	
       
   735 	
       
   736 	TInt consumed = 0;
       
   737 	TInt vector[KVecSize];
       
   738 	
       
   739 	TBool r = DoMatchImpl(aText, EUnanchored, consumed, args, vector, KVecSize);
       
   740 	CleanupStack::PopAndDestroy(&args);
       
   741 	
       
   742 	if (r)
       
   743 		{
       
   744 		// Remove prefix
       
   745 		aText.Delete(0, consumed);
       
   746 		}
       
   747 	
       
   748 	return r;
       
   749 	}
       
   750 
       
   751 
       
   752 /**
       
   753  * The first match of the regular expression in aString is
       
   754  * replaced by aRewrite.
       
   755  * 
       
   756  * Within aRewrite, backslash-escaped digits (\1 to \9) can be
       
   757  * used to insert text matching a corresponding parenthesized group from
       
   758  * the pattern. \0 in aRewrite refers to the entire matching text.
       
   759  * @param aRewrite the text to replace the matching substring with.
       
   760  * @param aText the text to match against the regular expression.
       
   761  * @return ETrue if match occurred and replace was succsessful,
       
   762  * EFalse otherwise.
       
   763  * @see GlobalReplaceL()
       
   764  */
       
   765 EXPORT_C TBool CRegEx::ReplaceL(const TDesC8& aRewrite, TDes8& aString) const
       
   766 	{
       
   767 	TInt r = EFalse;
       
   768 	TInt vector[KVecSize];
       
   769 	
       
   770 	TInt matches = TryMatch(aString, 0, EUnanchored, vector, KVecSize);
       
   771 	if (matches == 0)
       
   772 		{
       
   773 		return r;
       
   774 		}		
       
   775 	
       
   776 	HBufC8* s = HBufC8::NewLC(aString.MaxLength());
       
   777 	TPtr8 pS = s->Des();
       
   778 	if (!Rewrite(pS, aRewrite, aString, vector, KVecSize, matches))
       
   779 		{
       
   780 		CleanupStack::PopAndDestroy(s);
       
   781 		return r;
       
   782 		}		
       
   783 	
       
   784 	__ASSERT_DEBUG(vector[0] >= 0, Panic(EInvalidMatchResults));
       
   785 	__ASSERT_DEBUG(vector[1] >= 0, Panic(EInvalidMatchResults));
       
   786 	
       
   787 	TInt replacementLength = vector[1] - vector[0];
       
   788 	
       
   789 	if((aString.Length() + s->Length() - replacementLength) <= aString.MaxLength())
       
   790 		{
       
   791 		aString.Replace(vector[0], replacementLength, *s);
       
   792 		}
       
   793 	else
       
   794 		{
       
   795 		CleanupStack::PopAndDestroy(s);
       
   796 		iErrorCode = KErrRegExOutputTooBig;
       
   797 		return r;
       
   798 		}
       
   799 	
       
   800 	CleanupStack::PopAndDestroy(s);
       
   801 	r = ETrue;
       
   802 	return r;
       
   803 	}
       
   804 
       
   805 /**
       
   806  * All matches of the regular expression in aString are
       
   807  * replaced by aRewrite.
       
   808  * 
       
   809  * Within the rewrite string, backslash-escaped digits (\1 to \9) can be
       
   810  * used to insert text matching a corresponding parenthesized group from
       
   811  * the pattern. \0 in "aRewrite" refers to the entire matching text.
       
   812  * @param aRewrite the text to replace the matching substring with.
       
   813  * @param aText the text to match against the regular expression.
       
   814  * @return ETrue if matches occurred and replace was succsessful,
       
   815  * EFalse otherwise.
       
   816  * @see ReplaceL()
       
   817  */
       
   818 EXPORT_C TInt CRegEx::GlobalReplaceL(const TDesC8& aRewrite,
       
   819 	TDes8& aString) const	
       
   820 	{
       
   821 	TInt count = 0;
       
   822 	TInt vector[KVecSize];
       
   823 	
       
   824 	HBufC8* out = HBufC8::NewLC(aString.MaxLength());
       
   825 	TPtr8 pOut = out->Des();
       
   826 	
       
   827 	TInt start = 0;
       
   828 	TInt lastend = -1;
       
   829 	
       
   830 	while (start <= aString.Length())
       
   831 		{
       
   832 		TInt matches = TryMatch(aString, start, EUnanchored, vector, KVecSize);
       
   833 		if (matches <= 0)
       
   834 			{
       
   835 			break;
       
   836 			}			
       
   837 		TInt matchstart = vector[0];
       
   838 		TInt matchend = vector[1];
       
   839 		
       
   840 		__ASSERT_DEBUG(matchstart >= start, EInvalidMatchResults);
       
   841 		__ASSERT_DEBUG(matchend >= matchstart, EInvalidMatchResults);
       
   842 		if (matchstart == matchend && matchstart == lastend)
       
   843 			{
       
   844 			// advance one character if we matched an empty string at the same
       
   845 			// place as the last match occurred
       
   846 			matchend = start + 1;
       
   847 			// If the current char is CR and we're in CRLF mode, skip LF too.
       
   848 			// Note it's better to call pcre_fullinfo() than to examine
       
   849 			// all_options(), since options_ could have changed bewteen
       
   850 			// compile-time and now, but this is simpler and safe enough.
       
   851 			// Modified by PH to add ANY and ANYCRLF.
       
   852 			if ((start + 1 < aString.Length()) &&
       
   853 					aString[start] == '\r' && aString[start+1] == '\n' &&
       
   854 					(NewlineMode(iOptions.AllOptions()) == PCRE_NEWLINE_CRLF ||
       
   855 					NewlineMode(iOptions.AllOptions()) == PCRE_NEWLINE_ANY ||
       
   856 					NewlineMode(iOptions.AllOptions()) == PCRE_NEWLINE_ANYCRLF))
       
   857 				{
       
   858 				matchend++;
       
   859 				}
       
   860 			// We also need to advance more than one char if we're in utf8 mode.
       
   861 			#ifdef SUPPORT_UTF8
       
   862 			if (iOptions.Utf8())
       
   863 				{
       
   864 				while ((matchend < aString.Length())
       
   865 					&& (aString[matchend] & 0xc0) == 0x80)
       
   866 					{
       
   867 					matchend++;
       
   868 					}					
       
   869 				}
       
   870 			#endif
       
   871 			if (matchend <= aString.Length())
       
   872 				{
       
   873 				if(pOut.Length() + (matchend - start) <= pOut.MaxLength())
       
   874 					{
       
   875 					pOut.Append(aString.Mid(start, matchend - start));
       
   876 					}
       
   877 				else
       
   878 					{
       
   879 					iErrorCode = KErrRegExOutputTooBig;
       
   880 					return KErrRegExOutputTooBig;
       
   881 					}
       
   882 				}				
       
   883 			start = matchend;
       
   884 			}
       
   885 		else
       
   886 			{
       
   887 			if(pOut.Length() + (matchstart - start) <= pOut.MaxLength())
       
   888 				{
       
   889 				pOut.Append(aString.Mid(start, matchstart - start));
       
   890 				}
       
   891 			else
       
   892 				{
       
   893 				iErrorCode = KErrRegExOutputTooBig;
       
   894 				return KErrRegExOutputTooBig;
       
   895 				}
       
   896 			Rewrite(pOut, aRewrite, aString, vector, KVecSize, matches);
       
   897 			start = matchend;
       
   898 			lastend = matchend;
       
   899 			count++;
       
   900 			}
       
   901 		}
       
   902 	
       
   903 	if (count == 0)
       
   904 		{
       
   905 		return count;
       
   906 		}	
       
   907 
       
   908 	if (start < aString.Length())
       
   909 		{
       
   910 		if((aString.Length() - start) + pOut.Length() <= pOut.MaxLength())
       
   911 			{
       
   912 			pOut.Append(aString.Mid(start, aString.Length() - start));
       
   913 			}
       
   914 		else
       
   915 			{
       
   916 			iErrorCode = KErrRegExOutputTooBig;
       
   917 			return KErrRegExOutputTooBig;
       
   918 			}
       
   919 		}
       
   920 	
       
   921 	aString.Swap(pOut);
       
   922 	
       
   923 	CleanupStack::PopAndDestroy(out);
       
   924 	
       
   925 	return count;
       
   926 	}
       
   927 
       
   928 /**
       
   929  * The first match of the regular expression in the supplied string is
       
   930  * replaced by another supplied string and copied into aOut with substitutions.
       
   931  * The non-matching portions of aString are ignored.
       
   932  * 
       
   933  * Within the rewrite string, backslash-escaped digits (\1 to \9) can be
       
   934  * used to insert text matching a corresponding parenthesized group from
       
   935  * the pattern. \0 in "aRewrite" refers to the entire matching text.
       
   936  * @param aRewrite the text to replace the matching substring with.
       
   937  * @param aText the text to match against the regular expression.
       
   938  * @return ETrue if match occurred and extraction was succsessful,
       
   939  * EFalse otherwise.
       
   940  * @see ReplaceL()
       
   941  */
       
   942 EXPORT_C TBool CRegEx::ExtractL(const TDesC8& aRewrite,
       
   943 					const TDesC8& aText, TDes8& aOut) const
       
   944 	{
       
   945 	TInt vector[KVecSize];
       
   946 	
       
   947 	TInt matches = TryMatch(aText, 0, EUnanchored, vector, KVecSize);
       
   948 	if (matches == 0)
       
   949 		{
       
   950 		iErrorCode = KErrRegExZeroMatches;
       
   951 		return EFalse;
       
   952 		}	
       
   953 	aOut.Zero();
       
   954 	
       
   955 	TBool r = Rewrite(aOut, aRewrite, aText, vector, KVecSize, matches);
       
   956 	return r;
       
   957 	}
       
   958 
       
   959 /**
       
   960  * Returns EPcreNewlineAnyCrLf, EPcreNewlineAny, EPcreNewlineCrLf,
       
   961  * EPcreNewlineLf or EPcreNewlineCr
       
   962  * Note that EPcreNewlineCrLf is defined to be EPcreNewlineCr | EPcreNewlineLf.
       
   963  * @param aOptions
       
   964  * @return
       
   965  */
       
   966 
       
   967 EXPORT_C TInt CRegEx::NewlineMode(TInt aOptions)
       
   968 	{
       
   969 	// TODO: if we can make it threadsafe, cache this var
       
   970 	TInt newlineMode = 0;
       
   971 	/* if (newlineMode) return newlineMode; */  // do this once it's cached
       
   972 	
       
   973 	if (aOptions & (EPcreNewlineCrLf|EPcreNewlineCr|EPcreNewlineLf|
       
   974 			EPcreNewlineAny|EPcreNewlineAnyCrLf))
       
   975 		{
       
   976 		newlineMode = (aOptions &
       
   977 				(EPcreNewlineCrLf|EPcreNewlineCr|EPcreNewlineLf|
       
   978 						EPcreNewlineAny|EPcreNewlineAnyCrLf));
       
   979 		}
       
   980 	else
       
   981 		{
       
   982 		TInt newline;
       
   983 		pcre_config(PCRE_CONFIG_NEWLINE, &newline);
       
   984 
       
   985 		switch(newline)
       
   986 			{
       
   987 			case KNewLineAnyCrLf:
       
   988 				{
       
   989 				newlineMode = EPcreNewlineAnyCrLf;
       
   990 				break;
       
   991 				}			
       
   992 			case KNewLineAny:
       
   993 				{
       
   994 				newlineMode = EPcreNewlineAny;
       
   995 				break;
       
   996 				}	
       
   997 			case KNewLineLf:
       
   998 				{
       
   999 				newlineMode = EPcreNewlineLf;
       
  1000 				break;
       
  1001 				}			
       
  1002 			case KNewLineCr:
       
  1003 				{
       
  1004 				newlineMode = EPcreNewlineCr;
       
  1005 				break;
       
  1006 				}
       
  1007 			case KNewLineCrLf:
       
  1008 				{
       
  1009 				newlineMode = EPcreNewlineCrLf;
       
  1010 				break;
       
  1011 				}
       
  1012 			default:
       
  1013 				{
       
  1014 				__ASSERT_DEBUG(EFalse, EUnexpectedRetValFromPcre);				
       
  1015 				}				
       
  1016 			}		
       
  1017 		}
       
  1018 	return newlineMode;
       
  1019 	}
       
  1020 
       
  1021 /** 
       
  1022  * Escapes all potentially meaningful regular expression (RE) characters in
       
  1023  * aUnquoted.  The returned string, used as a regular expression,
       
  1024  * will exactly match the original string.  For example,
       
  1025  *           1.5-2.0?
       
  1026  * may become:
       
  1027  *           1\.5\-2\.0\?
       
  1028  * Note QuoteMeta behaves the same as perl's QuoteMeta function,
       
  1029  * *except* that it escapes the NUL character (\0) as backslash + 0,
       
  1030  * rather than backslash + NUL.
       
  1031  * @param aUnquoted unescaped string.
       
  1032  * @return string with all meaningful RE characters escaped.
       
  1033  */
       
  1034 EXPORT_C HBufC8* CRegEx::QuoteMetaL(const TDesC8& aUnquoted)
       
  1035 	{
       
  1036 	HBufC8* result = HBufC8::NewLC(aUnquoted.Length() + KReserveForEscapeChars);
       
  1037 	TPtr8 pResult = result->Des();
       
  1038 
       
  1039 	_LIT8(KEscapeNull, "\\0");
       
  1040 	_LIT8(KEscapeOther, "\\%c");
       
  1041 	
       
  1042 	// Escape any ascii character not in [A-Za-z_0-9].
       
  1043 	//
       
  1044 	// Note that it's legal to escape a character even if it has no
       
  1045 	// special meaning in a regular expression -- so this function does
       
  1046 	// that.  (This also makes it identical to the perl function of the
       
  1047 	// same name; see `perldoc -f quotemeta`.)  The one exception is
       
  1048 	// escaping NUL: rather than doing backslash + NUL, like perl does,
       
  1049 	// we do '\0', because pcre itself doesn't take embedded NUL chars.
       
  1050 	for (TInt ii = 0; ii < aUnquoted.Length(); ++ii)
       
  1051 	  {
       
  1052 	  
       
  1053 	  // Always make sure we have enough room to escape at least one character
       
  1054 	  if(pResult.MaxLength() <= pResult.Length() + KEscapeTripletLength)
       
  1055 		  {
       
  1056 		  result = result->ReAllocL(pResult.Length() + KReserveForEscapeChars);
       
  1057 		  CleanupStack::Pop();
       
  1058 		  CleanupStack::PushL(result);
       
  1059 		  pResult.Set(result->Des());
       
  1060 		  }
       
  1061 	  
       
  1062 	  if (aUnquoted[ii] == TChar('\0'))
       
  1063 		  {
       
  1064 		  pResult.Append(KEscapeNull());
       
  1065 		  }
       
  1066 	  else if ((aUnquoted[ii] < TChar('a') || aUnquoted[ii] > TChar('z')) &&
       
  1067 			   (aUnquoted[ii] < TChar('A') || aUnquoted[ii] > TChar('Z')) &&
       
  1068 			   (aUnquoted[ii] < TChar('0') || aUnquoted[ii] > TChar('9')) &&
       
  1069 			   aUnquoted[ii] != TChar('_') &&
       
  1070 			   // If this is the part of a UTF8 or Latin1 character, we need
       
  1071 			   // to copy this byte without escaping.  Experimentally this is
       
  1072 			   // what works correctly with the regexp library.
       
  1073 			   !(aUnquoted[ii] & TChar(0x80)))
       
  1074 		  {
       
  1075 		  pResult.AppendFormat(KEscapeOther, aUnquoted[ii]);
       
  1076 		  }
       
  1077 	  else
       
  1078 		  {
       
  1079 		  pResult.Append(aUnquoted[ii]);
       
  1080 		  }
       
  1081 	}
       
  1082 	CleanupStack::Pop(result);
       
  1083 	return result;
       
  1084 }
       
  1085 
       
  1086 /**
       
  1087  * Returns the number of capturing subpatterns, or -1 if the
       
  1088  * regular expressions wasn't valid on construction.
       
  1089  * @return the number of capturing subpatterns or or -1 if the regular
       
  1090  * expressions wasn't valid on construction.
       
  1091  */
       
  1092 EXPORT_C TInt CRegEx::NumberOfCapturingGroups() const
       
  1093 	{
       
  1094 	if (iRePartial == NULL) return KErrNotFound;
       
  1095 	
       
  1096 	TInt result;
       
  1097 	TInt pcreRetVal = pcre_fullinfo(iRePartial, // The regular expression object
       
  1098 								iExtraPartial,  // Study data
       
  1099                                 PCRE_INFO_CAPTURECOUNT,
       
  1100                                 &result);
       
  1101 	__ASSERT_DEBUG(pcreRetVal == 0, Panic(EUnexpectedRetValFromPcre));
       
  1102 	return result;
       
  1103 	}
       
  1104 
       
  1105 /**
       
  1106  * Analyzes a regular expression (RE) pattern further. This is especially useful
       
  1107  * if an RE is going to be used several times to reduce the time taken for
       
  1108  * matching.
       
  1109  *  
       
  1110  * "At present, studying a pattern is useful only for non-anchored patterns that
       
  1111  * do not have a single fixed starting character." 
       
  1112  */
       
  1113 EXPORT_C void CRegEx::Study()
       
  1114 	{
       
  1115 	// "At present, studying a pattern is useful only for non-anchored
       
  1116 	// patterns that do not have a single fixed starting character."
       
  1117 	if(iRePartial)
       
  1118 		{
       
  1119 		const char* compileError; // ignored
       
  1120 		iExtraPartial = pcre_study(
       
  1121 				iRePartial,     /* result of pcre_compile() */
       
  1122 				0,              /* no options exist */
       
  1123 				&compileError);	/* set to NULL or points to a message */		
       
  1124 		}
       
  1125 	}
       
  1126 
       
  1127 
       
  1128 // Private Functions //
       
  1129 
       
  1130 /**
       
  1131  * Standard constructor
       
  1132  */
       
  1133 CRegEx::CRegEx()
       
  1134 	{
       
  1135 	}
       
  1136 
       
  1137 
       
  1138 /**
       
  1139  * Standard constructor
       
  1140  * @param aOptions options used when compiling regular expression.
       
  1141  */
       
  1142 CRegEx::CRegEx(const TRegExOptions& aOptions)
       
  1143 	: iOptions(aOptions)
       
  1144 	{
       
  1145 	}
       
  1146 
       
  1147 /**
       
  1148  * Second phase constructor.
       
  1149  */
       
  1150 void CRegEx::ConstructL(const TDesC8& aPattern)
       
  1151 	{
       
  1152 	iPattern = HBufC8::NewL(aPattern.Length() + 1); // Leave room for \0
       
  1153 	TPtr8 pPattern = iPattern->Des();
       
  1154 	pPattern.Copy(aPattern);
       
  1155 	pPattern.ZeroTerminate();
       
  1156 	CommonConstructL();
       
  1157 	}
       
  1158 
       
  1159 void CRegEx::CommonConstructL()
       
  1160 	{
       
  1161 	// The default value for an argument, to indicate no arg was passed in
       
  1162 	iNoArg = new(ELeave) TRegExArg((TAny*)NULL);
       
  1163 	
       
  1164 	// Compile patterns used for partial and full matches.	
       
  1165 	iReFull = NULL;
       
  1166 	iRePartial = NULL;
       
  1167 	
       
  1168 	iRePartial = CompileL(EUnanchored);
       
  1169 	if(iRePartial)
       
  1170 		{
       
  1171 		iReFull = CompileL(EAnchorBoth);
       
  1172 		}	
       
  1173 	User::LeaveIfError(iErrorCode);	
       
  1174 	}	
       
  1175 
       
  1176 void CRegEx::ConstructL(const TDesC16& aPattern)
       
  1177 	{
       
  1178 	LtkUtils::RLtkBuf8 narrowBuf;
       
  1179 	narrowBuf.CreateLC(aPattern.Length() + 1);
       
  1180 	if (iOptions.Utf8())
       
  1181 		{
       
  1182 		narrowBuf.CopyAsUtf8L(aPattern);
       
  1183 		}
       
  1184 	else
       
  1185 		{
       
  1186 		narrowBuf.Copy(aPattern);
       
  1187 		}
       
  1188 	narrowBuf.AppendL(0);
       
  1189 	iPattern = narrowBuf.ToHBuf();
       
  1190 	CleanupStack::Pop(&narrowBuf);
       
  1191 
       
  1192 	CommonConstructL();
       
  1193 	}
       
  1194 
       
  1195 /**
       
  1196  * Compile the regular expression (RE) pattern.
       
  1197  * @param aAnchor anchoring to use for the RE pattern.
       
  1198  * @return pointer to PCRE object with compiled RE data. 
       
  1199  */
       
  1200 pcre* CRegEx::CompileL(TAnchor aAnchor)
       
  1201 	{
       
  1202 	// First, convert TRegExOptions into pcre options
       
  1203 	TInt pcreOptions = iOptions.AllOptions();
       
  1204 	
       
  1205 	// Special treatment for anchoring.  This is needed because at
       
  1206 	// runtime pcre only provides an option for anchoring at the
       
  1207 	// beginning of a string (unless you use offset).
       
  1208 	//
       
  1209 	// There are three types of anchoring we want:
       
  1210 	//    EUnanchored      Compile the original pattern, and use
       
  1211 	//                    a pcre unanchored match.
       
  1212 	//    EAnchorStart    Compile the original pattern, and use
       
  1213 	//                    a pcre anchored match.
       
  1214 	//    EAnchorBoth     Tack a "\z" to the end of the original pattern
       
  1215 	//                    and use a pcre anchored match.
       
  1216 	
       
  1217 	pcre* re;
       
  1218 	TInt errCode = 0;
       
  1219 	TInt errOffset = 0;
       
  1220 	
       
  1221 	const char* compileError; // ignored
       
  1222 
       
  1223 	if (aAnchor != EAnchorBoth)
       
  1224 		{
       
  1225 		re = pcre_compile2((const char *)iPattern->Ptr(), pcreOptions,
       
  1226 				&errCode,&compileError, &errOffset, NULL);
       
  1227 		}
       
  1228 	else
       
  1229 		{
       
  1230 		// Tack a '\z' at the end of RE.  Parenthesize it first so that
       
  1231 		// the '\z' applies to all top-level alternatives in the regexp.
       
  1232 		_LIT8(KWrapped, "(?:%S)\\z\x0");
       
  1233 		HBufC8* wrapped = HBufC8::NewLC(KWrapped().Length()
       
  1234 				+ iPattern->Length());
       
  1235 		TPtr8 pWrapped = wrapped->Des();
       
  1236 		pWrapped.Format(KWrapped(), iPattern);
       
  1237 		re = pcre_compile2((const char *)pWrapped.Ptr(), pcreOptions,
       
  1238 				&errCode, &compileError, &errOffset, NULL);
       
  1239 		CleanupStack::PopAndDestroy(wrapped);
       
  1240 		}
       
  1241 	
       
  1242 	if (!re && (iErrorCode == KErrNone))
       
  1243 		{
       
  1244 		iErrorCode = KErrRegExCompileBase - errCode;
       
  1245 		iErrorOffset = errOffset;
       
  1246 		}
       
  1247 	return re;
       
  1248 	}
       
  1249 
       
  1250 /**
       
  1251  * Cleanup the compiled regular expression and study data.
       
  1252  * Separated out from destructor in case support for recompiling
       
  1253  * is introduced.
       
  1254  */
       
  1255 void CRegEx::Cleanup()
       
  1256 	{
       
  1257 	if (iReFull)
       
  1258 		{
       
  1259 		(*pcre_free)(iReFull);
       
  1260 		}
       
  1261 	
       
  1262 	if (iRePartial)
       
  1263 		{
       
  1264 		(*pcre_free)(iRePartial);
       
  1265 		}
       
  1266 	if(iExtraPartial)
       
  1267 		{
       
  1268 		(*pcre_free)(iExtraPartial);
       
  1269 		}
       
  1270 	}
       
  1271 
       
  1272 // Internal matching and rewrite implementations //
       
  1273 
       
  1274 /**
       
  1275  * Match against aText, filling in aVector (up to aVector.Count() * 2/3) with
       
  1276  * pairs of integers for the beginning and end positions of matched
       
  1277  * text.  The first pair corresponds to the entire matched text;
       
  1278  * subsequent pairs correspond, in order, to parentheses-captured
       
  1279  * matches.  Returns the number of pairs (one more than the number of
       
  1280  * the last subpattern with a match) if matching was successful
       
  1281  * and zero if the match failed.
       
  1282  * I.e. for RE("(foo)|(bar)|(baz)") it will return 2, 3, and 4 when matching
       
  1283  * against "foo", "bar", and "baz" respectively.
       
  1284  * When matching RE("(foo)|hello") against "hello", it will return 1.
       
  1285  * But the values for all subpattern are filled in into "aVector".
       
  1286  * @param aText the text to match against the regular expression.
       
  1287  * @param aStartPos position in aText to start matching from
       
  1288  * @param aAnchor  the type of match to perform.
       
  1289  * @param aVector vector that stores pairs of integers for the start and end
       
  1290  * positions of matched substrings.
       
  1291  * @param aVectorSize length of aVector
       
  1292  * @return the number of matched subpatterns.
       
  1293  */
       
  1294 TInt CRegEx::TryMatch(const TDesC8& aText,
       
  1295                  TInt aStartPos,
       
  1296                  TAnchor aAnchor,
       
  1297    			   	TInt* aVector,
       
  1298    			    TInt aVectorSize) const
       
  1299     {
       
  1300 	pcre* re = (aAnchor == EAnchorBoth) ? iReFull : iRePartial;
       
  1301 	if (!re)
       
  1302 		{
       
  1303 		// Matching against invalid re
       
  1304 		return 0;
       
  1305 		}
       
  1306 	
       
  1307 	pcre_extra extra = { 0, 0, 0, 0, 0, 0 };
       
  1308 	
       
  1309 	if(iExtraPartial)
       
  1310 		{
       
  1311 		extra = *iExtraPartial;
       
  1312 		}
       
  1313 
       
  1314 	if (iOptions.MatchLimit() > 0)
       
  1315 		{
       
  1316 		extra.flags |= PCRE_EXTRA_MATCH_LIMIT;
       
  1317 		extra.match_limit = iOptions.MatchLimit();
       
  1318 		}
       
  1319 	
       
  1320 	if (iOptions.MatchLimitRecursion() > 0)
       
  1321 		{
       
  1322 		extra.flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
       
  1323 		extra.match_limit_recursion = iOptions.MatchLimitRecursion();
       
  1324 		}
       
  1325 	
       
  1326 	TInt rc = pcre_exec(re,              // The regular expression object
       
  1327 						&extra,
       
  1328 						(const char *)aText.Ptr(),
       
  1329 						aText.Length(),
       
  1330 						aStartPos,
       
  1331 						(aAnchor == EUnanchored) ? 0 : PCRE_ANCHORED,
       
  1332 						aVector,
       
  1333 						aVectorSize);
       
  1334 	
       
  1335 	// Handle errors
       
  1336 	if (rc == PCRE_ERROR_NOMATCH)
       
  1337 		{
       
  1338 		return 0;
       
  1339 		}
       
  1340 	else if (rc < 0)
       
  1341 		{
       
  1342 		// Unexpected return code
       
  1343 		return 0;
       
  1344 		}
       
  1345 	else if (rc == 0)
       
  1346 		{
       
  1347 		// pcre_exec() returns 0 as a special case when the number of
       
  1348 		// capturing subpatterns exceeds the size of the vector.
       
  1349 		// When this happens, there is a match and the output vector
       
  1350 		// is filled, but we miss out on the positions of the extra subpatterns.
       
  1351 		rc = aVectorSize / 2;
       
  1352 		}
       
  1353 	return rc;
       
  1354     }
       
  1355 
       
  1356 /**
       
  1357  * Internal implementation of rewrite functionality used by the replace &
       
  1358  * extract functions.
       
  1359  * Appends the aRewrite string, with backslash subsitutions from aText
       
  1360  * and aVector, to string aOut.
       
  1361  * @param aOut output descriptor
       
  1362  * @param aRewrite descriptor backslash subsitutions to append to aOut.
       
  1363  * @param aText descriptor containing substitutions.
       
  1364  * @param aVector vector that stores pairs of integers for the start and end
       
  1365  * positions of matched substrings.
       
  1366  * @param aVectorSize length of aVector.
       
  1367  * @param aMatches number of matches.
       
  1368  * @return ETrue if the operation was successfull, EFalse otherwise.
       
  1369  * @see ReplaceL()
       
  1370  * @see GlobalReplaceL()
       
  1371  * @see ExtractL()
       
  1372  */
       
  1373 TBool CRegEx::Rewrite(TDes8& aOut, const TDesC8& aRewrite,
       
  1374                  const TDesC8& aText, TInt* aVector,
       
  1375                  TInt aVectorSize, TInt aMatches) const
       
  1376     {
       
  1377 	for(TInt i = 0; i < aRewrite.Length(); i++)
       
  1378 		{
       
  1379 		TChar c = aRewrite[i];
       
  1380 		
       
  1381 		if (c == '\\')
       
  1382 			{
       
  1383 			c = aRewrite[++i];
       
  1384 			if (c.IsDigit())
       
  1385 				{
       
  1386 				TUint n = c - TChar('0');
       
  1387 				if (n >= aMatches)
       
  1388 					{
       
  1389 					iErrorCode = KErrRegExBadBackslashSubsitution;
       
  1390 					return EFalse;
       
  1391 					}
       
  1392 				__ASSERT_DEBUG(aVectorSize >= 2 * n + 1, Panic(EVectorTooSmall));
       
  1393 				TInt start = aVector[2 * n];				
       
  1394 				if (start >= 0)
       
  1395 					{
       
  1396 					TInt requiredLength = aVector[2 * n + 1] - start;
       
  1397 					if((aOut.Length() + requiredLength) <= aOut.MaxLength())
       
  1398 						{
       
  1399 						aOut.Append(aText.Mid(start, requiredLength));
       
  1400 						}
       
  1401 					else
       
  1402 						{
       
  1403 						iErrorCode = KErrRegExOutputTooBig;
       
  1404 						return EFalse;
       
  1405 						}						
       
  1406 					}				
       
  1407 				}
       
  1408 				else if (c == '\\')
       
  1409 					{
       
  1410 					if((aOut.Length() + 1) <= aOut.MaxLength())
       
  1411 						{
       
  1412 						aOut.Append(c);
       
  1413 						}
       
  1414 					else
       
  1415 						{
       
  1416 						iErrorCode = KErrRegExOutputTooBig;
       
  1417 						return EFalse;
       
  1418 						}
       
  1419 					}
       
  1420 				else
       
  1421 					{
       
  1422 					// Invalid rewrite pattern
       
  1423 					iErrorCode = KErrRegExInvalidRewritePattern;
       
  1424 					return EFalse;
       
  1425 					}
       
  1426 			} 
       
  1427 		else
       
  1428 			{
       
  1429 			if((aOut.Length() + 1) <= aOut.MaxLength())
       
  1430 				{
       
  1431 				aOut.Append(c);
       
  1432 				}
       
  1433 			else
       
  1434 				{
       
  1435 				iErrorCode = KErrRegExOutputTooBig;
       
  1436 				return EFalse;
       
  1437 				}
       
  1438 			}
       
  1439 		}
       
  1440 	return ETrue;
       
  1441 	}
       
  1442 
       
  1443 /**
       
  1444  * Internal implementation of the match functionality.
       
  1445  * @param aText the text to match against the regular expression.
       
  1446  * @param aAnchor the type of match to perform.
       
  1447  * @param aConsumed the length of the matched substring.
       
  1448  * @param aArgs array of arguments that will contain the extracted subpatterns.
       
  1449  * @param aVector output vector that stores pairs of integers for the start and
       
  1450  * end positions of matched substrings.
       
  1451  * @param aVectorSize length of aVector
       
  1452  * @return
       
  1453  */
       
  1454 TBool CRegEx::DoMatchImpl(const TDesC8& aText,
       
  1455                     TAnchor aAnchor,
       
  1456                     TInt& aConsumed,
       
  1457                     const RPointerArray<const TRegExArg>& aArgs,
       
  1458       			   	TInt* aVector,
       
  1459       			    TInt aVectorSize) const
       
  1460     {
       
  1461     // results + PCRE workspace
       
  1462 	__ASSERT_DEBUG((1 + aArgs.Count()) * KPcreWorkspaceMultiplier <= aVectorSize,Panic(EVectorTooSmall) );
       
  1463 	TInt matches = TryMatch(aText, 0, aAnchor, aVector, aVectorSize);
       
  1464 	// TryMatch never returns negatives
       
  1465 	__ASSERT_DEBUG(matches >= 0, Panic(EInvalidMatchResults));  
       
  1466 	
       
  1467 	if (matches == 0)
       
  1468 		{
       
  1469 		iErrorCode = KErrRegExZeroMatches;
       
  1470 		return EFalse;
       
  1471 		}	
       
  1472 	
       
  1473 	aConsumed = aVector[1];
       
  1474 	
       
  1475 	if (aArgs.Count() == 0)
       
  1476 		{
       
  1477 		// We are not interested in results
       
  1478 		return ETrue;
       
  1479 		}
       
  1480 	
       
  1481 	if (NumberOfCapturingGroups() < aArgs.Count())
       
  1482 		{
       
  1483 		// RE has fewer capturing groups than number of arg pointers passed in
       
  1484 		iErrorCode = KErrRegExFewerCaptureGroupsThanArgs;
       
  1485 		return EFalse;
       
  1486 		}
       
  1487 
       
  1488 	// If we got here, we must have matched the whole pattern.
       
  1489 	// We do not need (can not do) any more checks on the value of 'matches'
       
  1490 	// here -- see the comment for TryMatch.
       
  1491 	for (TInt i = 0; i < aArgs.Count(); i++)
       
  1492 		{
       
  1493 		const TInt start = aVector[2*(i+1)];
       
  1494 		const TInt limit = aVector[2*(i+1)+1];
       
  1495 		
       
  1496 		TBool r;
       
  1497 		if(start == -1 || limit == -1)
       
  1498 			{
       
  1499 			r = aArgs[i]->Parse(KNullDesC8());
       
  1500 			}
       
  1501 		else
       
  1502 			{
       
  1503 			r = aArgs[i]->Parse(aText.Mid(start, limit - start));
       
  1504 			}
       
  1505 		if(!r)
       
  1506 			{
       
  1507 			iErrorCode = KErrRegExFailedToParseArg;
       
  1508 			return EFalse;			
       
  1509 			}
       
  1510 		}
       
  1511 
       
  1512 	return ETrue;
       
  1513 	}
       
  1514 
       
  1515 /**
       
  1516  * Panic the current thread.
       
  1517  * @param aPanic panic code.
       
  1518  */
       
  1519 void CRegEx::Panic(TRegExPanic aPanic)
       
  1520 	{
       
  1521 	User::Panic(KRegExPanic(), aPanic);
       
  1522 	}