|
1 // Copyright (c) 2005 - 2006, Google Inc. |
|
2 // All rights reserved. |
|
3 // |
|
4 // Redistribution and use in source and binary forms, with or without |
|
5 // modification, are permitted provided that the following conditions are |
|
6 // met: |
|
7 // |
|
8 // * Redistributions of source code must retain the above copyright |
|
9 // notice, this list of conditions and the following disclaimer. |
|
10 // * Redistributions in binary form must reproduce the above |
|
11 // copyright notice, this list of conditions and the following disclaimer |
|
12 // in the documentation and/or other materials provided with the |
|
13 // distribution. |
|
14 // * Neither the name of Google Inc. nor the names of its |
|
15 // contributors may be used to endorse or promote products derived from |
|
16 // this software without specific prior written permission. |
|
17 // |
|
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
|
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
|
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
|
21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
|
22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
|
23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
|
24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
|
25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
|
26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
|
27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
|
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
|
29 // |
|
30 // Author: Sanjay Ghemawat |
|
31 // |
|
32 |
|
33 // Heavily refactored for Symbian OS by Accenture. |
|
34 |
|
35 #define PCRE_EXP_DECL IMPORT_C // We need to define this because the config.h that defines it when building libpcre isn't exported. |
|
36 |
|
37 #include <fshell/descriptorutils.h> |
|
38 #include "pcre.h" |
|
39 #include "cregex.h" |
|
40 #include "tregexarg.h" |
|
41 |
|
42 /** |
|
43 * Maximum number of optional arguments supported by the matching interface. |
|
44 * If more arguments are required then use the more generic DoMatchL() function. |
|
45 */ |
|
46 static const TInt KMaxArgs = 4; |
|
47 |
|
48 /** |
|
49 * Multiplier used to calculate size of vector that stores results from PCRE. |
|
50 * @see KVecSize |
|
51 */ |
|
52 static const TInt KPcreWorkspaceMultiplier = 3; |
|
53 |
|
54 /** |
|
55 * "The first two-thirds of the vector is used to pass back captured subpatterns, |
|
56 * each subpattern using a pair of integers. The remaining third of the vector |
|
57 * is used as workspace by pcre_exec() while matching capturing subpatterns, |
|
58 * and is not available for passing back information. The number passed in |
|
59 * vecsize should always be a multiple of three. If it is not, it is rounded |
|
60 * down." |
|
61 */ |
|
62 static const TInt KVecSize = (1 + KMaxArgs) * KPcreWorkspaceMultiplier; |
|
63 |
|
64 /** |
|
65 * Number of characters in a triplet escape sequence. |
|
66 */ |
|
67 static const TInt KEscapeTripletLength = 3; |
|
68 |
|
69 /** |
|
70 * Number of characters to reserve for escape sequences |
|
71 * Reserves enough room for several. |
|
72 */ |
|
73 static const TInt KReserveForEscapeChars = KEscapeTripletLength * 4; |
|
74 |
|
75 |
|
76 // Public Functions // |
|
77 |
|
78 /** |
|
79 * 2-Phase constructor for CRegEx objects. |
|
80 * @param aPattern regular expression pattern |
|
81 * @return a pre-compiled regular expression object ready to perform matching. |
|
82 */ |
|
83 EXPORT_C CRegEx* CRegEx::NewL(const TDesC8& aPattern) |
|
84 { |
|
85 CRegEx* self = NewLC(aPattern); |
|
86 CleanupStack::Pop(self); |
|
87 return self; |
|
88 } |
|
89 |
|
90 /** |
|
91 * 2-Phase constructor for CRegEx objects. |
|
92 * @param aPattern regular expression pattern. |
|
93 * @param aOptions options to use when compiling regular expression. |
|
94 * @return a pre-compiled regular expression object ready to perform matching. |
|
95 */ |
|
96 EXPORT_C CRegEx* CRegEx::NewL(const TDesC8& aPattern, |
|
97 const TRegExOptions& aOptions) |
|
98 { |
|
99 CRegEx* self = NewLC(aPattern, aOptions); |
|
100 CleanupStack::Pop(self); |
|
101 return self; |
|
102 } |
|
103 |
|
104 /** |
|
105 * 2-Phase constructor for CRegEx objects. Object is left on the cleanup stack. |
|
106 * @param aPattern regular expression pattern. |
|
107 * @return a pre-compiled regular expression object ready to perform matching. |
|
108 */ |
|
109 EXPORT_C CRegEx* CRegEx::NewLC(const TDesC8& aPattern) |
|
110 { |
|
111 CRegEx* self = new(ELeave) CRegEx(); |
|
112 CleanupStack::PushL(self); |
|
113 self->ConstructL(aPattern); |
|
114 return self; |
|
115 } |
|
116 |
|
117 /** |
|
118 * 2-Phase constructor for CRegEx objects. Object is left on the cleanup stack. |
|
119 * @param aPattern regular expression pattern. |
|
120 * @param aOptions options to use when compiling regular expression. |
|
121 * @return a pre-compiled regular expression object ready to perform matching. |
|
122 */ |
|
123 EXPORT_C CRegEx* CRegEx::NewLC(const TDesC8& aPattern, |
|
124 const TRegExOptions& aOptions) |
|
125 { |
|
126 CRegEx* self = new(ELeave) CRegEx(aOptions); |
|
127 CleanupStack::PushL(self); |
|
128 self->ConstructL(aPattern); |
|
129 return self; |
|
130 } |
|
131 |
|
132 EXPORT_C CRegEx* CRegEx::NewL(const TDesC16& aPattern, const TRegExOptions& aOptions) |
|
133 { |
|
134 CRegEx* self = new(ELeave) CRegEx(aOptions); |
|
135 CleanupStack::PushL(self); |
|
136 self->ConstructL(aPattern); |
|
137 CleanupStack::Pop(self); |
|
138 return self; |
|
139 } |
|
140 |
|
141 /** |
|
142 * Standard destructor to free resources. |
|
143 */ |
|
144 EXPORT_C CRegEx::~CRegEx() |
|
145 { |
|
146 delete iNoArg; |
|
147 delete iPattern; |
|
148 Cleanup(); |
|
149 } |
|
150 |
|
151 // Public matching interface // |
|
152 |
|
153 /** |
|
154 * Checks if the regular expression (RE) matches the supplied text entirely. |
|
155 * @param aText the text to match against the regular expression. |
|
156 * @return ETrue if the RE matches the supplied text entirely, EFalse otherwise. |
|
157 * @see PartialMatchL() |
|
158 */ |
|
159 EXPORT_C TBool CRegEx::FullMatchL(const TDesC8& aText) const |
|
160 { |
|
161 return FullMatchL(aText, *iNoArg, *iNoArg, *iNoArg, *iNoArg); |
|
162 } |
|
163 |
|
164 /** |
|
165 * Checks if the regular expression (RE) matches the supplied text entirely. |
|
166 * @param aText the text to match against the regular expression. |
|
167 * @param aArg1 contains the first extracted subpattern. |
|
168 * @return ETrue if the RE matches the supplied text entirely, EFalse otherwise. |
|
169 * @see PartialMatchL() |
|
170 */ |
|
171 EXPORT_C TBool CRegEx::FullMatchL(const TDesC8& aText, |
|
172 const TRegExArg& aArg1) const |
|
173 { |
|
174 return FullMatchL(aText, aArg1, *iNoArg, *iNoArg, *iNoArg); |
|
175 } |
|
176 |
|
177 /** |
|
178 * Checks if the regular expression (RE) matches the supplied text entirely. |
|
179 * @param aText the text to match against the regular expression. |
|
180 * @param aArg1 contains the first extracted subpattern. |
|
181 * @param aArg2 contains the second extracted subpattern. |
|
182 * @return ETrue if the RE matches the supplied text entirely, EFalse otherwise. |
|
183 * @see PartialMatchL() |
|
184 */ |
|
185 EXPORT_C TBool CRegEx::FullMatchL(const TDesC8& aText, |
|
186 const TRegExArg& aArg1, |
|
187 const TRegExArg& aArg2) const |
|
188 { |
|
189 return FullMatchL(aText, aArg1, aArg2, *iNoArg, *iNoArg); |
|
190 } |
|
191 |
|
192 /** |
|
193 * Checks if the regular expression (RE) matches the supplied text entirely. |
|
194 * @param aText the text to match against the regular expression. |
|
195 * @param aArg1 contains the first extracted subpattern. |
|
196 * @param aArg2 contains the second extracted subpattern. |
|
197 * @param aArg3 contains the third extracted subpattern. |
|
198 * @return ETrue if the RE matches the supplied text entirely, EFalse otherwise. |
|
199 * @see PartialMatchL() |
|
200 */ |
|
201 EXPORT_C TBool CRegEx::FullMatchL(const TDesC8& aText, |
|
202 const TRegExArg& aArg1, |
|
203 const TRegExArg& aArg2, |
|
204 const TRegExArg& aArg3) const |
|
205 { |
|
206 return FullMatchL(aText, aArg1, aArg2, aArg3, *iNoArg); |
|
207 } |
|
208 |
|
209 /** |
|
210 * Checks if the regular expression (RE) matches the supplied text entirely. |
|
211 * @param aText the text to match against the regular expression. |
|
212 * @param aArg1 contains the first extracted subpattern. |
|
213 * @param aArg2 contains the second extracted subpattern. |
|
214 * @param aArg3 contains the third extracted subpattern. |
|
215 * @param aArg4 contains the fourth extracted subpattern. |
|
216 * @return ETrue if the RE matches the supplied text entirely, EFalse otherwise. |
|
217 * @see PartialMatchL() |
|
218 */ |
|
219 EXPORT_C TBool CRegEx::FullMatchL(const TDesC8& aText, |
|
220 const TRegExArg& aArg1, |
|
221 const TRegExArg& aArg2, |
|
222 const TRegExArg& aArg3, |
|
223 const TRegExArg& aArg4) const |
|
224 { |
|
225 RPointerArray<const TRegExArg> args; |
|
226 CleanupClosePushL(args); |
|
227 args.ReserveL(KMaxArgs); |
|
228 |
|
229 if (&aArg1 != iNoArg) |
|
230 { |
|
231 args.AppendL(&aArg1); |
|
232 } |
|
233 if (&aArg2 != iNoArg) |
|
234 { |
|
235 args.AppendL(&aArg2); |
|
236 } |
|
237 if (&aArg3 != iNoArg) |
|
238 { |
|
239 args.AppendL(&aArg3); |
|
240 } |
|
241 if (&aArg4 != iNoArg) |
|
242 { |
|
243 args.AppendL(&aArg4); |
|
244 } |
|
245 |
|
246 TInt consumed = 0; |
|
247 TInt vector[KVecSize]; |
|
248 |
|
249 TBool r = DoMatchImpl(aText, EAnchorBoth, consumed, args, vector, KVecSize); |
|
250 CleanupStack::PopAndDestroy(&args); |
|
251 return r; |
|
252 } |
|
253 |
|
254 /** |
|
255 * Checks if the regular expression (RE) matches any substring of the text. |
|
256 * @param aText the text to match against the regular expression. |
|
257 * @return ETrue if the RE matches any substring of the supplied text, |
|
258 * EFalse otherwise. |
|
259 * @see FullMatchL() |
|
260 */ |
|
261 EXPORT_C TBool CRegEx::PartialMatchL(const TDesC8& aText) const |
|
262 { |
|
263 return PartialMatchL(aText, *iNoArg, *iNoArg, *iNoArg, *iNoArg); |
|
264 } |
|
265 |
|
266 /** |
|
267 * Checks if the regular expression (RE) matches any substring of the text. |
|
268 * @param aText the text to match against the regular expression. |
|
269 * @param aArg1 contains the first extracted subpattern. |
|
270 * @return ETrue if the RE matches any substring of the supplied text, |
|
271 * EFalse otherwise. |
|
272 * @see FullMatchL() |
|
273 */ |
|
274 EXPORT_C TBool CRegEx::PartialMatchL(const TDesC8& aText, |
|
275 const TRegExArg& aArg1) const |
|
276 { |
|
277 return PartialMatchL(aText, aArg1, *iNoArg, *iNoArg, *iNoArg); |
|
278 } |
|
279 |
|
280 /** |
|
281 * Checks if the regular expression (RE) matches any substring of the text. |
|
282 * @param aText the text to match against the regular expression. |
|
283 * @param aArg1 contains the first extracted subpattern. |
|
284 * @param aArg2 contains the second extracted subpattern. |
|
285 * @return ETrue if the RE matches any substring of the supplied text, |
|
286 * EFalse otherwise. |
|
287 * @see FullMatchL() |
|
288 */ |
|
289 EXPORT_C TBool CRegEx::PartialMatchL(const TDesC8& aText, |
|
290 const TRegExArg& aArg1, |
|
291 const TRegExArg& aArg2) const |
|
292 { |
|
293 return PartialMatchL(aText, aArg1, aArg2, *iNoArg, *iNoArg); |
|
294 } |
|
295 |
|
296 /** |
|
297 * Checks if the regular expression (RE) matches any substring of the text. |
|
298 * @param aText the text to match against the regular expression. |
|
299 * @param aArg1 contains the first extracted subpattern. |
|
300 * @param aArg2 contains the second extracted subpattern. |
|
301 * @param aArg3 contains the third extracted subpattern. |
|
302 * @return ETrue if the RE matches any substring of the supplied text, |
|
303 * EFalse otherwise. |
|
304 * @see FullMatchL() |
|
305 */ |
|
306 EXPORT_C TBool CRegEx::PartialMatchL(const TDesC8& aText, |
|
307 const TRegExArg& aArg1, |
|
308 const TRegExArg& aArg2, |
|
309 const TRegExArg& aArg3) const |
|
310 { |
|
311 return PartialMatchL(aText, aArg1, aArg2, aArg3, *iNoArg); |
|
312 } |
|
313 |
|
314 /** |
|
315 * Checks if the regular expression (RE) matches any substring of the text. |
|
316 * @param aText the text to match against the regular expression. |
|
317 * @param aArg1 contains the first extracted subpattern. |
|
318 * @param aArg2 contains the second extracted subpattern. |
|
319 * @param aArg3 contains the third extracted subpattern. |
|
320 * @param aArg4 contains the fourth extracted subpattern. |
|
321 * @return ETrue if the RE matches any substring of the supplied text, |
|
322 * EFalse otherwise. |
|
323 * @see FullMatchL() |
|
324 */ |
|
325 EXPORT_C TBool CRegEx::PartialMatchL(const TDesC8& aText, |
|
326 const TRegExArg& aArg1, |
|
327 const TRegExArg& aArg2, |
|
328 const TRegExArg& aArg3, |
|
329 const TRegExArg& aArg4) const |
|
330 { |
|
331 RPointerArray<const TRegExArg> args; |
|
332 CleanupClosePushL(args); |
|
333 args.ReserveL(KMaxArgs); |
|
334 |
|
335 if (&aArg1 != iNoArg) |
|
336 { |
|
337 args.AppendL(&aArg1); |
|
338 } |
|
339 if (&aArg2 != iNoArg) |
|
340 { |
|
341 args.AppendL(&aArg2); |
|
342 } |
|
343 if (&aArg3 != iNoArg) |
|
344 { |
|
345 args.AppendL(&aArg3); |
|
346 } |
|
347 if (&aArg4 != iNoArg) |
|
348 { |
|
349 args.AppendL(&aArg4); |
|
350 } |
|
351 |
|
352 TInt consumed = 0; |
|
353 TInt vector[KVecSize]; |
|
354 |
|
355 TBool r = DoMatchImpl(aText, EUnanchored, consumed, args, vector, |
|
356 KVecSize); |
|
357 CleanupStack::PopAndDestroy(&args); |
|
358 return r; |
|
359 } |
|
360 |
|
361 |
|
362 /** |
|
363 * General function to perform a regular expression (RE) match on a substring |
|
364 * of the text. |
|
365 * @param aText the text to match against the regular expression. |
|
366 * @param aAnchor the type of match to perform |
|
367 * @return ETrue if the RE matches a substring of the supplied text, |
|
368 * EFalse otherwise. |
|
369 */ |
|
370 EXPORT_C TBool CRegEx::DoMatchL(const TDesC8& aText, |
|
371 TAnchor aAnchor, |
|
372 TInt& aConsumed) const |
|
373 { |
|
374 return DoMatchL(aText, aAnchor, aConsumed, *iNoArg, *iNoArg, *iNoArg, |
|
375 *iNoArg); |
|
376 } |
|
377 |
|
378 /** |
|
379 * General function to perform a regular expression (RE) match on a substring |
|
380 * of the text. |
|
381 * @param aText the text to match against the regular expression. |
|
382 * @param aAnchor the type of match to perform |
|
383 * @param aArg1 contains the first extracted subpattern. |
|
384 * @return ETrue if the RE matches a substring of the supplied text, |
|
385 * EFalse otherwise. |
|
386 */ |
|
387 EXPORT_C TBool CRegEx::DoMatchL(const TDesC8& aText, |
|
388 TAnchor aAnchor, |
|
389 TInt& aConsumed, |
|
390 const TRegExArg& aArg1) const |
|
391 { |
|
392 return DoMatchL(aText, aAnchor, aConsumed, aArg1, *iNoArg, *iNoArg, |
|
393 *iNoArg); |
|
394 } |
|
395 |
|
396 /** |
|
397 * General function to perform a regular expression (RE) match on a substring |
|
398 * of the text. |
|
399 * @param aText the text to match against the regular expression. |
|
400 * @param aAnchor the type of match to perform |
|
401 * @param aArg1 contains the first extracted subpattern. |
|
402 * @param aArg2 contains the second extracted subpattern. |
|
403 * @return ETrue if the RE matches a substring of the supplied text, |
|
404 * EFalse otherwise. |
|
405 */ |
|
406 EXPORT_C TBool CRegEx::DoMatchL(const TDesC8& aText, |
|
407 TAnchor aAnchor, |
|
408 TInt& aConsumed, |
|
409 const TRegExArg& aArg1, |
|
410 const TRegExArg& aArg2) const |
|
411 { |
|
412 return DoMatchL(aText, aAnchor, aConsumed, aArg1, aArg2, *iNoArg, *iNoArg); |
|
413 } |
|
414 |
|
415 /** |
|
416 * General function to perform a regular expression (RE) match on a substring |
|
417 * of the text. |
|
418 * @param aText the text to match against the regular expression. |
|
419 * @param aAnchor the type of match to perform |
|
420 * @param aArg1 contains the first extracted subpattern. |
|
421 * @param aArg2 contains the second extracted subpattern. |
|
422 * @param aArg3 contains the third extracted subpattern. |
|
423 * @return ETrue if the RE matches a substring of the supplied text, |
|
424 * EFalse otherwise. |
|
425 */ |
|
426 EXPORT_C TBool CRegEx::DoMatchL(const TDesC8& aText, |
|
427 TAnchor aAnchor, |
|
428 TInt& aConsumed, |
|
429 const TRegExArg& aArg1, |
|
430 const TRegExArg& aArg2, |
|
431 const TRegExArg& aArg3) const |
|
432 { |
|
433 return DoMatchL(aText, aAnchor, aConsumed, aArg1, aArg2, aArg3, *iNoArg); |
|
434 } |
|
435 |
|
436 /** |
|
437 * General function to perform a regular expression (RE) match on a substring |
|
438 * of the text. |
|
439 * @param aText the text to match against the regular expression. |
|
440 * @param aAnchor the type of match to perform |
|
441 * @param aArg1 contains the first extracted subpattern. |
|
442 * @param aArg2 contains the second extracted subpattern. |
|
443 * @param aArg3 contains the third extracted subpattern. |
|
444 * @param aArg4 contains the fourth extracted subpattern. |
|
445 * @return ETrue if the RE matches a substring of the supplied text, |
|
446 * EFalse otherwise. |
|
447 */ |
|
448 EXPORT_C TBool CRegEx::DoMatchL(const TDesC8& aText, |
|
449 TAnchor aAnchor, |
|
450 TInt& aConsumed, |
|
451 const TRegExArg& aArg1, |
|
452 const TRegExArg& aArg2, |
|
453 const TRegExArg& aArg3, |
|
454 const TRegExArg& aArg4) const |
|
455 { |
|
456 RPointerArray<const TRegExArg> args; |
|
457 CleanupClosePushL(args); |
|
458 args.ReserveL(KMaxArgs); |
|
459 |
|
460 if (&aArg1 != iNoArg) |
|
461 { |
|
462 args.AppendL(&aArg1); |
|
463 } |
|
464 if (&aArg2 != iNoArg) |
|
465 { |
|
466 args.AppendL(&aArg2); |
|
467 } |
|
468 if (&aArg3 != iNoArg) |
|
469 { |
|
470 args.AppendL(&aArg3); |
|
471 } |
|
472 if (&aArg4 != iNoArg) |
|
473 { |
|
474 args.AppendL(&aArg4); |
|
475 } |
|
476 |
|
477 TInt r = DoMatchL(aText, aAnchor, aConsumed, args); |
|
478 CleanupStack::PopAndDestroy(&args); |
|
479 return r; |
|
480 } |
|
481 |
|
482 /** |
|
483 * General function to perform a regular expression (RE) match on a substring |
|
484 * of the text. |
|
485 * @param aText the text to match against the regular expression. |
|
486 * @param aAnchor the type of match to perform |
|
487 * @param array of arguments that will contain the extracted subpatterns. |
|
488 * @return ETrue if the RE matches a substring of the supplied text, |
|
489 * EFalse otherwise. |
|
490 */ |
|
491 EXPORT_C TBool CRegEx::DoMatchL(const TDesC8& aText, |
|
492 TAnchor aAnchor, |
|
493 TInt& aConsumed, |
|
494 const RPointerArray<const TRegExArg>& aArgs) const |
|
495 { |
|
496 const TInt vectorSize = (1 + aArgs.Count()) * KPcreWorkspaceMultiplier; |
|
497 TInt* vector = new( ELeave ) TInt[ vectorSize ]; |
|
498 CleanupArrayDeletePushL( vector ); |
|
499 |
|
500 TBool r = DoMatchImpl(aText, aAnchor, aConsumed, aArgs, vector, |
|
501 vectorSize); |
|
502 CleanupStack::PopAndDestroy(vector); |
|
503 return r; |
|
504 } |
|
505 |
|
506 /** |
|
507 * Allows text to be scanned incrementally. Call this function repeatidly to |
|
508 * match regular expressions at the front of a string and skip over them as |
|
509 * they match. |
|
510 * @param aText the text to match against the regular expression. |
|
511 * @return ETrue if the RE matched and a substring was consumed, |
|
512 * EFalse otherwise. |
|
513 * @see FindAndConsumeL() |
|
514 */ |
|
515 EXPORT_C TBool CRegEx::ConsumeL(TDes8& aText) const |
|
516 { |
|
517 return ConsumeL(aText, *iNoArg, *iNoArg, *iNoArg, *iNoArg); |
|
518 } |
|
519 |
|
520 /** |
|
521 * Allows text to be scanned incrementally. Repeatidly call this function to |
|
522 * match regular expressions at the front of a string and skip over them as |
|
523 * they match. |
|
524 * @param aText the text to match against the regular expression. |
|
525 * @param aArg1 contains the first extracted subpattern. |
|
526 * @return ETrue if the RE matched and a substring was consumed, |
|
527 * EFalse otherwise. |
|
528 * @see FindAndConsumeL() |
|
529 */ |
|
530 EXPORT_C TBool CRegEx::ConsumeL(TDes8& aText, |
|
531 const TRegExArg& aArg1) const |
|
532 { |
|
533 return ConsumeL(aText, aArg1, *iNoArg, *iNoArg, *iNoArg); |
|
534 } |
|
535 |
|
536 /** |
|
537 * Allows text to be scanned incrementally. Repeatidly call this function to |
|
538 * match regular expressions at the front of a string and skip over them as |
|
539 * they match. |
|
540 * @param aText the text to match against the regular expression. |
|
541 * @param aArg1 contains the first extracted subpattern. |
|
542 * @param aArg2 contains the second extracted subpattern. |
|
543 * @return ETrue if the RE matched and a substring was consumed, |
|
544 * EFalse otherwise. |
|
545 * @see FindAndConsumeL() |
|
546 */ |
|
547 EXPORT_C TBool CRegEx::ConsumeL(TDes8& aText, |
|
548 const TRegExArg& aArg1, |
|
549 const TRegExArg& aArg2) const |
|
550 { |
|
551 return ConsumeL(aText, aArg1, aArg2, *iNoArg, *iNoArg); |
|
552 } |
|
553 |
|
554 /** |
|
555 * Allows text to be scanned incrementally. Repeatidly call this function to |
|
556 * match regular expressions at the front of a string and skip over them as |
|
557 * they match. |
|
558 * @param aText the text to match against the regular expression. |
|
559 * @param aArg1 contains the first extracted subpattern. |
|
560 * @param aArg2 contains the second extracted subpattern. |
|
561 * @param aArg3 contains the third extracted subpattern. |
|
562 * @return ETrue if the RE matched and a substring was consumed, |
|
563 * EFalse otherwise. |
|
564 * @see FindAndConsumeL() |
|
565 */ |
|
566 EXPORT_C TBool CRegEx::ConsumeL(TDes8& aText, |
|
567 const TRegExArg& aArg1, |
|
568 const TRegExArg& aArg2, |
|
569 const TRegExArg& aArg3) const |
|
570 { |
|
571 return ConsumeL(aText, aArg1, aArg2, aArg3, *iNoArg); |
|
572 } |
|
573 |
|
574 /** |
|
575 * Allows text to be scanned incrementally. Repeatidly call this function to |
|
576 * match regular expressions at the front of a string and skip over them as |
|
577 * they match. |
|
578 * @param aText the text to match against the regular expression. |
|
579 * @param aArg1 contains the first extracted subpattern. |
|
580 * @param aArg2 contains the second extracted subpattern. |
|
581 * @param aArg3 contains the third extracted subpattern. |
|
582 * @param aArg4 contains the fourth extracted subpattern. |
|
583 * @return ETrue if the RE matched and a substring was consumed, |
|
584 * EFalse otherwise. |
|
585 * @see FindAndConsumeL() |
|
586 */ |
|
587 EXPORT_C TBool CRegEx::ConsumeL(TDes8& aText, |
|
588 const TRegExArg& aArg1, |
|
589 const TRegExArg& aArg2, |
|
590 const TRegExArg& aArg3, |
|
591 const TRegExArg& aArg4) const |
|
592 { |
|
593 RPointerArray<const TRegExArg> args; |
|
594 CleanupClosePushL(args); |
|
595 args.ReserveL(KMaxArgs); |
|
596 |
|
597 if (&aArg1 != iNoArg) |
|
598 { |
|
599 args.AppendL(&aArg1); |
|
600 } |
|
601 if (&aArg2 != iNoArg) |
|
602 { |
|
603 args.AppendL(&aArg2); |
|
604 } |
|
605 if (&aArg3 != iNoArg) |
|
606 { |
|
607 args.AppendL(&aArg3); |
|
608 } |
|
609 if (&aArg4 != iNoArg) |
|
610 { |
|
611 args.AppendL(&aArg4); |
|
612 } |
|
613 |
|
614 TInt consumed = 0; |
|
615 TInt vector[KVecSize]; |
|
616 |
|
617 TBool r = DoMatchImpl(aText, EAnchorStart, consumed, args, vector, |
|
618 KVecSize); |
|
619 if (r) |
|
620 { |
|
621 // Remove prefix |
|
622 aText.Delete(0, consumed); |
|
623 } |
|
624 CleanupStack::PopAndDestroy(&args); |
|
625 return r; |
|
626 } |
|
627 |
|
628 /** |
|
629 * Allows text to be scanned incrementally. Repeatidly call this function to |
|
630 * match regular expression in a string and extract them as they match. |
|
631 * Unlike ConsumeL, your match is not anchored to the start of the string. |
|
632 * @param aText the text to match against the regular expression. |
|
633 * @return ETrue if the RE matched and a substring was consumed, |
|
634 * EFalse otherwise. |
|
635 * @see ConsumeL() |
|
636 */ |
|
637 EXPORT_C TBool CRegEx::FindAndConsumeL(TDes8& aText) const |
|
638 { |
|
639 return FindAndConsumeL(aText, *iNoArg, *iNoArg, *iNoArg, *iNoArg); |
|
640 } |
|
641 |
|
642 /** |
|
643 * Allows text to be scanned incrementally. Repeatidly call this function to |
|
644 * match regular expression in a string and extract them as they match. |
|
645 * Unlike ConsumeL, your match is not anchored to the start of the string. |
|
646 * @param aText the text to match against the regular expression. |
|
647 * @param aArg1 contains the first extracted subpattern. |
|
648 * @return ETrue if the RE matched and a substring was consumed, |
|
649 * EFalse otherwise. |
|
650 * @see ConsumeL() |
|
651 */ |
|
652 EXPORT_C TBool CRegEx::FindAndConsumeL(TDes8& aText, |
|
653 const TRegExArg& aArg1) const |
|
654 { |
|
655 return FindAndConsumeL(aText, aArg1, *iNoArg, *iNoArg, *iNoArg); |
|
656 } |
|
657 |
|
658 /** |
|
659 * Allows text to be scanned incrementally. Repeatidly call this function to |
|
660 * match regular expression in a string and extract them as they match. |
|
661 * Unlike ConsumeL, your match is not anchored to the start of the string. |
|
662 * @param aText the text to match against the regular expression. |
|
663 * @param aArg1 contains the first extracted subpattern. |
|
664 * @param aArg2 contains the second extracted subpattern. |
|
665 * @return ETrue if the RE matched and a substring was consumed, |
|
666 * EFalse otherwise. |
|
667 * @see ConsumeL() |
|
668 */ |
|
669 EXPORT_C TBool CRegEx::FindAndConsumeL(TDes8& aText, |
|
670 const TRegExArg& aArg1, |
|
671 const TRegExArg& aArg2) const |
|
672 { |
|
673 return FindAndConsumeL(aText, aArg1, aArg2, *iNoArg, *iNoArg); |
|
674 } |
|
675 |
|
676 /** |
|
677 * Allows text to be scanned incrementally. Repeatidly call this function to |
|
678 * match regular expression in a string and extract them as they match. |
|
679 * Unlike ConsumeL, your match is not anchored to the start of the string. |
|
680 * @param aText the text to match against the regular expression. |
|
681 * @param aArg1 contains the first extracted subpattern. |
|
682 * @param aArg2 contains the second extracted subpattern. |
|
683 * @param aArg3 contains the third extracted subpattern. |
|
684 * @return ETrue if the RE matched and a substring was consumed, |
|
685 * EFalse otherwise. |
|
686 * @see ConsumeL() |
|
687 */ |
|
688 EXPORT_C TBool CRegEx::FindAndConsumeL(TDes8& aText, |
|
689 const TRegExArg& aArg1, |
|
690 const TRegExArg& aArg2, |
|
691 const TRegExArg& aArg3) const |
|
692 { |
|
693 return FindAndConsumeL(aText, aArg1, aArg2, aArg3, *iNoArg); |
|
694 } |
|
695 |
|
696 /** |
|
697 * Allows text to be scanned incrementally. Repeatidly call this function to |
|
698 * match regular expression in a string and extract them as they match. |
|
699 * Unlike ConsumeL, your match is not anchored to the start of the string. |
|
700 * @param aText the text to match against the regular expression. |
|
701 * @param aArg1 contains the first extracted subpattern. |
|
702 * @param aArg2 contains the second extracted subpattern. |
|
703 * @param aArg3 contains the third extracted subpattern. |
|
704 * @param aArg4 contains the fourth extracted subpattern. |
|
705 * @return ETrue if the RE matched and a substring was consumed, |
|
706 * EFalse otherwise. |
|
707 * @see ConsumeL() |
|
708 */ |
|
709 EXPORT_C TBool CRegEx::FindAndConsumeL(TDes8& aText, |
|
710 const TRegExArg& aArg1, |
|
711 const TRegExArg& aArg2, |
|
712 const TRegExArg& aArg3, |
|
713 const TRegExArg& aArg4) const |
|
714 { |
|
715 RPointerArray<const TRegExArg> args; |
|
716 CleanupClosePushL(args); |
|
717 args.ReserveL(KMaxArgs); |
|
718 |
|
719 if (&aArg1 != iNoArg) |
|
720 { |
|
721 args.AppendL(&aArg1); |
|
722 } |
|
723 if (&aArg2 != iNoArg) |
|
724 { |
|
725 args.AppendL(&aArg2); |
|
726 } |
|
727 if (&aArg3 != iNoArg) |
|
728 { |
|
729 args.AppendL(&aArg3); |
|
730 } |
|
731 if (&aArg4 != iNoArg) |
|
732 { |
|
733 args.AppendL(&aArg4); |
|
734 } |
|
735 |
|
736 TInt consumed = 0; |
|
737 TInt vector[KVecSize]; |
|
738 |
|
739 TBool r = DoMatchImpl(aText, EUnanchored, consumed, args, vector, KVecSize); |
|
740 CleanupStack::PopAndDestroy(&args); |
|
741 |
|
742 if (r) |
|
743 { |
|
744 // Remove prefix |
|
745 aText.Delete(0, consumed); |
|
746 } |
|
747 |
|
748 return r; |
|
749 } |
|
750 |
|
751 |
|
752 /** |
|
753 * The first match of the regular expression in aString is |
|
754 * replaced by aRewrite. |
|
755 * |
|
756 * Within aRewrite, backslash-escaped digits (\1 to \9) can be |
|
757 * used to insert text matching a corresponding parenthesized group from |
|
758 * the pattern. \0 in aRewrite refers to the entire matching text. |
|
759 * @param aRewrite the text to replace the matching substring with. |
|
760 * @param aText the text to match against the regular expression. |
|
761 * @return ETrue if match occurred and replace was succsessful, |
|
762 * EFalse otherwise. |
|
763 * @see GlobalReplaceL() |
|
764 */ |
|
765 EXPORT_C TBool CRegEx::ReplaceL(const TDesC8& aRewrite, TDes8& aString) const |
|
766 { |
|
767 TInt r = EFalse; |
|
768 TInt vector[KVecSize]; |
|
769 |
|
770 TInt matches = TryMatch(aString, 0, EUnanchored, vector, KVecSize); |
|
771 if (matches == 0) |
|
772 { |
|
773 return r; |
|
774 } |
|
775 |
|
776 HBufC8* s = HBufC8::NewLC(aString.MaxLength()); |
|
777 TPtr8 pS = s->Des(); |
|
778 if (!Rewrite(pS, aRewrite, aString, vector, KVecSize, matches)) |
|
779 { |
|
780 CleanupStack::PopAndDestroy(s); |
|
781 return r; |
|
782 } |
|
783 |
|
784 __ASSERT_DEBUG(vector[0] >= 0, Panic(EInvalidMatchResults)); |
|
785 __ASSERT_DEBUG(vector[1] >= 0, Panic(EInvalidMatchResults)); |
|
786 |
|
787 TInt replacementLength = vector[1] - vector[0]; |
|
788 |
|
789 if((aString.Length() + s->Length() - replacementLength) <= aString.MaxLength()) |
|
790 { |
|
791 aString.Replace(vector[0], replacementLength, *s); |
|
792 } |
|
793 else |
|
794 { |
|
795 CleanupStack::PopAndDestroy(s); |
|
796 iErrorCode = KErrRegExOutputTooBig; |
|
797 return r; |
|
798 } |
|
799 |
|
800 CleanupStack::PopAndDestroy(s); |
|
801 r = ETrue; |
|
802 return r; |
|
803 } |
|
804 |
|
805 /** |
|
806 * All matches of the regular expression in aString are |
|
807 * replaced by aRewrite. |
|
808 * |
|
809 * Within the rewrite string, backslash-escaped digits (\1 to \9) can be |
|
810 * used to insert text matching a corresponding parenthesized group from |
|
811 * the pattern. \0 in "aRewrite" refers to the entire matching text. |
|
812 * @param aRewrite the text to replace the matching substring with. |
|
813 * @param aString the text to match against the regular expression. |
|
814 * @return ETrue if matches occurred and replace was successful, |
|
815 * EFalse otherwise. |
|
816 * @see ReplaceL() |
|
817 */ |
|
818 EXPORT_C TInt CRegEx::GlobalReplaceL(const TDesC8& aRewrite, |
|
819 TDes8& aString) const |
|
820 { |
|
821 TInt count = 0; |
|
822 TInt vector[KVecSize]; |
|
823 |
|
824 HBufC8* out = HBufC8::NewLC(aString.MaxLength()); |
|
825 TPtr8 pOut = out->Des(); |
|
826 |
|
827 TInt start = 0; |
|
828 TInt lastend = -1; |
|
829 |
|
830 while (start <= aString.Length()) |
|
831 { |
|
832 TInt matches = TryMatch(aString, start, EUnanchored, vector, KVecSize); |
|
833 if (matches <= 0) |
|
834 { |
|
835 break; |
|
836 } |
|
837 TInt matchstart = vector[0]; |
|
838 TInt matchend = vector[1]; |
|
839 |
|
840 __ASSERT_DEBUG(matchstart >= start, EInvalidMatchResults); |
|
841 __ASSERT_DEBUG(matchend >= matchstart, EInvalidMatchResults); |
|
842 if (matchstart == matchend && matchstart == lastend) |
|
843 { |
|
844 // advance one character if we matched an empty string at the same |
|
845 // place as the last match occurred |
|
846 matchend = start + 1; |
|
847 // If the current char is CR and we're in CRLF mode, skip LF too. |
|
848 // Note it's better to call pcre_fullinfo() than to examine |
|
849 // all_options(), since options_ could have changed bewteen |
|
850 // compile-time and now, but this is simpler and safe enough. |
|
851 // Modified by PH to add ANY and ANYCRLF. |
|
852 if ((start + 1 < aString.Length()) && |
|
853 aString[start] == '\r' && aString[start+1] == '\n' && |
|
854 (NewlineMode(iOptions.AllOptions()) == PCRE_NEWLINE_CRLF || |
|
855 NewlineMode(iOptions.AllOptions()) == PCRE_NEWLINE_ANY || |
|
856 NewlineMode(iOptions.AllOptions()) == PCRE_NEWLINE_ANYCRLF)) |
|
857 { |
|
858 matchend++; |
|
859 } |
|
860 // We also need to advance more than one char if we're in utf8 mode. |
|
861 #ifdef SUPPORT_UTF8 |
|
862 if (iOptions.Utf8()) |
|
863 { |
|
864 while ((matchend < aString.Length()) |
|
865 && (aString[matchend] & 0xc0) == 0x80) |
|
866 { |
|
867 matchend++; |
|
868 } |
|
869 } |
|
870 #endif |
|
871 if (matchend <= aString.Length()) |
|
872 { |
|
873 if(pOut.Length() + (matchend - start) <= pOut.MaxLength()) |
|
874 { |
|
875 pOut.Append(aString.Mid(start, matchend - start)); |
|
876 } |
|
877 else |
|
878 { |
|
879 iErrorCode = KErrRegExOutputTooBig; |
|
880 return KErrRegExOutputTooBig; |
|
881 } |
|
882 } |
|
883 start = matchend; |
|
884 } |
|
885 else |
|
886 { |
|
887 if(pOut.Length() + (matchstart - start) <= pOut.MaxLength()) |
|
888 { |
|
889 pOut.Append(aString.Mid(start, matchstart - start)); |
|
890 } |
|
891 else |
|
892 { |
|
893 iErrorCode = KErrRegExOutputTooBig; |
|
894 return KErrRegExOutputTooBig; |
|
895 } |
|
896 Rewrite(pOut, aRewrite, aString, vector, KVecSize, matches); |
|
897 start = matchend; |
|
898 lastend = matchend; |
|
899 count++; |
|
900 } |
|
901 } |
|
902 |
|
903 if (count == 0) |
|
904 { |
|
905 return count; |
|
906 } |
|
907 |
|
908 if (start < aString.Length()) |
|
909 { |
|
910 if((aString.Length() - start) + pOut.Length() <= pOut.MaxLength()) |
|
911 { |
|
912 pOut.Append(aString.Mid(start, aString.Length() - start)); |
|
913 } |
|
914 else |
|
915 { |
|
916 iErrorCode = KErrRegExOutputTooBig; |
|
917 return KErrRegExOutputTooBig; |
|
918 } |
|
919 } |
|
920 |
|
921 aString.Swap(pOut); |
|
922 |
|
923 CleanupStack::PopAndDestroy(out); |
|
924 |
|
925 return count; |
|
926 } |
|
927 |
|
928 /** |
|
929 * The first match of the regular expression in the supplied string is |
|
930 * replaced by another supplied string and copied into aOut with substitutions. |
|
931 * The non-matching portions of aString are ignored. |
|
932 * |
|
933 * Within the rewrite string, backslash-escaped digits (\1 to \9) can be |
|
934 * used to insert text matching a corresponding parenthesized group from |
|
935 * the pattern. \0 in "aRewrite" refers to the entire matching text. |
|
936 * @param aRewrite the text to replace the matching substring with. |
|
937 * @param aText the text to match against the regular expression. |
|
938 * @return ETrue if match occurred and extraction was succsessful, |
|
939 * EFalse otherwise. |
|
940 * @see ReplaceL() |
|
941 */ |
|
942 EXPORT_C TBool CRegEx::ExtractL(const TDesC8& aRewrite, |
|
943 const TDesC8& aText, TDes8& aOut) const |
|
944 { |
|
945 TInt vector[KVecSize]; |
|
946 |
|
947 TInt matches = TryMatch(aText, 0, EUnanchored, vector, KVecSize); |
|
948 if (matches == 0) |
|
949 { |
|
950 iErrorCode = KErrRegExZeroMatches; |
|
951 return EFalse; |
|
952 } |
|
953 aOut.Zero(); |
|
954 |
|
955 TBool r = Rewrite(aOut, aRewrite, aText, vector, KVecSize, matches); |
|
956 return r; |
|
957 } |
|
958 |
|
959 /** |
|
960 * Returns EPcreNewlineAnyCrLf, EPcreNewlineAny, EPcreNewlineCrLf, |
|
961 * EPcreNewlineLf or EPcreNewlineCr |
|
962 * Note that EPcreNewlineCrLf is defined to be EPcreNewlineCr | EPcreNewlineLf. |
|
963 * @param aOptions |
|
964 * @return |
|
965 */ |
|
966 |
|
967 EXPORT_C TInt CRegEx::NewlineMode(TInt aOptions) |
|
968 { |
|
969 // TODO: if we can make it threadsafe, cache this var |
|
970 TInt newlineMode = 0; |
|
971 /* if (newlineMode) return newlineMode; */ // do this once it's cached |
|
972 |
|
973 if (aOptions & (EPcreNewlineCrLf|EPcreNewlineCr|EPcreNewlineLf| |
|
974 EPcreNewlineAny|EPcreNewlineAnyCrLf)) |
|
975 { |
|
976 newlineMode = (aOptions & |
|
977 (EPcreNewlineCrLf|EPcreNewlineCr|EPcreNewlineLf| |
|
978 EPcreNewlineAny|EPcreNewlineAnyCrLf)); |
|
979 } |
|
980 else |
|
981 { |
|
982 TInt newline; |
|
983 pcre_config(PCRE_CONFIG_NEWLINE, &newline); |
|
984 |
|
985 switch(newline) |
|
986 { |
|
987 case KNewLineAnyCrLf: |
|
988 { |
|
989 newlineMode = EPcreNewlineAnyCrLf; |
|
990 break; |
|
991 } |
|
992 case KNewLineAny: |
|
993 { |
|
994 newlineMode = EPcreNewlineAny; |
|
995 break; |
|
996 } |
|
997 case KNewLineLf: |
|
998 { |
|
999 newlineMode = EPcreNewlineLf; |
|
1000 break; |
|
1001 } |
|
1002 case KNewLineCr: |
|
1003 { |
|
1004 newlineMode = EPcreNewlineCr; |
|
1005 break; |
|
1006 } |
|
1007 case KNewLineCrLf: |
|
1008 { |
|
1009 newlineMode = EPcreNewlineCrLf; |
|
1010 break; |
|
1011 } |
|
1012 default: |
|
1013 { |
|
1014 __ASSERT_DEBUG(EFalse, EUnexpectedRetValFromPcre); |
|
1015 } |
|
1016 } |
|
1017 } |
|
1018 return newlineMode; |
|
1019 } |
|
1020 |
|
1021 /** |
|
1022 * Escapes all potentially meaningful regular expression (RE) characters in |
|
1023 * aUnquoted. The returned string, used as a regular expression, |
|
1024 * will exactly match the original string. For example, |
|
1025 * 1.5-2.0? |
|
1026 * may become: |
|
1027 * 1\.5\-2\.0\? |
|
1028 * Note QuoteMeta behaves the same as perl's QuoteMeta function, |
|
1029 * *except* that it escapes the NUL character (\0) as backslash + 0, |
|
1030 * rather than backslash + NUL. |
|
1031 * @param aUnquoted unescaped string. |
|
1032 * @return string with all meaningful RE characters escaped. |
|
1033 */ |
|
1034 EXPORT_C HBufC8* CRegEx::QuoteMetaL(const TDesC8& aUnquoted) |
|
1035 { |
|
1036 HBufC8* result = HBufC8::NewLC(aUnquoted.Length() + KReserveForEscapeChars); |
|
1037 TPtr8 pResult = result->Des(); |
|
1038 |
|
1039 _LIT8(KEscapeNull, "\\0"); |
|
1040 _LIT8(KEscapeOther, "\\%c"); |
|
1041 |
|
1042 // Escape any ascii character not in [A-Za-z_0-9]. |
|
1043 // |
|
1044 // Note that it's legal to escape a character even if it has no |
|
1045 // special meaning in a regular expression -- so this function does |
|
1046 // that. (This also makes it identical to the perl function of the |
|
1047 // same name; see `perldoc -f quotemeta`.) The one exception is |
|
1048 // escaping NUL: rather than doing backslash + NUL, like perl does, |
|
1049 // we do '\0', because pcre itself doesn't take embedded NUL chars. |
|
1050 for (TInt ii = 0; ii < aUnquoted.Length(); ++ii) |
|
1051 { |
|
1052 |
|
1053 // Always make sure we have enough room to escape at least one character |
|
1054 if(pResult.MaxLength() <= pResult.Length() + KEscapeTripletLength) |
|
1055 { |
|
1056 result = result->ReAllocL(pResult.Length() + KReserveForEscapeChars); |
|
1057 CleanupStack::Pop(); |
|
1058 CleanupStack::PushL(result); |
|
1059 pResult.Set(result->Des()); |
|
1060 } |
|
1061 |
|
1062 if (aUnquoted[ii] == TChar('\0')) |
|
1063 { |
|
1064 pResult.Append(KEscapeNull()); |
|
1065 } |
|
1066 else if ((aUnquoted[ii] < TChar('a') || aUnquoted[ii] > TChar('z')) && |
|
1067 (aUnquoted[ii] < TChar('A') || aUnquoted[ii] > TChar('Z')) && |
|
1068 (aUnquoted[ii] < TChar('0') || aUnquoted[ii] > TChar('9')) && |
|
1069 aUnquoted[ii] != TChar('_') && |
|
1070 // If this is the part of a UTF8 or Latin1 character, we need |
|
1071 // to copy this byte without escaping. Experimentally this is |
|
1072 // what works correctly with the regexp library. |
|
1073 !(aUnquoted[ii] & TChar(0x80))) |
|
1074 { |
|
1075 pResult.AppendFormat(KEscapeOther, aUnquoted[ii]); |
|
1076 } |
|
1077 else |
|
1078 { |
|
1079 pResult.Append(aUnquoted[ii]); |
|
1080 } |
|
1081 } |
|
1082 CleanupStack::Pop(result); |
|
1083 return result; |
|
1084 } |
|
1085 |
|
1086 /** |
|
1087 * Returns the number of capturing subpatterns, or -1 if the |
|
1088 * regular expressions wasn't valid on construction. |
|
1089 * @return the number of capturing subpatterns or or -1 if the regular |
|
1090 * expressions wasn't valid on construction. |
|
1091 */ |
|
1092 EXPORT_C TInt CRegEx::NumberOfCapturingGroups() const |
|
1093 { |
|
1094 if (iRePartial == NULL) return KErrNotFound; |
|
1095 |
|
1096 TInt result; |
|
1097 TInt pcreRetVal = pcre_fullinfo(iRePartial, // The regular expression object |
|
1098 iExtraPartial, // Study data |
|
1099 PCRE_INFO_CAPTURECOUNT, |
|
1100 &result); |
|
1101 __ASSERT_DEBUG(pcreRetVal == 0, Panic(EUnexpectedRetValFromPcre)); |
|
1102 return result; |
|
1103 } |
|
1104 |
|
1105 /** |
|
1106 * Analyzes a regular expression (RE) pattern further. This is especially useful |
|
1107 * if an RE is going to be used several times to reduce the time taken for |
|
1108 * matching. |
|
1109 * |
|
1110 * "At present, studying a pattern is useful only for non-anchored patterns that |
|
1111 * do not have a single fixed starting character." |
|
1112 */ |
|
1113 EXPORT_C void CRegEx::Study() |
|
1114 { |
|
1115 // "At present, studying a pattern is useful only for non-anchored |
|
1116 // patterns that do not have a single fixed starting character." |
|
1117 if(iRePartial) |
|
1118 { |
|
1119 const char* compileError; // ignored |
|
1120 iExtraPartial = pcre_study( |
|
1121 iRePartial, /* result of pcre_compile() */ |
|
1122 0, /* no options exist */ |
|
1123 &compileError); /* set to NULL or points to a message */ |
|
1124 } |
|
1125 } |
|
1126 |
|
1127 |
|
1128 // Private Functions // |
|
1129 |
|
1130 /** |
|
1131 * Standard constructor |
|
1132 */ |
|
1133 CRegEx::CRegEx() |
|
1134 { |
|
1135 } |
|
1136 |
|
1137 |
|
1138 /** |
|
1139 * Standard constructor |
|
1140 * @param aOptions options used when compiling regular expression. |
|
1141 */ |
|
1142 CRegEx::CRegEx(const TRegExOptions& aOptions) |
|
1143 : iOptions(aOptions) |
|
1144 { |
|
1145 } |
|
1146 |
|
1147 /** |
|
1148 * Second phase constructor. |
|
1149 */ |
|
1150 void CRegEx::ConstructL(const TDesC8& aPattern) |
|
1151 { |
|
1152 iPattern = HBufC8::NewL(aPattern.Length() + 1); // Leave room for \0 |
|
1153 TPtr8 pPattern = iPattern->Des(); |
|
1154 pPattern.Copy(aPattern); |
|
1155 pPattern.ZeroTerminate(); |
|
1156 CommonConstructL(); |
|
1157 } |
|
1158 |
|
1159 void CRegEx::CommonConstructL() |
|
1160 { |
|
1161 // The default value for an argument, to indicate no arg was passed in |
|
1162 iNoArg = new(ELeave) TRegExArg((TAny*)NULL); |
|
1163 |
|
1164 // Compile patterns used for partial and full matches. |
|
1165 iReFull = NULL; |
|
1166 iRePartial = NULL; |
|
1167 |
|
1168 iRePartial = CompileL(EUnanchored); |
|
1169 if(iRePartial) |
|
1170 { |
|
1171 iReFull = CompileL(EAnchorBoth); |
|
1172 } |
|
1173 User::LeaveIfError(iErrorCode); |
|
1174 } |
|
1175 |
|
1176 void CRegEx::ConstructL(const TDesC16& aPattern) |
|
1177 { |
|
1178 LtkUtils::RLtkBuf8 narrowBuf; |
|
1179 narrowBuf.CreateLC(aPattern.Length() + 1); |
|
1180 if (iOptions.Utf8()) |
|
1181 { |
|
1182 narrowBuf.CopyAsUtf8L(aPattern); |
|
1183 } |
|
1184 else |
|
1185 { |
|
1186 narrowBuf.Copy(aPattern); |
|
1187 } |
|
1188 narrowBuf.AppendL(0); |
|
1189 iPattern = narrowBuf.ToHBuf(); |
|
1190 CleanupStack::Pop(&narrowBuf); |
|
1191 |
|
1192 CommonConstructL(); |
|
1193 } |
|
1194 |
|
1195 /** |
|
1196 * Compile the regular expression (RE) pattern. |
|
1197 * @param aAnchor anchoring to use for the RE pattern. |
|
1198 * @return pointer to PCRE object with compiled RE data. |
|
1199 */ |
|
1200 pcre* CRegEx::CompileL(TAnchor aAnchor) |
|
1201 { |
|
1202 // First, convert TRegExOptions into pcre options |
|
1203 TInt pcreOptions = iOptions.AllOptions(); |
|
1204 |
|
1205 // Special treatment for anchoring. This is needed because at |
|
1206 // runtime pcre only provides an option for anchoring at the |
|
1207 // beginning of a string (unless you use offset). |
|
1208 // |
|
1209 // There are three types of anchoring we want: |
|
1210 // EUnanchored Compile the original pattern, and use |
|
1211 // a pcre unanchored match. |
|
1212 // EAnchorStart Compile the original pattern, and use |
|
1213 // a pcre anchored match. |
|
1214 // EAnchorBoth Tack a "\z" to the end of the original pattern |
|
1215 // and use a pcre anchored match. |
|
1216 |
|
1217 pcre* re; |
|
1218 TInt errCode = 0; |
|
1219 TInt errOffset = 0; |
|
1220 |
|
1221 const char* compileError; // ignored |
|
1222 |
|
1223 if (aAnchor != EAnchorBoth) |
|
1224 { |
|
1225 re = pcre_compile2((const char *)iPattern->Ptr(), pcreOptions, |
|
1226 &errCode,&compileError, &errOffset, NULL); |
|
1227 } |
|
1228 else |
|
1229 { |
|
1230 // Tack a '\z' at the end of RE. Parenthesize it first so that |
|
1231 // the '\z' applies to all top-level alternatives in the regexp. |
|
1232 _LIT8(KWrapped, "(?:%S)\\z\x0"); |
|
1233 HBufC8* wrapped = HBufC8::NewLC(KWrapped().Length() |
|
1234 + iPattern->Length()); |
|
1235 TPtr8 pWrapped = wrapped->Des(); |
|
1236 pWrapped.Format(KWrapped(), iPattern); |
|
1237 re = pcre_compile2((const char *)pWrapped.Ptr(), pcreOptions, |
|
1238 &errCode, &compileError, &errOffset, NULL); |
|
1239 CleanupStack::PopAndDestroy(wrapped); |
|
1240 } |
|
1241 |
|
1242 if (!re && (iErrorCode == KErrNone)) |
|
1243 { |
|
1244 iErrorCode = KErrRegExCompileBase - errCode; |
|
1245 iErrorOffset = errOffset; |
|
1246 } |
|
1247 return re; |
|
1248 } |
|
1249 |
|
1250 /** |
|
1251 * Cleanup the compiled regular expression and study data. |
|
1252 * Separated out from destructor in case support for recompiling |
|
1253 * is introduced. |
|
1254 */ |
|
1255 void CRegEx::Cleanup() |
|
1256 { |
|
1257 if (iReFull) |
|
1258 { |
|
1259 (*pcre_free)(iReFull); |
|
1260 } |
|
1261 |
|
1262 if (iRePartial) |
|
1263 { |
|
1264 (*pcre_free)(iRePartial); |
|
1265 } |
|
1266 if(iExtraPartial) |
|
1267 { |
|
1268 (*pcre_free)(iExtraPartial); |
|
1269 } |
|
1270 } |
|
1271 |
|
1272 // Internal matching and rewrite implementations // |
|
1273 |
|
1274 /** |
|
1275 * Match against aText, filling in aVector (up to aVector.Count() * 2/3) with |
|
1276 * pairs of integers for the beginning and end positions of matched |
|
1277 * text. The first pair corresponds to the entire matched text; |
|
1278 * subsequent pairs correspond, in order, to parentheses-captured |
|
1279 * matches. Returns the number of pairs (one more than the number of |
|
1280 * the last subpattern with a match) if matching was successful |
|
1281 * and zero if the match failed. |
|
1282 * I.e. for RE("(foo)|(bar)|(baz)") it will return 2, 3, and 4 when matching |
|
1283 * against "foo", "bar", and "baz" respectively. |
|
1284 * When matching RE("(foo)|hello") against "hello", it will return 1. |
|
1285 * But the values for all subpattern are filled in into "aVector". |
|
1286 * @param aText the text to match against the regular expression. |
|
1287 * @param aStartPos position in aText to start matching from |
|
1288 * @param aAnchor the type of match to perform. |
|
1289 * @param aVector vector that stores pairs of integers for the start and end |
|
1290 * positions of matched substrings. |
|
1291 * @param aVectorSize length of aVector |
|
1292 * @return the number of matched subpatterns. |
|
1293 */ |
|
1294 TInt CRegEx::TryMatch(const TDesC8& aText, |
|
1295 TInt aStartPos, |
|
1296 TAnchor aAnchor, |
|
1297 TInt* aVector, |
|
1298 TInt aVectorSize) const |
|
1299 { |
|
1300 pcre* re = (aAnchor == EAnchorBoth) ? iReFull : iRePartial; |
|
1301 if (!re) |
|
1302 { |
|
1303 // Matching against invalid re |
|
1304 return 0; |
|
1305 } |
|
1306 |
|
1307 pcre_extra extra = { 0, 0, 0, 0, 0, 0 }; |
|
1308 |
|
1309 if(iExtraPartial) |
|
1310 { |
|
1311 extra = *iExtraPartial; |
|
1312 } |
|
1313 |
|
1314 if (iOptions.MatchLimit() > 0) |
|
1315 { |
|
1316 extra.flags |= PCRE_EXTRA_MATCH_LIMIT; |
|
1317 extra.match_limit = iOptions.MatchLimit(); |
|
1318 } |
|
1319 |
|
1320 if (iOptions.MatchLimitRecursion() > 0) |
|
1321 { |
|
1322 extra.flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION; |
|
1323 extra.match_limit_recursion = iOptions.MatchLimitRecursion(); |
|
1324 } |
|
1325 |
|
1326 TInt rc = pcre_exec(re, // The regular expression object |
|
1327 &extra, |
|
1328 (const char *)aText.Ptr(), |
|
1329 aText.Length(), |
|
1330 aStartPos, |
|
1331 (aAnchor == EUnanchored) ? 0 : PCRE_ANCHORED, |
|
1332 aVector, |
|
1333 aVectorSize); |
|
1334 |
|
1335 // Handle errors |
|
1336 if (rc == PCRE_ERROR_NOMATCH) |
|
1337 { |
|
1338 return 0; |
|
1339 } |
|
1340 else if (rc < 0) |
|
1341 { |
|
1342 // Unexpected return code |
|
1343 return 0; |
|
1344 } |
|
1345 else if (rc == 0) |
|
1346 { |
|
1347 // pcre_exec() returns 0 as a special case when the number of |
|
1348 // capturing subpatterns exceeds the size of the vector. |
|
1349 // When this happens, there is a match and the output vector |
|
1350 // is filled, but we miss out on the positions of the extra subpatterns. |
|
1351 rc = aVectorSize / 2; |
|
1352 } |
|
1353 return rc; |
|
1354 } |
|
1355 |
|
1356 /** |
|
1357 * Internal implementation of rewrite functionality used by the replace & |
|
1358 * extract functions. |
|
1359 * Appends the aRewrite string, with backslash subsitutions from aText |
|
1360 * and aVector, to string aOut. |
|
1361 * @param aOut output descriptor |
|
1362 * @param aRewrite descriptor backslash subsitutions to append to aOut. |
|
1363 * @param aText descriptor containing substitutions. |
|
1364 * @param aVector vector that stores pairs of integers for the start and end |
|
1365 * positions of matched substrings. |
|
1366 * @param aVectorSize length of aVector. |
|
1367 * @param aMatches number of matches. |
|
1368 * @return ETrue if the operation was successfull, EFalse otherwise. |
|
1369 * @see ReplaceL() |
|
1370 * @see GlobalReplaceL() |
|
1371 * @see ExtractL() |
|
1372 */ |
|
1373 TBool CRegEx::Rewrite(TDes8& aOut, const TDesC8& aRewrite, |
|
1374 const TDesC8& aText, TInt* aVector, |
|
1375 TInt aVectorSize, TInt aMatches) const |
|
1376 { |
|
1377 for(TInt i = 0; i < aRewrite.Length(); i++) |
|
1378 { |
|
1379 TChar c = aRewrite[i]; |
|
1380 |
|
1381 if (c == '\\') |
|
1382 { |
|
1383 c = aRewrite[++i]; |
|
1384 if (c.IsDigit()) |
|
1385 { |
|
1386 TUint n = c - TChar('0'); |
|
1387 if (n >= aMatches) |
|
1388 { |
|
1389 iErrorCode = KErrRegExBadBackslashSubsitution; |
|
1390 return EFalse; |
|
1391 } |
|
1392 __ASSERT_DEBUG(aVectorSize >= 2 * n + 1, Panic(EVectorTooSmall)); |
|
1393 TInt start = aVector[2 * n]; |
|
1394 if (start >= 0) |
|
1395 { |
|
1396 TInt requiredLength = aVector[2 * n + 1] - start; |
|
1397 if((aOut.Length() + requiredLength) <= aOut.MaxLength()) |
|
1398 { |
|
1399 aOut.Append(aText.Mid(start, requiredLength)); |
|
1400 } |
|
1401 else |
|
1402 { |
|
1403 iErrorCode = KErrRegExOutputTooBig; |
|
1404 return EFalse; |
|
1405 } |
|
1406 } |
|
1407 } |
|
1408 else if (c == '\\') |
|
1409 { |
|
1410 if((aOut.Length() + 1) <= aOut.MaxLength()) |
|
1411 { |
|
1412 aOut.Append(c); |
|
1413 } |
|
1414 else |
|
1415 { |
|
1416 iErrorCode = KErrRegExOutputTooBig; |
|
1417 return EFalse; |
|
1418 } |
|
1419 } |
|
1420 else |
|
1421 { |
|
1422 // Invalid rewrite pattern |
|
1423 iErrorCode = KErrRegExInvalidRewritePattern; |
|
1424 return EFalse; |
|
1425 } |
|
1426 } |
|
1427 else |
|
1428 { |
|
1429 if((aOut.Length() + 1) <= aOut.MaxLength()) |
|
1430 { |
|
1431 aOut.Append(c); |
|
1432 } |
|
1433 else |
|
1434 { |
|
1435 iErrorCode = KErrRegExOutputTooBig; |
|
1436 return EFalse; |
|
1437 } |
|
1438 } |
|
1439 } |
|
1440 return ETrue; |
|
1441 } |
|
1442 |
|
1443 /** |
|
1444 * Internal implementation of the match functionality. |
|
1445 * @param aText the text to match against the regular expression. |
|
1446 * @param aAnchor the type of match to perform. |
|
1447 * @param aConsumed the length of the matched substring. |
|
1448 * @param aArgs array of arguments that will contain the extracted subpatterns. |
|
1449 * @param aVector output vector that stores pairs of integers for the start and |
|
1450 * end positions of matched substrings. |
|
1451 * @param aVectorSize length of aVector |
|
1452 * @return |
|
1453 */ |
|
1454 TBool CRegEx::DoMatchImpl(const TDesC8& aText, |
|
1455 TAnchor aAnchor, |
|
1456 TInt& aConsumed, |
|
1457 const RPointerArray<const TRegExArg>& aArgs, |
|
1458 TInt* aVector, |
|
1459 TInt aVectorSize) const |
|
1460 { |
|
1461 // results + PCRE workspace |
|
1462 __ASSERT_DEBUG((1 + aArgs.Count()) * KPcreWorkspaceMultiplier <= aVectorSize,Panic(EVectorTooSmall) ); |
|
1463 TInt matches = TryMatch(aText, 0, aAnchor, aVector, aVectorSize); |
|
1464 // TryMatch never returns negatives |
|
1465 __ASSERT_DEBUG(matches >= 0, Panic(EInvalidMatchResults)); |
|
1466 |
|
1467 if (matches == 0) |
|
1468 { |
|
1469 iErrorCode = KErrRegExZeroMatches; |
|
1470 return EFalse; |
|
1471 } |
|
1472 |
|
1473 aConsumed = aVector[1]; |
|
1474 |
|
1475 if (aArgs.Count() == 0) |
|
1476 { |
|
1477 // We are not interested in results |
|
1478 return ETrue; |
|
1479 } |
|
1480 |
|
1481 if (NumberOfCapturingGroups() < aArgs.Count()) |
|
1482 { |
|
1483 // RE has fewer capturing groups than number of arg pointers passed in |
|
1484 iErrorCode = KErrRegExFewerCaptureGroupsThanArgs; |
|
1485 return EFalse; |
|
1486 } |
|
1487 |
|
1488 // If we got here, we must have matched the whole pattern. |
|
1489 // We do not need (can not do) any more checks on the value of 'matches' |
|
1490 // here -- see the comment for TryMatch. |
|
1491 for (TInt i = 0; i < aArgs.Count(); i++) |
|
1492 { |
|
1493 const TInt start = aVector[2*(i+1)]; |
|
1494 const TInt limit = aVector[2*(i+1)+1]; |
|
1495 |
|
1496 TBool r; |
|
1497 if(start == -1 || limit == -1) |
|
1498 { |
|
1499 r = aArgs[i]->Parse(KNullDesC8()); |
|
1500 } |
|
1501 else |
|
1502 { |
|
1503 r = aArgs[i]->Parse(aText.Mid(start, limit - start)); |
|
1504 } |
|
1505 if(!r) |
|
1506 { |
|
1507 iErrorCode = KErrRegExFailedToParseArg; |
|
1508 return EFalse; |
|
1509 } |
|
1510 } |
|
1511 |
|
1512 return ETrue; |
|
1513 } |
|
1514 |
|
1515 /** |
|
1516 * Panic the current thread. |
|
1517 * @param aPanic panic code. |
|
1518 */ |
|
1519 void CRegEx::Panic(TRegExPanic aPanic) |
|
1520 { |
|
1521 User::Panic(KRegExPanic(), aPanic); |
|
1522 } |