|
1 // Copyright (c) 2001-2009 Nokia Corporation and/or its subsidiary(-ies). |
|
2 // All rights reserved. |
|
3 // This component and the accompanying materials are made available |
|
4 // under the terms of the License "Eclipse Public License v1.0" |
|
5 // which accompanies this distribution, and is available |
|
6 // at the URL "http://www.eclipse.org/legal/epl-v10.html". |
|
7 // |
|
8 // Initial Contributors: |
|
9 // Nokia Corporation - initial contribution. |
|
10 // |
|
11 // Contributors: |
|
12 // |
|
13 // Description: |
|
14 // Folding and decomposition implementation |
|
15 // |
|
16 // |
|
17 |
|
18 #include "FoldDecomp.inl" |
|
19 #include "CompareImp.h" |
|
20 #include "u32std.h" |
|
21 |
|
22 #define ARRAY_LENGTH(a) (static_cast<TInt>(sizeof(a)/sizeof(a[0]))) |
|
23 |
|
24 //////////////////////////////////////////////////////////////////////////////////////////// |
|
25 // Global functions |
|
26 //////////////////////////////////////////////////////////////////////////////////////////// |
|
27 |
|
28 /** |
|
29 @internalComponent |
|
30 */ |
|
31 TChar UTF16ToChar(const TText16* a) |
|
32 { |
|
33 if (0xD800 <= a[0]) |
|
34 { |
|
35 if (a[0] < 0xE000) |
|
36 { |
|
37 if (a[0] < 0xDC00 && ::IsLowSurrogate(a[1])) |
|
38 { |
|
39 TChar c = ::PairSurrogates(a[0], a[1]); |
|
40 if ((c & 0xFFFE) != 0xFFFE) |
|
41 return c; |
|
42 } |
|
43 return 0xFFFF; |
|
44 } |
|
45 if (a[0] == 0xFFFE) |
|
46 return 0xFFFF; |
|
47 } |
|
48 return a[0]; |
|
49 } |
|
50 |
|
51 /** |
|
52 Is a character a base character (ETrue) or a combiner (EFalse)? |
|
53 For now, we will treat all control characters as base characters. |
|
54 @internalComponent |
|
55 */ |
|
56 TBool IsBaseCharacter(TChar a) |
|
57 { |
|
58 if(a < 0x220) |
|
59 { |
|
60 // These Unicode characters are all assigned |
|
61 // and none of them is a combining character |
|
62 return ETrue; |
|
63 } |
|
64 return (a.GetCategory() & 0xFFF0) - TChar::EMarkGroup; |
|
65 } |
|
66 |
|
67 /** |
|
68 @internalComponent |
|
69 */ |
|
70 inline TInt GetDecompositionIndex(TChar a) |
|
71 { |
|
72 TInt i = DecompositionHashStart(a); |
|
73 TUint32 v = KUnicodeToIndexHash[i]; |
|
74 if (!v) |
|
75 return -1; |
|
76 if ((v & 0xFFFFF) != a) |
|
77 { |
|
78 TInt step = DecompositionHashStep(a); |
|
79 do { |
|
80 i = (i + step) & KDecompositionHashBitmask; |
|
81 v = KUnicodeToIndexHash[i]; |
|
82 if (!v) |
|
83 return -1; |
|
84 } while ((v & 0xFFFFF) != a); |
|
85 } |
|
86 // it is important that this is an unsigned shift |
|
87 return static_cast<TInt>(v >> 20); |
|
88 } |
|
89 |
|
90 /** |
|
91 Will not work if an invalid index is passed. |
|
92 @internalComponent |
|
93 */ |
|
94 static TUTF32Iterator GetFoldedDecomposition(TInt aIndex) |
|
95 { |
|
96 TInt singletonIndex = aIndex - (sizeof(KNonSingletonFolds)/sizeof(KNonSingletonFolds[0])/2); |
|
97 if (0 <= singletonIndex) |
|
98 return TUTF32Iterator(KSingletonFolds + singletonIndex); |
|
99 const TText* start = KNonSingletonFolds + aIndex * 2; |
|
100 if (*start != KLongD) |
|
101 return TUTF32Iterator(start, start + 2, |
|
102 TUTF32Iterator::EStartsWithValidCharacter); |
|
103 TInt longDecompIndex = start[1]; |
|
104 start = KLongDecompositions + (longDecompIndex & 0xFFF); |
|
105 return TUTF32Iterator(start, start + (longDecompIndex >> 12) + 3, |
|
106 TUTF32Iterator::EStartsWithValidCharacter); |
|
107 } |
|
108 |
|
109 /** |
|
110 @internalComponent |
|
111 */ |
|
112 static TChar GetFirstFoldedChar(TChar a) |
|
113 { |
|
114 TInt index = ::GetDecompositionIndex(a); |
|
115 return index < 0? a : ::GetFoldedDecomposition(index).Current(); |
|
116 } |
|
117 |
|
118 /** |
|
119 @internalComponent |
|
120 */ |
|
121 static TBool FirstFoldedCodeIsNot(TInt aNonSurrogate, TInt aFoldedNonSurrogate) |
|
122 { |
|
123 TInt index = ::GetDecompositionIndex(aNonSurrogate); |
|
124 if (index < 0) |
|
125 return aNonSurrogate - aFoldedNonSurrogate; |
|
126 TInt singletonIndex = index - (sizeof(KNonSingletonFolds)/sizeof(KNonSingletonFolds[0])/2); |
|
127 if (0 < singletonIndex) |
|
128 return KSingletonFolds[singletonIndex] - aFoldedNonSurrogate; |
|
129 const TText* start = KNonSingletonFolds + index * 2; |
|
130 if (start[0] == KLongD) |
|
131 start = KLongDecompositions + (start[1] & 0xFFF); |
|
132 return *start - aFoldedNonSurrogate; |
|
133 } |
|
134 |
|
135 /** |
|
136 @internalComponent |
|
137 */ |
|
138 static TInt GetClass(TFoldedDecompIterator& a) |
|
139 { |
|
140 ASSERT(!a.AtEnd()); |
|
141 a.EnterFoldedSequence(); |
|
142 TChar ch = a.Current(); |
|
143 TInt cl = ch.GetCombiningClass(); |
|
144 if (cl == 0) |
|
145 // Assume starters have been folded from ypogegrammeni |
|
146 cl = 240; |
|
147 return cl; |
|
148 } |
|
149 |
|
150 //////////////////////////////////////////////////////////////////////////////////////////// |
|
151 // TUTF32Iterator |
|
152 //////////////////////////////////////////////////////////////////////////////////////////// |
|
153 |
|
154 /** |
|
155 @internalComponent |
|
156 */ |
|
157 void TUTF32Iterator::Next() |
|
158 { |
|
159 ASSERT(iStart != iEnd); |
|
160 while (++iStart != iEnd) |
|
161 { |
|
162 iCurrent = ::UTF16ToChar(iStart); |
|
163 if (iCurrent != 0xFFFF) |
|
164 return; |
|
165 } |
|
166 } |
|
167 |
|
168 /** |
|
169 Locates a base character in a string using a folded comparision. Will not find combining |
|
170 characters, nor will it consider Korean combining Jamo to be equivalent to Hangul. |
|
171 @internalComponent |
|
172 */ |
|
173 TBool TUTF32Iterator::LocateFoldedBaseCharacter(TChar aChar) |
|
174 { |
|
175 // A quick shortcut for simple rejections |
|
176 if (aChar < 0xFFFF) |
|
177 { |
|
178 while (iStart != iEnd && *iStart < 0xD800 && ::FirstFoldedCodeIsNot(*iStart, aChar)) |
|
179 ++iStart; |
|
180 if (iStart != iEnd) |
|
181 { |
|
182 iCurrent = ::UTF16ToChar(iStart); |
|
183 if (iCurrent == 0xFFFF) |
|
184 Next(); |
|
185 } |
|
186 } |
|
187 while (!AtEnd()) |
|
188 { |
|
189 TChar foldedChar = ::GetFirstFoldedChar(iCurrent); |
|
190 if (aChar == foldedChar) |
|
191 return ETrue; |
|
192 Next(); |
|
193 } |
|
194 return EFalse; |
|
195 } |
|
196 |
|
197 //////////////////////////////////////////////////////////////////////////////////////////// |
|
198 // TFoldedDecompIterator |
|
199 //////////////////////////////////////////////////////////////////////////////////////////// |
|
200 |
|
201 /** |
|
202 @internalComponent |
|
203 */ |
|
204 TFoldedDecompIterator::TFoldedDecompIterator(const TUTF32Iterator& a) |
|
205 { |
|
206 Set(a); |
|
207 } |
|
208 |
|
209 /** |
|
210 @internalComponent |
|
211 */ |
|
212 TBool TFoldedDecompIterator::AtEnd() const |
|
213 { |
|
214 return iOriginal.AtEnd(); |
|
215 } |
|
216 |
|
217 /** |
|
218 @internalComponent |
|
219 */ |
|
220 TBool TFoldedDecompIterator::AtEndOrWildcard() const |
|
221 { |
|
222 // neither '?' nor '*' have decomposition sequences, so we can assume that |
|
223 // if we are pointing at one or the other, we don't need to check if we |
|
224 // are in a decomposition sequence or not. |
|
225 return iOriginal.AtEnd() || iOriginal.Current() == '?' || iOriginal.Current() == '*'; |
|
226 } |
|
227 |
|
228 /** |
|
229 @internalComponent |
|
230 */ |
|
231 TBool TFoldedDecompIterator::EnterFoldedSequence() |
|
232 { |
|
233 ASSERT(!AtEnd()); |
|
234 return !IsInFoldedSequence() && StrictEnterFoldedSequence(); |
|
235 } |
|
236 |
|
237 /** |
|
238 Enter folded sequence, assuming that we are not already in one |
|
239 @internalComponent |
|
240 */ |
|
241 TBool TFoldedDecompIterator::StrictEnterFoldedSequence() |
|
242 { |
|
243 ASSERT(!AtEnd()); |
|
244 ASSERT(!IsInFoldedSequence()); |
|
245 TInt index = ::GetDecompositionIndex(iOriginal.Current()); |
|
246 if (index < 0) |
|
247 return EFalse; |
|
248 iFolded = ::GetFoldedDecomposition(index); |
|
249 return ETrue; |
|
250 } |
|
251 |
|
252 /** |
|
253 An iota might have folded from a combining ypogegrammeni. |
|
254 If the current character is a base character, this function will |
|
255 detect whether it was folded from a combining character (and |
|
256 therefore does not constitute a grapheme boundary). |
|
257 @internalComponent |
|
258 */ |
|
259 TBool TFoldedDecompIterator::CurrentIsBaseFoldedFromCombiner() const |
|
260 { |
|
261 if (!IsInFoldedSequence()) |
|
262 return EFalse; |
|
263 // The only character that does this is Ypogerammeni, which folds to iota |
|
264 if (iFolded.Current() != 0x3B9) |
|
265 return EFalse; |
|
266 // If the unfolded character is a combiner then it cannot contain an iota, |
|
267 // so it must be an ypogegrammeni that has been folded to one. |
|
268 // This will catch 0x345, the ypogegrammeni itself. |
|
269 if (!::IsBaseCharacter(iOriginal.Current())) |
|
270 return ETrue; |
|
271 // Otherwise the base character will be at the start of the decomposition |
|
272 // sequence. We will assume that it is folded from a genuine iota if and |
|
273 // only if there is an iota at the start of the folded decomposition |
|
274 // sequence. (In theory there might be an iota with a combining |
|
275 // ypogegrammeni, but in practice this will not occur). |
|
276 TInt index = ::GetDecompositionIndex(iOriginal.Current()); |
|
277 ASSERT(0 <= index); |
|
278 TUTF32Iterator folded = ::GetFoldedDecomposition(index); |
|
279 return folded.Current() != 0x3B9; |
|
280 } |
|
281 |
|
282 /** |
|
283 @internalComponent |
|
284 */ |
|
285 TChar TFoldedDecompIterator::Current() const |
|
286 { |
|
287 ASSERT(!AtEnd()); |
|
288 return IsInFoldedSequence()? iFolded.Current() : iOriginal.Current(); |
|
289 } |
|
290 |
|
291 /** |
|
292 Move past this code if it matches unfolded or folded |
|
293 @internalComponent |
|
294 */ |
|
295 TBool TFoldedDecompIterator::Match(TChar aCode) |
|
296 { |
|
297 ASSERT(!AtEnd()); |
|
298 if (!IsInFoldedSequence()) |
|
299 { |
|
300 if (aCode == iOriginal.Current()) |
|
301 { |
|
302 iOriginal.Next(); |
|
303 return ETrue; |
|
304 } |
|
305 if (!StrictEnterFoldedSequence()) |
|
306 return EFalse; |
|
307 } |
|
308 ASSERT(IsInFoldedSequence()); |
|
309 if (aCode == iFolded.Current()) |
|
310 { |
|
311 iFolded.Next(); |
|
312 if (iFolded.AtEnd()) |
|
313 iOriginal.Next(); |
|
314 return ETrue; |
|
315 } |
|
316 return EFalse; |
|
317 } |
|
318 |
|
319 /** |
|
320 Move this and argument past matching character. |
|
321 @internalComponent |
|
322 */ |
|
323 TBool TFoldedDecompIterator::Match(TFoldedDecompIterator& aThat) |
|
324 { |
|
325 ASSERT(!AtEnd()); |
|
326 if (!IsInFoldedSequence()) |
|
327 { |
|
328 if ( aThat.Match(iOriginal.Current()) ) |
|
329 { |
|
330 iOriginal.Next(); |
|
331 return ETrue; |
|
332 } |
|
333 if (!StrictEnterFoldedSequence()) |
|
334 return EFalse; |
|
335 } |
|
336 ASSERT(IsInFoldedSequence()); |
|
337 if ( aThat.Match(iFolded.Current()) ) |
|
338 { |
|
339 iFolded.Next(); |
|
340 if (iFolded.AtEnd()) |
|
341 iOriginal.Next(); |
|
342 return ETrue; |
|
343 } |
|
344 return EFalse; |
|
345 } |
|
346 |
|
347 /** |
|
348 @internalComponent |
|
349 */ |
|
350 void TFoldedDecompIterator::Next() |
|
351 { |
|
352 ASSERT(!AtEnd()); |
|
353 if (IsInFoldedSequence()) |
|
354 { |
|
355 iFolded.Next(); |
|
356 if (IsInFoldedSequence()) |
|
357 return; |
|
358 } |
|
359 iOriginal.Next(); |
|
360 } |
|
361 |
|
362 //////////////////////////////////////////////////////////////////////////////////////////// |
|
363 // TFoldedSortedDecompIterator |
|
364 //////////////////////////////////////////////////////////////////////////////////////////// |
|
365 |
|
366 /** |
|
367 Set this iterator to iterate over the next combining characters. |
|
368 Iotas in folded sequences are assumed to be character class 240 |
|
369 (combining ypogegrammeni). Next() must be used once before |
|
370 the first call to Current(), as long as AtEnd() returns false. |
|
371 @param aBase Sets the start of the iteration. This value is advanced to the |
|
372 end of the iteration. |
|
373 @return The number of codes in the iteration. |
|
374 @internalComponent |
|
375 */ |
|
376 TInt TFoldedSortedDecompIterator::Set(TFoldedDecompIterator &aBase) |
|
377 { |
|
378 iStart = aBase; |
|
379 TInt length = 0; |
|
380 iCurrentClass = 256; |
|
381 |
|
382 const TUnicodeDataSet* charDataSet = GetLocaleCharSet()->iCharDataSet; |
|
383 |
|
384 // Find the next starter (i.e. character with combining class 0). |
|
385 // We must not count iota folded from ypogegrammeni. |
|
386 // Ypogegrammeni has combining class 240. |
|
387 // Iota has combining class 0. |
|
388 // Also we will be searching for the character with the smallest |
|
389 // combining class to start at. |
|
390 while (!aBase.AtEnd()) |
|
391 { |
|
392 aBase.EnterFoldedSequence(); |
|
393 TChar ch = aBase.Current(); |
|
394 TInt cl = TUnicode(TUint(ch)).GetCombiningClass(charDataSet); |
|
395 if (cl == 0) |
|
396 { |
|
397 if (aBase.CurrentIsBaseFoldedFromCombiner()) |
|
398 cl = 240; |
|
399 else |
|
400 break; |
|
401 } |
|
402 if (cl < iCurrentClass) |
|
403 { |
|
404 iCurrentClass = cl; |
|
405 iCurrent = aBase; |
|
406 iCurrentCount = length; |
|
407 } |
|
408 ++length; |
|
409 aBase.Next(); |
|
410 } |
|
411 iRemaining = length; |
|
412 iLength = length; |
|
413 ASSERT(iLength == 0 || iCurrentClass < 256); |
|
414 return length; |
|
415 } |
|
416 |
|
417 /** |
|
418 Set this iterator so that AtEnd() returns ETrue. |
|
419 @internalComponent |
|
420 */ |
|
421 void TFoldedSortedDecompIterator::Set() |
|
422 { |
|
423 iRemaining = 0; |
|
424 } |
|
425 |
|
426 /** |
|
427 @internalComponent |
|
428 */ |
|
429 TBool TFoldedSortedDecompIterator::AtEnd() const |
|
430 { |
|
431 return iRemaining == 0; |
|
432 } |
|
433 |
|
434 /** |
|
435 @internalComponent |
|
436 */ |
|
437 TChar TFoldedSortedDecompIterator::Current() const |
|
438 { |
|
439 ASSERT(!AtEnd()); |
|
440 return iCurrent.Current(); |
|
441 } |
|
442 |
|
443 /** |
|
444 @internalComponent |
|
445 */ |
|
446 void TFoldedSortedDecompIterator::Next() |
|
447 { |
|
448 ASSERT(!AtEnd()); |
|
449 --iRemaining; |
|
450 while (++iCurrentCount != iLength) |
|
451 { |
|
452 iCurrent.Next(); |
|
453 if (::GetClass(iCurrent) == iCurrentClass) |
|
454 return; |
|
455 } |
|
456 // We have not found any more of the same class, so we will look |
|
457 // for the smallest one larger. |
|
458 TInt minClass = 256; |
|
459 TFoldedDecompIterator searcher(iStart); |
|
460 TInt searchCount = 0; |
|
461 while (searchCount != iLength) |
|
462 { |
|
463 TInt cl = ::GetClass(searcher); |
|
464 if (iCurrentClass < cl && cl < minClass) |
|
465 { |
|
466 minClass = cl; |
|
467 iCurrentCount = searchCount; |
|
468 iCurrent = searcher; |
|
469 } |
|
470 ++searchCount; |
|
471 searcher.Next(); |
|
472 } |
|
473 iCurrentClass = minClass; |
|
474 ASSERT(minClass < 256 || AtEnd()); |
|
475 } |
|
476 |
|
477 //////////////////////////////////////////////////////////////////////////////////////////// |
|
478 // TFoldedCanonicalIterator |
|
479 //////////////////////////////////////////////////////////////////////////////////////////// |
|
480 |
|
481 /** |
|
482 @internalComponent |
|
483 */ |
|
484 TFoldedCanonicalIterator::TFoldedCanonicalIterator(const TUTF32Iterator& a) |
|
485 { |
|
486 iBase.Set(a); |
|
487 iSorted.Set(); |
|
488 if(!iBase.AtEnd()) |
|
489 { |
|
490 iBase.EnterFoldedSequence(); |
|
491 if (iBase.Current().GetCombiningClass() != 0 || iBase.CurrentIsBaseFoldedFromCombiner()) |
|
492 { |
|
493 iSorted.Set(iBase); |
|
494 } |
|
495 } |
|
496 } |
|
497 |
|
498 /** |
|
499 @internalComponent |
|
500 */ |
|
501 TBool TFoldedCanonicalIterator::AtEnd() const |
|
502 { |
|
503 return iSorted.AtEnd() && iBase.AtEnd(); |
|
504 } |
|
505 |
|
506 /** |
|
507 @internalComponent |
|
508 */ |
|
509 TChar TFoldedCanonicalIterator::Current() const |
|
510 { |
|
511 ASSERT(!iBase.AtEnd() || !iSorted.AtEnd()); |
|
512 return iSorted.AtEnd()? iBase.Current() : iSorted.Current(); |
|
513 } |
|
514 |
|
515 /** |
|
516 @internalComponent |
|
517 */ |
|
518 void TFoldedCanonicalIterator::Next(const TUnicodeDataSet* aCharDataSet) |
|
519 { |
|
520 ASSERT(!iBase.AtEnd() || !iSorted.AtEnd()); |
|
521 if (!iSorted.AtEnd()) |
|
522 { |
|
523 iSorted.Next(); |
|
524 return; |
|
525 } |
|
526 iBase.Next(); |
|
527 if(!iBase.AtEnd()) |
|
528 { |
|
529 iBase.EnterFoldedSequence(); |
|
530 if (TUnicode(TUint(iBase.Current())).GetCombiningClass(aCharDataSet) != 0 || iBase.CurrentIsBaseFoldedFromCombiner()) |
|
531 { |
|
532 iSorted.Set(iBase); |
|
533 } |
|
534 } |
|
535 } |
|
536 |
|
537 //////////////////////////////////////////////////////////////////////////////////////////// |
|
538 // Folding - Global functions and structures |
|
539 //////////////////////////////////////////////////////////////////////////////////////////// |
|
540 |
|
541 /** |
|
542 @internalComponent |
|
543 */ |
|
544 struct TEndTester |
|
545 { |
|
546 typedef enum {EGenuine, EWildcard} TType; |
|
547 |
|
548 inline TEndTester(TType aType) : |
|
549 iType(aType) |
|
550 { |
|
551 } |
|
552 |
|
553 inline TBool operator()(const TFoldedDecompIterator& a) const |
|
554 { |
|
555 return iType == EGenuine ? a.AtEnd() : a.AtEndOrWildcard(); |
|
556 } |
|
557 |
|
558 private: |
|
559 TType iType; |
|
560 |
|
561 }; |
|
562 |
|
563 /** |
|
564 Consumes as much of aCandidate as matches aSearchTerm up to the next '?' or |
|
565 '*' wildcard or the end of aSearchTerm. |
|
566 It is assumed that the search term begins with a base character. |
|
567 Returns true if and only if the whole search term was matched |
|
568 with a whole number of UTF16s in the candidate string. |
|
569 On return of ETrue the candidate string iterator will have consumed the |
|
570 matching characters the search term will have had all its matching characters |
|
571 consumed. |
|
572 @internalComponent |
|
573 */ |
|
574 TBool ConsumeFoldedMatch(TUTF32Iterator& aCandidateString, TUTF32Iterator& aSearchTerm, |
|
575 const TEndTester& aEndTester) |
|
576 { |
|
577 TBool assumeBase = ETrue; |
|
578 TFoldedDecompIterator st(aSearchTerm); |
|
579 TFoldedDecompIterator cs(aCandidateString); |
|
580 while (!aEndTester(st)) |
|
581 { |
|
582 if (cs.AtEnd()) |
|
583 return EFalse; |
|
584 if (st.Match(cs)) |
|
585 { |
|
586 assumeBase = EFalse; |
|
587 if (aEndTester(st)) |
|
588 { |
|
589 // We have a match... |
|
590 if (cs.IsInFoldedSequence()) |
|
591 // but it was against only part of a character |
|
592 // in the original string, so we fail. |
|
593 return EFalse; |
|
594 aCandidateString = cs.BaseIterator(); |
|
595 aSearchTerm = st.BaseIterator(); |
|
596 return ETrue; |
|
597 } |
|
598 continue; |
|
599 } |
|
600 // did not match. We need to re-order canonically. |
|
601 if (assumeBase) |
|
602 // The first characters did not match.. do not bother |
|
603 // to re-order. |
|
604 return EFalse; |
|
605 TFoldedSortedDecompIterator csc; |
|
606 TInt cscLength = csc.Set(cs); |
|
607 if (cscLength < 2) |
|
608 // If there are less than two characters to be reordered, |
|
609 // there is no hope of getting a match by re-ordering |
|
610 return EFalse; |
|
611 TFoldedSortedDecompIterator stc; |
|
612 if (cscLength != stc.Set(st)) |
|
613 // if the strings have differing numbers of characters, |
|
614 // there can be no match |
|
615 return EFalse; |
|
616 while (!stc.AtEnd()) |
|
617 { |
|
618 ASSERT(!csc.AtEnd()); |
|
619 if (stc.Current() != csc.Current()) |
|
620 // well, we tried. |
|
621 return EFalse; |
|
622 stc.Next(); |
|
623 csc.Next(); |
|
624 } |
|
625 } |
|
626 // If the candidate string is in a folded sequence, then |
|
627 // we should not accept the match, as we require all matches |
|
628 // to be for a whole number of characters in the original string. |
|
629 if (cs.IsInFoldedSequence()) |
|
630 return EFalse; |
|
631 aCandidateString = cs.BaseIterator(); |
|
632 aSearchTerm = st.BaseIterator(); |
|
633 return ETrue; |
|
634 } |
|
635 |
|
636 /** |
|
637 @internalComponent |
|
638 */ |
|
639 TBool ConsumeFoldedMatchWholeGraphemes(TUTF32Iterator& aCandidateString, TUTF32Iterator& aSearchTerm, |
|
640 const TEndTester& aEndTester) |
|
641 { |
|
642 if (!::ConsumeFoldedMatch(aCandidateString, aSearchTerm, aEndTester)) |
|
643 return EFalse; |
|
644 return aCandidateString.AtEnd() || ::IsBaseCharacter(aCandidateString.Current()); |
|
645 } |
|
646 |
|
647 /** |
|
648 @internalComponent |
|
649 */ |
|
650 static TBool ConsumeGrapheme(TUTF32Iterator& a) |
|
651 { |
|
652 if (a.AtEnd()) |
|
653 return EFalse; |
|
654 a.Next(); |
|
655 while (!a.AtEnd() && !::IsBaseCharacter(a.Current())) |
|
656 a.Next(); |
|
657 return ETrue; |
|
658 } |
|
659 |
|
660 /** |
|
661 @internalComponent |
|
662 */ |
|
663 TBool MatchSectionFolded(TUTF32Iterator& aCandidateString, TUTF32Iterator& aSearchTerm) |
|
664 { |
|
665 TEndTester endTester(TEndTester::EWildcard); |
|
666 while(::ConsumeFoldedMatchWholeGraphemes(aCandidateString, aSearchTerm, endTester)) |
|
667 { |
|
668 if (aSearchTerm.AtEnd() || aSearchTerm.Current() == '*') |
|
669 return ETrue; |
|
670 ASSERT(aSearchTerm.Current() == '?'); |
|
671 aSearchTerm.Next(); |
|
672 if (!::ConsumeGrapheme(aCandidateString)) |
|
673 return EFalse; |
|
674 } |
|
675 return EFalse; |
|
676 } |
|
677 |
|
678 /** |
|
679 @internalComponent |
|
680 */ |
|
681 TBool FindMatchSectionFolded(TUTF32Iterator& aCandidateString, TUTF32Iterator& aSearchTerm) |
|
682 { |
|
683 // match as many graphemes as there are leading ?s |
|
684 while (!aSearchTerm.AtEnd() |
|
685 && aSearchTerm.Current() != '*' && aSearchTerm.Current() == '?') |
|
686 { |
|
687 if (!::ConsumeGrapheme(aCandidateString)) |
|
688 return EFalse; |
|
689 aSearchTerm.Next(); |
|
690 } |
|
691 if (aSearchTerm.AtEnd() || aSearchTerm.Current() == '*') |
|
692 return ETrue; |
|
693 TChar firstCharOfSearchTerm = ::GetFirstFoldedChar(aSearchTerm.Current()); |
|
694 TUTF32Iterator savedST(aSearchTerm); |
|
695 while (aCandidateString.LocateFoldedBaseCharacter(firstCharOfSearchTerm)) |
|
696 { |
|
697 TUTF32Iterator savedCS = aCandidateString; |
|
698 if (::MatchSectionFolded(aCandidateString, aSearchTerm)) |
|
699 return ETrue; |
|
700 aSearchTerm = savedST; |
|
701 aCandidateString = savedCS; |
|
702 aCandidateString.Next(); |
|
703 } |
|
704 return EFalse; |
|
705 } |
|
706 |
|
707 /** |
|
708 @internalComponent |
|
709 */ |
|
710 TBool MatchStringFolded(const TText16* aCandidateStringStart, const TText16* aCandidateStringEnd, |
|
711 const TText16* aSearchTermStart, const TText16* aSearchTermEnd) |
|
712 { |
|
713 TUTF32Iterator candidate(aCandidateStringStart, aCandidateStringEnd); |
|
714 TUTF32Iterator searchTerm(aSearchTermStart, aSearchTermEnd); |
|
715 // First section of search term must match exactly at the start of the |
|
716 // candidate string. |
|
717 if (!::MatchSectionFolded(candidate, searchTerm)) |
|
718 return EFalse; |
|
719 |
|
720 // If there was only one section, it must match the whole candidate string. |
|
721 if (searchTerm.AtEnd()) |
|
722 return candidate.AtEnd(); |
|
723 |
|
724 while (!searchTerm.AtEnd()) |
|
725 { |
|
726 searchTerm.Next(); |
|
727 if (!::FindMatchSectionFolded(candidate, searchTerm)) |
|
728 return EFalse; |
|
729 } |
|
730 |
|
731 // The last section must match exactly at the end of the candidate string. |
|
732 if (candidate.AtEnd()) // shortcut |
|
733 return ETrue; |
|
734 const TText16* searchTermLastSection = aSearchTermEnd; |
|
735 while (searchTermLastSection != aSearchTermStart |
|
736 && searchTermLastSection[-1] != '*') |
|
737 --searchTermLastSection; |
|
738 if (searchTermLastSection == aSearchTermEnd) |
|
739 // last section is null, so trivially matches |
|
740 return ETrue; |
|
741 // Count graphemes by counting the number of base characters. |
|
742 // The first one is assumed to start a grapheme. |
|
743 TInt graphemeCount = 1; |
|
744 for (const TText16* s = searchTermLastSection + 1; s != aSearchTermEnd; ++s) |
|
745 { |
|
746 if (::IsBaseCharacter(*s)) |
|
747 ++graphemeCount; |
|
748 } |
|
749 // Count this many graphemes back in the candidate string |
|
750 const TText16* candidateLastSection = aCandidateStringEnd; |
|
751 while (graphemeCount != 0 |
|
752 && candidateLastSection != aCandidateStringStart) |
|
753 { |
|
754 if (::IsBaseCharacter(*--candidateLastSection)) |
|
755 --graphemeCount; |
|
756 } |
|
757 TUTF32Iterator last(candidateLastSection, aCandidateStringEnd); |
|
758 TUTF32Iterator st(searchTermLastSection, aSearchTermEnd); |
|
759 return ::MatchSectionFolded(last, st); |
|
760 } |
|
761 |
|
762 /** |
|
763 Implementation of MatchF |
|
764 (slow if there is a match: MatchStringFolded is better, but does not return |
|
765 the position of the match) |
|
766 @internalComponent |
|
767 */ |
|
768 TInt LocateMatchStringFolded(const TText16* aCandidateStringStart, const TText16* aCandidateStringEnd, |
|
769 const TText16* aSearchTermStart, const TText16* aSearchTermEnd) |
|
770 { |
|
771 // Quick shortcut for simple non-match |
|
772 if (aSearchTermStart != aSearchTermEnd && *aSearchTermStart != '*') |
|
773 { |
|
774 if (aCandidateStringStart == aCandidateStringEnd) |
|
775 return KErrNotFound; |
|
776 // We won't bother with non-characters and surrogate pairs. |
|
777 if (*aSearchTermStart != '?' |
|
778 && *aSearchTermStart < 0xD800 |
|
779 && *aCandidateStringStart < 0xD800 |
|
780 && ::GetFirstFoldedChar(*aSearchTermStart) != ::GetFirstFoldedChar(*aCandidateStringStart)) |
|
781 return KErrNotFound; |
|
782 } |
|
783 if (!::MatchStringFolded(aCandidateStringStart, aCandidateStringEnd, |
|
784 aSearchTermStart, aSearchTermEnd)) |
|
785 return KErrNotFound; |
|
786 // find where it matches |
|
787 while (aSearchTermStart != aSearchTermEnd && *aSearchTermStart == '*') |
|
788 ++aSearchTermStart; |
|
789 const TText16* end = aSearchTermStart; |
|
790 while (end != aSearchTermEnd && *end != '*') |
|
791 ++end; |
|
792 // To preserve existing behaviour, a search term consisting of nothing |
|
793 // but stars is considered to match at 0. |
|
794 if (end == aSearchTermStart) |
|
795 return 0; |
|
796 for (const TText16* csSection = aCandidateStringStart; |
|
797 csSection <= aCandidateStringEnd; ++csSection) |
|
798 { |
|
799 TUTF32Iterator cs(csSection, aCandidateStringEnd); |
|
800 TUTF32Iterator st(aSearchTermStart, end); |
|
801 if (::MatchSectionFolded(cs, st)) |
|
802 { |
|
803 // If this match must match exactly at the end, we must keep |
|
804 // going. |
|
805 // This could be a lot faster, with some optimizations. |
|
806 if (end == aSearchTermEnd) |
|
807 { |
|
808 if (!cs.AtEnd()) |
|
809 continue; |
|
810 } |
|
811 return csSection - aCandidateStringStart; |
|
812 } |
|
813 // Because this function is using TUTF32Iterator, which means the |
|
814 // original author want to support surrogate. Take it as a defect and |
|
815 // fix it, while do not define a new LocateMatchStringFoldedSurrogate(). |
|
816 if (IsSurrogate(*csSection)) |
|
817 ++csSection; |
|
818 } |
|
819 // this should never happen! |
|
820 ASSERT(0); |
|
821 return KErrNotFound; |
|
822 } |
|
823 |
|
824 /** |
|
825 Implementation of FindF |
|
826 @internalComponent |
|
827 */ |
|
828 TInt FindFolded(TUTF32Iterator& aCandidateString, TUTF32Iterator& aSearchTerm) |
|
829 { |
|
830 //Empty aSearchTerm string? - Then return 0 - keep the new implementation functionally |
|
831 //compatible with the old one. |
|
832 if(aSearchTerm.Length() == 0) |
|
833 { |
|
834 return 0; |
|
835 } |
|
836 const TText16* candidateStartPosition = aCandidateString.CurrentPosition(); |
|
837 TChar firstCharOfSearchTerm = ::GetFirstFoldedChar(aSearchTerm.Current()); |
|
838 TUTF32Iterator savedST(aSearchTerm); |
|
839 while (aCandidateString.LocateFoldedBaseCharacter(firstCharOfSearchTerm)) |
|
840 { |
|
841 TUTF32Iterator savedCS = aCandidateString; |
|
842 TEndTester endTester(TEndTester::EGenuine); |
|
843 if (::ConsumeFoldedMatchWholeGraphemes(aCandidateString, aSearchTerm, endTester)) |
|
844 return savedCS.CurrentPosition() - candidateStartPosition; |
|
845 aSearchTerm = savedST; |
|
846 aCandidateString = savedCS; |
|
847 aCandidateString.Next(); |
|
848 } |
|
849 return KErrNotFound; |
|
850 } |
|
851 |
|
852 /** |
|
853 Implementation of boolean CompareF |
|
854 @internalComponent |
|
855 */ |
|
856 TBool EqualsFolded(TUTF32Iterator& aLeft, TUTF32Iterator& aRight) |
|
857 { |
|
858 TEndTester endTester(TEndTester::EGenuine); |
|
859 if (::ConsumeFoldedMatchWholeGraphemes(aLeft, aRight, endTester)) |
|
860 return aLeft.AtEnd(); |
|
861 return EFalse; |
|
862 } |
|
863 |
|
864 /** |
|
865 Implementation of tri-state CompareF |
|
866 @internalComponent |
|
867 */ |
|
868 TInt CompareFolded(const TUTF32Iterator& aLeft, const TUTF32Iterator& aRight) |
|
869 { |
|
870 TFoldedCanonicalIterator left(aLeft); |
|
871 TFoldedCanonicalIterator right(aRight); |
|
872 |
|
873 const TUnicodeDataSet* charDataSet = GetLocaleCharSet()->iCharDataSet; |
|
874 |
|
875 while (!left.AtEnd()) |
|
876 { |
|
877 if (right.AtEnd()) |
|
878 return 1; |
|
879 TChar cr = right.Current(); |
|
880 TChar cl = left.Current(); |
|
881 if (cr != cl) |
|
882 return cl - cr; |
|
883 right.Next(charDataSet); |
|
884 left.Next(charDataSet); |
|
885 } |
|
886 return right.AtEnd()? 0 : -1; |
|
887 } |
|
888 |
|
889 //////////////////////////////////////////////////////////////////////////////////////////// |
|
890 // Composition/Decomposition - Global functions and structures |
|
891 //////////////////////////////////////////////////////////////////////////////////////////// |
|
892 |
|
893 /** |
|
894 @internalComponent |
|
895 */ |
|
896 static TUTF32Iterator IndexToNonSingletonDecomposition(TInt aIndex) |
|
897 { |
|
898 const TText* start = KNonSingletonDecompositions + aIndex * 2; |
|
899 if (*start != KLongD) |
|
900 return TUTF32Iterator(start, start + 2, TUTF32Iterator::EStartsWithValidCharacter); |
|
901 TInt longDecompIndex = start[1]; |
|
902 start = KLongDecompositions + (longDecompIndex & 0xFFF); |
|
903 return TUTF32Iterator(start, start + (longDecompIndex >> 12) + 3, TUTF32Iterator::EStartsWithValidCharacter); |
|
904 } |
|
905 |
|
906 /** |
|
907 Come up with a decomposition for the current value pointed at by the iterator |
|
908 @internalComponent |
|
909 */ |
|
910 static TBool Decompose(const TUTF32Iterator& aUTF32It, TUTF32Iterator& aOutIt) |
|
911 { |
|
912 TInt index = ::GetDecompositionIndex(aUTF32It.Current()); |
|
913 TInt singletonIndex = index - (sizeof(KNonSingletonDecompositions)/sizeof(KNonSingletonDecompositions[0])/2); |
|
914 const TInt SizeOfSingletonTable = sizeof(KSingletonDecompositions)/sizeof(KSingletonDecompositions[0]); |
|
915 if(index < 0 || SizeOfSingletonTable <= singletonIndex) |
|
916 { |
|
917 aOutIt = aUTF32It.CurrentAsIterator(); |
|
918 return EFalse;//There is not any valid decomposition |
|
919 } |
|
920 if(0 <= singletonIndex) |
|
921 { |
|
922 // KSingletonDecompositions contains some items that come from "ShortDecompsLongFolds". |
|
923 // "ShortDecompsLongFolds" contains some characters that have no composition, but have "long fold". |
|
924 // This basically specific to non-BMP characters with fold also outside BMP. |
|
925 // For example: |
|
926 // 10400;DESERET CAPITAL LETTER LONG I;Lu;0;L;;;;;N;;;;10428; |
|
927 // In Unicode 5.0.0, totally, there are 40 similar characters, which are U+10400-U+10427. |
|
928 if (KSingletonDecompositions[singletonIndex] == 0xFFFF) |
|
929 return EFalse; |
|
930 aOutIt = TUTF32Iterator(KSingletonDecompositions + singletonIndex); |
|
931 } |
|
932 else |
|
933 { |
|
934 aOutIt = ::IndexToNonSingletonDecomposition(index); |
|
935 } |
|
936 return ETrue;//Valid decomposition |
|
937 } |
|
938 |
|
939 /** |
|
940 @internalComponent |
|
941 */ |
|
942 static TUTF32Iterator CharToUTF32Iterator(TChar aChar, TDes16& aBuf) |
|
943 { |
|
944 aBuf.Zero(); |
|
945 if (aChar < 0x10000) |
|
946 aBuf.Append(aChar); |
|
947 else |
|
948 { |
|
949 aBuf.Append((aChar >> 10) + 0xD7C0); |
|
950 aBuf.Append((aChar & 0x3FF) + 0xDC00); |
|
951 } |
|
952 const TText16* t = aBuf.Ptr(); |
|
953 return TUTF32Iterator(t, t + aBuf.Length()); |
|
954 } |
|
955 |
|
956 /** |
|
957 @internalComponent |
|
958 */ |
|
959 TBool DecomposeChar(TChar aCh, TPtrC16& aResult) |
|
960 { |
|
961 aResult.Set(NULL, 0); |
|
962 TBuf16<2> srcBuf; |
|
963 TUTF32Iterator it = ::CharToUTF32Iterator(aCh, srcBuf); |
|
964 TBool res = ::Decompose(it, it); |
|
965 if(res) |
|
966 { |
|
967 aResult.Set(it.CurrentPosition(), it.Length()); |
|
968 } |
|
969 return res; |
|
970 } |
|
971 |
|
972 /** |
|
973 Turn an index into the hash table known to point to a non-singleton |
|
974 decomposition into that decomposition. |
|
975 @internalComponent |
|
976 */ |
|
977 static TUTF32Iterator HashIndexToDecomposition(TInt aIndex) |
|
978 { |
|
979 TUint32 v = KUnicodeToIndexHash[aIndex]; |
|
980 ASSERT(v != 0); |
|
981 TInt index = static_cast<TInt>(v >> 20); |
|
982 ASSERT(index < ARRAY_LENGTH(KNonSingletonDecompositions)/2); |
|
983 return ::IndexToNonSingletonDecomposition(index); |
|
984 } |
|
985 |
|
986 /** |
|
987 Takes a start and (one past the) end index into KCompostionMapping |
|
988 and a number of UTF16 characters (aLengthSoFar). All of the compositions |
|
989 within the range must have their first aLengthSoFar UTF16 characters |
|
990 matching. |
|
991 |
|
992 On entry, if aStart == aEnd then there is no possibility of a match so return |
|
993 immediately with EFalse. To continue, aStart must be strictly less than aEnd. |
|
994 |
|
995 Afterwards, aStart and aEnd will be narrowed to all those compositions |
|
996 where the aLengthSoFar'th UTF16 character matches aNextCharacter. |
|
997 No further compositions existing is indicated by aStart == aEnd. |
|
998 |
|
999 @return ETrue if the composition at aStart is exactly of length aLengthSoFar + 1. |
|
1000 @internalComponent |
|
1001 */ |
|
1002 static TBool RefineComposition(TInt& aStart, TInt& aEnd, TInt aLengthSoFar, TInt aNextCharacter) |
|
1003 { |
|
1004 if (aStart == aEnd) |
|
1005 return EFalse; |
|
1006 ASSERT((TUint)aStart < (TUint)aEnd); |
|
1007 ASSERT((TUint)aEnd <= (TUint)ARRAY_LENGTH(KCompositionMapping)); |
|
1008 TUTF32Iterator startIterator(::HashIndexToDecomposition(KCompositionMapping[aStart])); |
|
1009 if (startIterator.Length() == aLengthSoFar) |
|
1010 ++aStart; |
|
1011 |
|
1012 // Find a single example of a decomposition that is suitable |
|
1013 TInt mid; |
|
1014 TUTF32Iterator midIt; |
|
1015 for (;;) |
|
1016 { |
|
1017 if (aStart == aEnd) |
|
1018 return EFalse; |
|
1019 mid = aStart + ((aEnd - aStart) >> 1); |
|
1020 midIt = ::HashIndexToDecomposition(KCompositionMapping[mid]); |
|
1021 ASSERT(aLengthSoFar < midIt.Length()); |
|
1022 TInt midItChar = midIt[aLengthSoFar]; |
|
1023 if (midItChar < aNextCharacter) |
|
1024 aStart = mid + 1; |
|
1025 else if (aNextCharacter < midItChar) |
|
1026 aEnd = mid; |
|
1027 else |
|
1028 { |
|
1029 startIterator = midIt; |
|
1030 break; |
|
1031 } |
|
1032 } |
|
1033 |
|
1034 // FInd the first decomposition that does not match |
|
1035 TInt start2 = mid + 1; |
|
1036 while (start2 != aEnd) |
|
1037 { |
|
1038 ASSERT(start2 < aEnd); |
|
1039 TInt mid2 = start2 + ((aEnd - start2) >> 1); |
|
1040 midIt = ::HashIndexToDecomposition(KCompositionMapping[mid2]); |
|
1041 ASSERT(aLengthSoFar < midIt.Length()); |
|
1042 TInt midItChar = midIt[aLengthSoFar]; |
|
1043 ASSERT(aNextCharacter <= midItChar); |
|
1044 if (aNextCharacter < midItChar) |
|
1045 aEnd = mid2; |
|
1046 else |
|
1047 start2 = mid2 + 1; |
|
1048 } |
|
1049 |
|
1050 // Find the first decomposition that matches |
|
1051 while (aStart != mid) |
|
1052 { |
|
1053 ASSERT(aStart < mid); |
|
1054 TInt mid2 = aStart + ((mid - aStart) >> 1); |
|
1055 midIt = ::HashIndexToDecomposition(KCompositionMapping[mid2]); |
|
1056 ASSERT(aLengthSoFar < midIt.Length()); |
|
1057 TInt midItChar = midIt[aLengthSoFar]; |
|
1058 ASSERT(midItChar <= aNextCharacter); |
|
1059 if (midItChar < aNextCharacter) |
|
1060 aStart = mid2 + 1; |
|
1061 else |
|
1062 { |
|
1063 startIterator = midIt; |
|
1064 mid = mid2; |
|
1065 } |
|
1066 } |
|
1067 |
|
1068 return startIterator.Length() == (aLengthSoFar + 1); |
|
1069 } |
|
1070 |
|
1071 /** |
|
1072 @internalComponent |
|
1073 */ |
|
1074 static TBool RefineCompositionUTF32(TInt& aStart, TInt& aEnd, TInt& aLengthSoFar, TChar aChar) |
|
1075 { |
|
1076 if (aChar < 0x10000) |
|
1077 return ::RefineComposition(aStart, aEnd, aLengthSoFar++, aChar); |
|
1078 ::RefineComposition(aStart, aEnd, aLengthSoFar++, (aChar >> 10) + 0xD7C0); |
|
1079 if (aStart == aEnd) |
|
1080 return EFalse; |
|
1081 return ::RefineComposition(aStart, aEnd, aLengthSoFar++, (aChar & 0x3FF) + 0xDC00); |
|
1082 } |
|
1083 |
|
1084 /** |
|
1085 Combine as many of the characters presented as possible into a single |
|
1086 character. |
|
1087 @return The number of characters successfully combined. |
|
1088 @param aCombined If a nonzero value is returned, this contains |
|
1089 the character that is that number of characters from the start of |
|
1090 aDes combined. |
|
1091 @internalComponent |
|
1092 */ |
|
1093 TInt CombineAsMuchAsPossible(const TDesC16& aDes, TChar& aCombined) |
|
1094 { |
|
1095 TInt start = 0; |
|
1096 TInt end = sizeof(KCompositionMapping)/sizeof(KCompositionMapping[0]); |
|
1097 TInt length = 0; |
|
1098 TInt bestIndex = 0; |
|
1099 TInt bestLength = 0; |
|
1100 const TText16* ptr = aDes.Ptr(); |
|
1101 TUTF32Iterator input(ptr, ptr + aDes.Length()); |
|
1102 while (!input.AtEnd()) |
|
1103 { |
|
1104 if (::RefineCompositionUTF32(start, end, length, input.Current())) |
|
1105 { |
|
1106 bestIndex = start; |
|
1107 bestLength = length; |
|
1108 } |
|
1109 input.Next(); |
|
1110 } |
|
1111 if (bestLength == 0) |
|
1112 return 0; |
|
1113 aCombined = KUnicodeToIndexHash[KCompositionMapping[bestIndex]] & 0xFFFFF; |
|
1114 return bestLength; |
|
1115 } |
|
1116 |
|
1117 ////////////////////////////////////////////////////////////////////////////////////////////// |
|
1118 // COLLATION |
|
1119 ////////////////////////////////////////////////////////////////////////////////////////////// |
|
1120 |
|
1121 ////////////////////////////////////////////////////////////////////////////////////////////// |
|
1122 // TDecompositionIterator class |
|
1123 |
|
1124 /** |
|
1125 @internalComponent |
|
1126 */ |
|
1127 void TDecompositionIterator::Set(const TUTF32Iterator& a) |
|
1128 { |
|
1129 iBase = a; |
|
1130 if (!iBase.AtEnd()) |
|
1131 { |
|
1132 (void)::Decompose(iBase, iDecomposition); |
|
1133 } |
|
1134 } |
|
1135 |
|
1136 /** |
|
1137 @internalComponent |
|
1138 */ |
|
1139 TDecompositionIterator::TDecompositionIterator(const TUTF32Iterator& a) |
|
1140 { |
|
1141 Set(a); |
|
1142 } |
|
1143 |
|
1144 /** |
|
1145 @internalComponent |
|
1146 */ |
|
1147 TBool TDecompositionIterator::AtEnd() const |
|
1148 { |
|
1149 return iBase.AtEnd(); |
|
1150 } |
|
1151 |
|
1152 /** |
|
1153 @internalComponent |
|
1154 */ |
|
1155 TChar TDecompositionIterator::Current() const |
|
1156 { |
|
1157 return iDecomposition.Current(); |
|
1158 } |
|
1159 |
|
1160 /** |
|
1161 @internalComponent |
|
1162 */ |
|
1163 void TDecompositionIterator::Next() |
|
1164 { |
|
1165 ASSERT(!iBase.AtEnd() && !iDecomposition.AtEnd()); |
|
1166 iDecomposition.Next(); |
|
1167 if (!iDecomposition.AtEnd()) |
|
1168 return; |
|
1169 iBase.Next(); |
|
1170 if (!iBase.AtEnd()) |
|
1171 { |
|
1172 (void)::Decompose(iBase, iDecomposition); |
|
1173 } |
|
1174 } |
|
1175 |
|
1176 /** |
|
1177 @internalComponent |
|
1178 */ |
|
1179 const TText16* TDecompositionIterator::CurrentPosition() const |
|
1180 { |
|
1181 return iBase.CurrentPosition(); |
|
1182 } |
|
1183 |
|
1184 /** |
|
1185 Find out the length and minimum combining class of |
|
1186 the current run of characters of nonzero combining class. |
|
1187 aMinClass and aMinClassPos are not written to if the return |
|
1188 value is 0. |
|
1189 aEndOfRun is written to with the final position of the iteration |
|
1190 if 0 is returned and aEndOfRun is non-null |
|
1191 @internalComponent |
|
1192 */ |
|
1193 static TInt ReorderingRun(TInt& aMinClass, TDecompositionIterator& aMinClassPos, |
|
1194 const TDecompositionIterator& aStart, TBool* aOpenSequence, |
|
1195 TInt aMaxDisallowedClass = 0, TDecompositionIterator* aEndOfRun = 0) |
|
1196 { |
|
1197 TInt comclass = aStart.AtEnd()? 0 : aStart.Current().GetCombiningClass(); |
|
1198 if (comclass == 0) |
|
1199 { |
|
1200 if (aEndOfRun) |
|
1201 *aEndOfRun = aStart; |
|
1202 if (aOpenSequence) |
|
1203 *aOpenSequence = aStart.AtEnd(); |
|
1204 return 0; |
|
1205 } |
|
1206 aMinClass = 256; |
|
1207 TDecompositionIterator i = aStart; |
|
1208 TInt count = 0; |
|
1209 while (comclass != 0) |
|
1210 { |
|
1211 if (aMaxDisallowedClass < comclass) |
|
1212 { |
|
1213 if (comclass < aMinClass) |
|
1214 { |
|
1215 aMinClass = comclass; |
|
1216 aMinClassPos = i; |
|
1217 } |
|
1218 ++count; |
|
1219 } |
|
1220 i.Next(); |
|
1221 comclass = i.AtEnd()? 0 : i.Current().GetCombiningClass(); |
|
1222 } |
|
1223 if (count == 0 && aEndOfRun) |
|
1224 *aEndOfRun = i; |
|
1225 if (aOpenSequence) |
|
1226 *aOpenSequence = i.AtEnd(); |
|
1227 return count; |
|
1228 } |
|
1229 |
|
1230 ////////////////////////////////////////////////////////////////////////////////////////////// |
|
1231 // TCanonicalDecompositionIterator class |
|
1232 |
|
1233 /** |
|
1234 @internalComponent |
|
1235 */ |
|
1236 void TCanonicalDecompositionIterator::Set(const TUTF32Iterator& a) |
|
1237 { |
|
1238 iBase.Set(a); |
|
1239 iLastPosition = 0; |
|
1240 if (ReorderingRun(iCurrentCombiningClass, iCurrent, iBase, &iInOpenSequence) < 2) |
|
1241 iCurrentCombiningClass = 0; |
|
1242 } |
|
1243 |
|
1244 /** |
|
1245 @internalComponent |
|
1246 */ |
|
1247 TBool TCanonicalDecompositionIterator::AtEnd() const |
|
1248 { |
|
1249 return iBase.AtEnd(); |
|
1250 } |
|
1251 |
|
1252 /** |
|
1253 @internalComponent |
|
1254 */ |
|
1255 TChar TCanonicalDecompositionIterator::Current() const |
|
1256 { |
|
1257 return iCurrentCombiningClass? iCurrent.Current() : iBase.Current(); |
|
1258 } |
|
1259 |
|
1260 /** |
|
1261 @internalComponent |
|
1262 */ |
|
1263 void TCanonicalDecompositionIterator::Next() |
|
1264 { |
|
1265 iLastPosition = iBase.CurrentPosition(); |
|
1266 if (iCurrentCombiningClass == 0) |
|
1267 { |
|
1268 iBase.Next(); |
|
1269 if (ReorderingRun(iCurrentCombiningClass, iCurrent, iBase, &iInOpenSequence) < 2) |
|
1270 iCurrentCombiningClass = 0; |
|
1271 return; |
|
1272 } |
|
1273 // Find the next character in the run with the same combining class |
|
1274 iCurrent.Next(); |
|
1275 TInt curclass = iCurrent.AtEnd()? 0 : iCurrent.Current().GetCombiningClass(); |
|
1276 while (curclass != 0) |
|
1277 { |
|
1278 if (curclass == iCurrentCombiningClass) |
|
1279 // success |
|
1280 return; |
|
1281 iCurrent.Next(); |
|
1282 curclass = iCurrent.AtEnd()? 0 : iCurrent.Current().GetCombiningClass(); |
|
1283 } |
|
1284 // There are none left in the current class. Find out what the next one is. |
|
1285 if (0 == ReorderingRun(iCurrentCombiningClass, iCurrent, iBase, 0, iCurrentCombiningClass, &iBase)) |
|
1286 iCurrentCombiningClass = 0; |
|
1287 } |
|
1288 |
|
1289 /** |
|
1290 @internalComponent |
|
1291 */ |
|
1292 const TText16* TCanonicalDecompositionIterator::CurrentPositionIfAtCharacter() const |
|
1293 { |
|
1294 if (iCurrentCombiningClass != 0) |
|
1295 return 0; |
|
1296 const TText16* p = iBase.CurrentPosition(); |
|
1297 return iLastPosition == p? 0 : p; |
|
1298 } |
|
1299 |
|
1300 /** |
|
1301 @internalComponent |
|
1302 */ |
|
1303 TBool TCanonicalDecompositionIterator::IsInOpenSequence() const |
|
1304 { |
|
1305 return iInOpenSequence; |
|
1306 } |
|
1307 |
|
1308 ////////////////////////////////////////////////////////////////////////////////////////////// |
|
1309 // TCanonicalDecompositionIteratorCached class |
|
1310 |
|
1311 /** |
|
1312 @internalComponent |
|
1313 */ |
|
1314 void TCanonicalDecompositionIteratorCached::Set(const TUTF32Iterator& a) |
|
1315 { |
|
1316 iBase.Set(a); |
|
1317 iCacheStart = 0; |
|
1318 iCacheSize = 0; |
|
1319 } |
|
1320 |
|
1321 /** |
|
1322 @internalComponent |
|
1323 */ |
|
1324 TBool TCanonicalDecompositionIteratorCached::AtEnd() const |
|
1325 { |
|
1326 return iCacheSize == 0 && iBase.AtEnd(); |
|
1327 } |
|
1328 |
|
1329 /** |
|
1330 @internalComponent |
|
1331 */ |
|
1332 void TCanonicalDecompositionIteratorCached::Next(TInt aOffset) |
|
1333 { |
|
1334 ASSERT(0 <= aOffset); |
|
1335 ASSERT(0 <= iCacheSize); |
|
1336 ASSERT(0 != iCacheSize || !iBase.AtEnd()); |
|
1337 if (aOffset <= iCacheSize) |
|
1338 { |
|
1339 iCacheSize -= aOffset; |
|
1340 iCacheStart = (iCacheStart + aOffset) & (KMaxLookAhead - 1); |
|
1341 return; |
|
1342 } |
|
1343 aOffset -= iCacheSize; |
|
1344 iCacheSize = 0; |
|
1345 while (aOffset != 0) |
|
1346 { |
|
1347 iBase.Next(); |
|
1348 --aOffset; |
|
1349 } |
|
1350 } |
|
1351 |
|
1352 /** |
|
1353 Get the character at the position of the iterator plus aOffset steps. |
|
1354 Returns -1 if we are looking too far ahead. |
|
1355 @internalComponent |
|
1356 */ |
|
1357 TChar TCanonicalDecompositionIteratorCached::Get(TInt aOffset) |
|
1358 { |
|
1359 // should be assert debug: there is a chance this could go off with |
|
1360 // bad collation tables |
|
1361 ASSERT(aOffset <= KMaxLookAhead); |
|
1362 while (iCacheSize <= aOffset) |
|
1363 { |
|
1364 if (iBase.AtEnd()) |
|
1365 return TChar(static_cast <TUint> (-1)); |
|
1366 TInt cachePos = (iCacheStart + iCacheSize) & (KMaxLookAhead - 1); |
|
1367 iCache[cachePos].iChar = iBase.Current(); |
|
1368 iCache[cachePos].iPos = iBase.CurrentPositionIfAtCharacter(); |
|
1369 ++iCacheSize; |
|
1370 iBase.Next(); |
|
1371 } |
|
1372 return iCacheSize == aOffset? iBase.Current() : iCache[(iCacheStart + aOffset) & (KMaxLookAhead - 1)].iChar; |
|
1373 } |
|
1374 |
|
1375 /** |
|
1376 If the current position in the original string is representable |
|
1377 as a pointer into it and we know what it is, return it. |
|
1378 @internalComponent |
|
1379 */ |
|
1380 const TText16* TCanonicalDecompositionIteratorCached::CurrentPositionIfAtCharacter() const |
|
1381 { |
|
1382 if(iCacheSize == 0) |
|
1383 { |
|
1384 return iBase.CurrentPositionIfAtCharacter(); |
|
1385 } |
|
1386 return iCache[iCacheStart & (KMaxLookAhead - 1)].iPos; |
|
1387 } |
|
1388 |