author | mikek |
Fri, 18 Jun 2010 18:06:39 +0100 | |
branch | GCC_SURGE |
changeset 166 | d7c1885c0106 |
parent 90 | 947f0dc9f7a8 |
child 257 | 3e88ff8f41d5 |
child 269 | d57b86b1867a |
permissions | -rw-r--r-- |
0 | 1 |
// Copyright (c) 2001-2009 Nokia Corporation and/or its subsidiary(-ies). |
2 |
// All rights reserved. |
|
3 |
// This component and the accompanying materials are made available |
|
4 |
// under the terms of the License "Eclipse Public License v1.0" |
|
5 |
// which accompanies this distribution, and is available |
|
6 |
// at the URL "http://www.eclipse.org/legal/epl-v10.html". |
|
7 |
// |
|
8 |
// Initial Contributors: |
|
9 |
// Nokia Corporation - initial contribution. |
|
10 |
// |
|
11 |
// Contributors: |
|
12 |
// |
|
13 |
// Description: |
|
14 |
// Folding and decomposition implementation |
|
15 |
// |
|
16 |
// |
|
17 |
||
18 |
#include "FoldDecomp.inl" |
|
19 |
#include "CompareImp.h" |
|
20 |
#include "u32std.h" |
|
21 |
||
22 |
#define ARRAY_LENGTH(a) (static_cast<TInt>(sizeof(a)/sizeof(a[0]))) |
|
23 |
||
24 |
//////////////////////////////////////////////////////////////////////////////////////////// |
|
25 |
// Global functions |
|
26 |
//////////////////////////////////////////////////////////////////////////////////////////// |
|
27 |
||
28 |
/** |
|
29 |
@internalComponent |
|
30 |
*/ |
|
31 |
TChar UTF16ToChar(const TText16* a) |
|
32 |
{ |
|
33 |
if (0xD800 <= a[0]) |
|
34 |
{ |
|
35 |
if (a[0] < 0xE000) |
|
36 |
{ |
|
37 |
if (a[0] < 0xDC00 && ::IsLowSurrogate(a[1])) |
|
38 |
{ |
|
39 |
TChar c = ::PairSurrogates(a[0], a[1]); |
|
40 |
if ((c & 0xFFFE) != 0xFFFE) |
|
41 |
return c; |
|
42 |
} |
|
43 |
return 0xFFFF; |
|
44 |
} |
|
45 |
if (a[0] == 0xFFFE) |
|
46 |
return 0xFFFF; |
|
47 |
} |
|
48 |
return a[0]; |
|
49 |
} |
|
50 |
||
51 |
/** |
|
52 |
Is a character a base character (ETrue) or a combiner (EFalse)? |
|
53 |
For now, we will treat all control characters as base characters. |
|
54 |
@internalComponent |
|
55 |
*/ |
|
56 |
TBool IsBaseCharacter(TChar a) |
|
57 |
{ |
|
58 |
if(a < 0x220) |
|
59 |
{ |
|
60 |
// These Unicode characters are all assigned |
|
61 |
// and none of them is a combining character |
|
62 |
return ETrue; |
|
63 |
} |
|
64 |
return (a.GetCategory() & 0xFFF0) - TChar::EMarkGroup; |
|
65 |
} |
|
66 |
||
67 |
/** |
|
68 |
@internalComponent |
|
69 |
*/ |
|
70 |
inline TInt GetDecompositionIndex(TChar a) |
|
71 |
{ |
|
72 |
TInt i = DecompositionHashStart(a); |
|
73 |
TUint32 v = KUnicodeToIndexHash[i]; |
|
74 |
if (!v) |
|
75 |
return -1; |
|
76 |
if ((v & 0xFFFFF) != a) |
|
77 |
{ |
|
78 |
TInt step = DecompositionHashStep(a); |
|
79 |
do { |
|
80 |
i = (i + step) & KDecompositionHashBitmask; |
|
81 |
v = KUnicodeToIndexHash[i]; |
|
82 |
if (!v) |
|
83 |
return -1; |
|
84 |
} while ((v & 0xFFFFF) != a); |
|
85 |
} |
|
86 |
// it is important that this is an unsigned shift |
|
87 |
return static_cast<TInt>(v >> 20); |
|
88 |
} |
|
89 |
||
90 |
/** |
|
91 |
Will not work if an invalid index is passed. |
|
92 |
@internalComponent |
|
93 |
*/ |
|
94 |
static TUTF32Iterator GetFoldedDecomposition(TInt aIndex) |
|
95 |
{ |
|
96 |
TInt singletonIndex = aIndex - (sizeof(KNonSingletonFolds)/sizeof(KNonSingletonFolds[0])/2); |
|
97 |
if (0 <= singletonIndex) |
|
98 |
return TUTF32Iterator(KSingletonFolds + singletonIndex); |
|
99 |
const TText* start = KNonSingletonFolds + aIndex * 2; |
|
100 |
if (*start != KLongD) |
|
101 |
return TUTF32Iterator(start, start + 2, |
|
102 |
TUTF32Iterator::EStartsWithValidCharacter); |
|
103 |
TInt longDecompIndex = start[1]; |
|
104 |
start = KLongDecompositions + (longDecompIndex & 0xFFF); |
|
105 |
return TUTF32Iterator(start, start + (longDecompIndex >> 12) + 3, |
|
106 |
TUTF32Iterator::EStartsWithValidCharacter); |
|
107 |
} |
|
108 |
||
109 |
/** |
|
110 |
@internalComponent |
|
111 |
*/ |
|
112 |
static TChar GetFirstFoldedChar(TChar a) |
|
113 |
{ |
|
114 |
TInt index = ::GetDecompositionIndex(a); |
|
115 |
return index < 0? a : ::GetFoldedDecomposition(index).Current(); |
|
116 |
} |
|
117 |
||
118 |
/** |
|
119 |
@internalComponent |
|
120 |
*/ |
|
121 |
static TBool FirstFoldedCodeIsNot(TInt aNonSurrogate, TInt aFoldedNonSurrogate) |
|
122 |
{ |
|
123 |
TInt index = ::GetDecompositionIndex(aNonSurrogate); |
|
124 |
if (index < 0) |
|
125 |
return aNonSurrogate - aFoldedNonSurrogate; |
|
126 |
TInt singletonIndex = index - (sizeof(KNonSingletonFolds)/sizeof(KNonSingletonFolds[0])/2); |
|
127 |
if (0 < singletonIndex) |
|
128 |
return KSingletonFolds[singletonIndex] - aFoldedNonSurrogate; |
|
129 |
const TText* start = KNonSingletonFolds + index * 2; |
|
130 |
if (start[0] == KLongD) |
|
131 |
start = KLongDecompositions + (start[1] & 0xFFF); |
|
132 |
return *start - aFoldedNonSurrogate; |
|
133 |
} |
|
134 |
||
135 |
/** |
|
136 |
@internalComponent |
|
137 |
*/ |
|
138 |
static TInt GetClass(TFoldedDecompIterator& a) |
|
139 |
{ |
|
140 |
ASSERT(!a.AtEnd()); |
|
141 |
a.EnterFoldedSequence(); |
|
142 |
TChar ch = a.Current(); |
|
143 |
TInt cl = ch.GetCombiningClass(); |
|
144 |
if (cl == 0) |
|
145 |
// Assume starters have been folded from ypogegrammeni |
|
146 |
cl = 240; |
|
147 |
return cl; |
|
148 |
} |
|
149 |
||
150 |
//////////////////////////////////////////////////////////////////////////////////////////// |
|
151 |
// TUTF32Iterator |
|
152 |
//////////////////////////////////////////////////////////////////////////////////////////// |
|
153 |
||
154 |
/** |
|
155 |
@internalComponent |
|
156 |
*/ |
|
157 |
void TUTF32Iterator::Next() |
|
158 |
{ |
|
159 |
ASSERT(iStart != iEnd); |
|
160 |
while (++iStart != iEnd) |
|
161 |
{ |
|
162 |
iCurrent = ::UTF16ToChar(iStart); |
|
163 |
if (iCurrent != 0xFFFF) |
|
164 |
return; |
|
165 |
} |
|
166 |
} |
|
167 |
||
168 |
/** |
|
169 |
Locates a base character in a string using a folded comparision. Will not find combining |
|
170 |
characters, nor will it consider Korean combining Jamo to be equivalent to Hangul. |
|
171 |
@internalComponent |
|
172 |
*/ |
|
173 |
TBool TUTF32Iterator::LocateFoldedBaseCharacter(TChar aChar) |
|
174 |
{ |
|
175 |
// A quick shortcut for simple rejections |
|
176 |
if (aChar < 0xFFFF) |
|
177 |
{ |
|
178 |
while (iStart != iEnd && *iStart < 0xD800 && ::FirstFoldedCodeIsNot(*iStart, aChar)) |
|
179 |
++iStart; |
|
180 |
if (iStart != iEnd) |
|
181 |
{ |
|
182 |
iCurrent = ::UTF16ToChar(iStart); |
|
183 |
if (iCurrent == 0xFFFF) |
|
184 |
Next(); |
|
185 |
} |
|
186 |
} |
|
187 |
while (!AtEnd()) |
|
188 |
{ |
|
189 |
TChar foldedChar = ::GetFirstFoldedChar(iCurrent); |
|
190 |
if (aChar == foldedChar) |
|
191 |
return ETrue; |
|
192 |
Next(); |
|
193 |
} |
|
194 |
return EFalse; |
|
195 |
} |
|
196 |
||
197 |
//////////////////////////////////////////////////////////////////////////////////////////// |
|
198 |
// TFoldedDecompIterator |
|
199 |
//////////////////////////////////////////////////////////////////////////////////////////// |
|
200 |
||
201 |
/** |
|
202 |
@internalComponent |
|
203 |
*/ |
|
204 |
TFoldedDecompIterator::TFoldedDecompIterator(const TUTF32Iterator& a) |
|
205 |
{ |
|
206 |
Set(a); |
|
207 |
} |
|
208 |
||
209 |
/** |
|
210 |
@internalComponent |
|
211 |
*/ |
|
212 |
TBool TFoldedDecompIterator::AtEnd() const |
|
213 |
{ |
|
214 |
return iOriginal.AtEnd(); |
|
215 |
} |
|
216 |
||
217 |
/** |
|
218 |
@internalComponent |
|
219 |
*/ |
|
220 |
TBool TFoldedDecompIterator::AtEndOrWildcard() const |
|
221 |
{ |
|
222 |
// neither '?' nor '*' have decomposition sequences, so we can assume that |
|
223 |
// if we are pointing at one or the other, we don't need to check if we |
|
224 |
// are in a decomposition sequence or not. |
|
225 |
return iOriginal.AtEnd() || iOriginal.Current() == '?' || iOriginal.Current() == '*'; |
|
226 |
} |
|
227 |
||
228 |
/** |
|
229 |
@internalComponent |
|
230 |
*/ |
|
231 |
TBool TFoldedDecompIterator::EnterFoldedSequence() |
|
232 |
{ |
|
233 |
ASSERT(!AtEnd()); |
|
234 |
return !IsInFoldedSequence() && StrictEnterFoldedSequence(); |
|
235 |
} |
|
236 |
||
237 |
/** |
|
238 |
Enter folded sequence, assuming that we are not already in one |
|
239 |
@internalComponent |
|
240 |
*/ |
|
241 |
TBool TFoldedDecompIterator::StrictEnterFoldedSequence() |
|
242 |
{ |
|
243 |
ASSERT(!AtEnd()); |
|
244 |
ASSERT(!IsInFoldedSequence()); |
|
245 |
TInt index = ::GetDecompositionIndex(iOriginal.Current()); |
|
246 |
if (index < 0) |
|
247 |
return EFalse; |
|
248 |
iFolded = ::GetFoldedDecomposition(index); |
|
249 |
return ETrue; |
|
250 |
} |
|
251 |
||
252 |
/** |
|
253 |
An iota might have folded from a combining ypogegrammeni. |
|
254 |
If the current character is a base character, this function will |
|
255 |
detect whether it was folded from a combining character (and |
|
256 |
therefore does not constitute a grapheme boundary). |
|
257 |
@internalComponent |
|
258 |
*/ |
|
259 |
TBool TFoldedDecompIterator::CurrentIsBaseFoldedFromCombiner() const |
|
260 |
{ |
|
261 |
if (!IsInFoldedSequence()) |
|
262 |
return EFalse; |
|
263 |
// The only character that does this is Ypogerammeni, which folds to iota |
|
264 |
if (iFolded.Current() != 0x3B9) |
|
265 |
return EFalse; |
|
266 |
// If the unfolded character is a combiner then it cannot contain an iota, |
|
267 |
// so it must be an ypogegrammeni that has been folded to one. |
|
268 |
// This will catch 0x345, the ypogegrammeni itself. |
|
269 |
if (!::IsBaseCharacter(iOriginal.Current())) |
|
270 |
return ETrue; |
|
271 |
// Otherwise the base character will be at the start of the decomposition |
|
272 |
// sequence. We will assume that it is folded from a genuine iota if and |
|
273 |
// only if there is an iota at the start of the folded decomposition |
|
274 |
// sequence. (In theory there might be an iota with a combining |
|
275 |
// ypogegrammeni, but in practice this will not occur). |
|
276 |
TInt index = ::GetDecompositionIndex(iOriginal.Current()); |
|
277 |
ASSERT(0 <= index); |
|
278 |
TUTF32Iterator folded = ::GetFoldedDecomposition(index); |
|
279 |
return folded.Current() != 0x3B9; |
|
280 |
} |
|
281 |
||
282 |
/** |
|
283 |
@internalComponent |
|
284 |
*/ |
|
285 |
TChar TFoldedDecompIterator::Current() const |
|
286 |
{ |
|
287 |
ASSERT(!AtEnd()); |
|
288 |
return IsInFoldedSequence()? iFolded.Current() : iOriginal.Current(); |
|
289 |
} |
|
290 |
||
291 |
/** |
|
292 |
Move past this code if it matches unfolded or folded |
|
293 |
@internalComponent |
|
294 |
*/ |
|
295 |
TBool TFoldedDecompIterator::Match(TChar aCode) |
|
296 |
{ |
|
297 |
ASSERT(!AtEnd()); |
|
298 |
if (!IsInFoldedSequence()) |
|
299 |
{ |
|
300 |
if (aCode == iOriginal.Current()) |
|
301 |
{ |
|
302 |
iOriginal.Next(); |
|
303 |
return ETrue; |
|
304 |
} |
|
305 |
if (!StrictEnterFoldedSequence()) |
|
306 |
return EFalse; |
|
307 |
} |
|
308 |
ASSERT(IsInFoldedSequence()); |
|
309 |
if (aCode == iFolded.Current()) |
|
310 |
{ |
|
311 |
iFolded.Next(); |
|
312 |
if (iFolded.AtEnd()) |
|
313 |
iOriginal.Next(); |
|
314 |
return ETrue; |
|
315 |
} |
|
316 |
return EFalse; |
|
317 |
} |
|
318 |
||
319 |
/** |
|
320 |
Move this and argument past matching character. |
|
321 |
@internalComponent |
|
322 |
*/ |
|
323 |
TBool TFoldedDecompIterator::Match(TFoldedDecompIterator& aThat) |
|
324 |
{ |
|
325 |
ASSERT(!AtEnd()); |
|
326 |
if (!IsInFoldedSequence()) |
|
327 |
{ |
|
328 |
if ( aThat.Match(iOriginal.Current()) ) |
|
329 |
{ |
|
330 |
iOriginal.Next(); |
|
331 |
return ETrue; |
|
332 |
} |
|
333 |
if (!StrictEnterFoldedSequence()) |
|
334 |
return EFalse; |
|
335 |
} |
|
336 |
ASSERT(IsInFoldedSequence()); |
|
337 |
if ( aThat.Match(iFolded.Current()) ) |
|
338 |
{ |
|
339 |
iFolded.Next(); |
|
340 |
if (iFolded.AtEnd()) |
|
341 |
iOriginal.Next(); |
|
342 |
return ETrue; |
|
343 |
} |
|
344 |
return EFalse; |
|
345 |
} |
|
346 |
||
347 |
/** |
|
348 |
@internalComponent |
|
349 |
*/ |
|
350 |
void TFoldedDecompIterator::Next() |
|
351 |
{ |
|
352 |
ASSERT(!AtEnd()); |
|
353 |
if (IsInFoldedSequence()) |
|
354 |
{ |
|
355 |
iFolded.Next(); |
|
356 |
if (IsInFoldedSequence()) |
|
357 |
return; |
|
358 |
} |
|
359 |
iOriginal.Next(); |
|
360 |
} |
|
361 |
||
362 |
//////////////////////////////////////////////////////////////////////////////////////////// |
|
363 |
// TFoldedSortedDecompIterator |
|
364 |
//////////////////////////////////////////////////////////////////////////////////////////// |
|
365 |
||
366 |
/** |
|
367 |
Set this iterator to iterate over the next combining characters. |
|
368 |
Iotas in folded sequences are assumed to be character class 240 |
|
369 |
(combining ypogegrammeni). Next() must be used once before |
|
370 |
the first call to Current(), as long as AtEnd() returns false. |
|
371 |
@param aBase Sets the start of the iteration. This value is advanced to the |
|
372 |
end of the iteration. |
|
373 |
@return The number of codes in the iteration. |
|
374 |
@internalComponent |
|
375 |
*/ |
|
376 |
TInt TFoldedSortedDecompIterator::Set(TFoldedDecompIterator &aBase) |
|
377 |
{ |
|
378 |
iStart = aBase; |
|
379 |
TInt length = 0; |
|
380 |
iCurrentClass = 256; |
|
381 |
||
382 |
const TUnicodeDataSet* charDataSet = GetLocaleCharSet()->iCharDataSet; |
|
383 |
||
384 |
// Find the next starter (i.e. character with combining class 0). |
|
385 |
// We must not count iota folded from ypogegrammeni. |
|
386 |
// Ypogegrammeni has combining class 240. |
|
387 |
// Iota has combining class 0. |
|
388 |
// Also we will be searching for the character with the smallest |
|
389 |
// combining class to start at. |
|
390 |
while (!aBase.AtEnd()) |
|
391 |
{ |
|
392 |
aBase.EnterFoldedSequence(); |
|
393 |
TChar ch = aBase.Current(); |
|
394 |
TInt cl = TUnicode(TUint(ch)).GetCombiningClass(charDataSet); |
|
395 |
if (cl == 0) |
|
396 |
{ |
|
397 |
if (aBase.CurrentIsBaseFoldedFromCombiner()) |
|
398 |
cl = 240; |
|
399 |
else |
|
400 |
break; |
|
401 |
} |
|
402 |
if (cl < iCurrentClass) |
|
403 |
{ |
|
404 |
iCurrentClass = cl; |
|
405 |
iCurrent = aBase; |
|
406 |
iCurrentCount = length; |
|
407 |
} |
|
408 |
++length; |
|
409 |
aBase.Next(); |
|
410 |
} |
|
411 |
iRemaining = length; |
|
412 |
iLength = length; |
|
413 |
ASSERT(iLength == 0 || iCurrentClass < 256); |
|
414 |
return length; |
|
415 |
} |
|
416 |
||
417 |
/** |
|
418 |
Set this iterator so that AtEnd() returns ETrue. |
|
419 |
@internalComponent |
|
420 |
*/ |
|
421 |
void TFoldedSortedDecompIterator::Set() |
|
422 |
{ |
|
423 |
iRemaining = 0; |
|
424 |
} |
|
425 |
||
426 |
/** |
|
427 |
@internalComponent |
|
428 |
*/ |
|
429 |
TBool TFoldedSortedDecompIterator::AtEnd() const |
|
430 |
{ |
|
431 |
return iRemaining == 0; |
|
432 |
} |
|
433 |
||
434 |
/** |
|
435 |
@internalComponent |
|
436 |
*/ |
|
437 |
TChar TFoldedSortedDecompIterator::Current() const |
|
438 |
{ |
|
439 |
ASSERT(!AtEnd()); |
|
440 |
return iCurrent.Current(); |
|
441 |
} |
|
442 |
||
443 |
/** |
|
444 |
@internalComponent |
|
445 |
*/ |
|
446 |
void TFoldedSortedDecompIterator::Next() |
|
447 |
{ |
|
448 |
ASSERT(!AtEnd()); |
|
449 |
--iRemaining; |
|
450 |
while (++iCurrentCount != iLength) |
|
451 |
{ |
|
452 |
iCurrent.Next(); |
|
453 |
if (::GetClass(iCurrent) == iCurrentClass) |
|
454 |
return; |
|
455 |
} |
|
456 |
// We have not found any more of the same class, so we will look |
|
457 |
// for the smallest one larger. |
|
458 |
TInt minClass = 256; |
|
459 |
TFoldedDecompIterator searcher(iStart); |
|
460 |
TInt searchCount = 0; |
|
461 |
while (searchCount != iLength) |
|
462 |
{ |
|
463 |
TInt cl = ::GetClass(searcher); |
|
464 |
if (iCurrentClass < cl && cl < minClass) |
|
465 |
{ |
|
466 |
minClass = cl; |
|
467 |
iCurrentCount = searchCount; |
|
468 |
iCurrent = searcher; |
|
469 |
} |
|
470 |
++searchCount; |
|
471 |
searcher.Next(); |
|
472 |
} |
|
473 |
iCurrentClass = minClass; |
|
474 |
ASSERT(minClass < 256 || AtEnd()); |
|
475 |
} |
|
476 |
||
477 |
//////////////////////////////////////////////////////////////////////////////////////////// |
|
478 |
// TFoldedCanonicalIterator |
|
479 |
//////////////////////////////////////////////////////////////////////////////////////////// |
|
480 |
||
481 |
/** |
|
482 |
@internalComponent |
|
483 |
*/ |
|
484 |
TFoldedCanonicalIterator::TFoldedCanonicalIterator(const TUTF32Iterator& a) |
|
485 |
{ |
|
486 |
iBase.Set(a); |
|
487 |
iSorted.Set(); |
|
488 |
if(!iBase.AtEnd()) |
|
489 |
{ |
|
490 |
iBase.EnterFoldedSequence(); |
|
491 |
if (iBase.Current().GetCombiningClass() != 0 || iBase.CurrentIsBaseFoldedFromCombiner()) |
|
492 |
{ |
|
493 |
iSorted.Set(iBase); |
|
494 |
} |
|
495 |
} |
|
496 |
} |
|
497 |
||
498 |
/** |
|
499 |
@internalComponent |
|
500 |
*/ |
|
501 |
TBool TFoldedCanonicalIterator::AtEnd() const |
|
502 |
{ |
|
503 |
return iSorted.AtEnd() && iBase.AtEnd(); |
|
504 |
} |
|
505 |
||
506 |
/** |
|
507 |
@internalComponent |
|
508 |
*/ |
|
509 |
TChar TFoldedCanonicalIterator::Current() const |
|
510 |
{ |
|
511 |
ASSERT(!iBase.AtEnd() || !iSorted.AtEnd()); |
|
512 |
return iSorted.AtEnd()? iBase.Current() : iSorted.Current(); |
|
513 |
} |
|
514 |
||
515 |
/** |
|
516 |
@internalComponent |
|
517 |
*/ |
|
518 |
void TFoldedCanonicalIterator::Next(const TUnicodeDataSet* aCharDataSet) |
|
519 |
{ |
|
520 |
ASSERT(!iBase.AtEnd() || !iSorted.AtEnd()); |
|
521 |
if (!iSorted.AtEnd()) |
|
522 |
{ |
|
523 |
iSorted.Next(); |
|
524 |
return; |
|
525 |
} |
|
526 |
iBase.Next(); |
|
527 |
if(!iBase.AtEnd()) |
|
528 |
{ |
|
529 |
iBase.EnterFoldedSequence(); |
|
530 |
if (TUnicode(TUint(iBase.Current())).GetCombiningClass(aCharDataSet) != 0 || iBase.CurrentIsBaseFoldedFromCombiner()) |
|
531 |
{ |
|
532 |
iSorted.Set(iBase); |
|
533 |
} |
|
534 |
} |
|
535 |
} |
|
536 |
||
537 |
//////////////////////////////////////////////////////////////////////////////////////////// |
|
538 |
// Folding - Global functions and structures |
|
539 |
//////////////////////////////////////////////////////////////////////////////////////////// |
|
540 |
||
541 |
/** |
|
542 |
@internalComponent |
|
543 |
*/ |
|
544 |
struct TEndTester |
|
545 |
{ |
|
546 |
typedef enum {EGenuine, EWildcard} TType; |
|
547 |
||
548 |
inline TEndTester(TType aType) : |
|
549 |
iType(aType) |
|
550 |
{ |
|
551 |
} |
|
552 |
||
553 |
inline TBool operator()(const TFoldedDecompIterator& a) const |
|
554 |
{ |
|
555 |
return iType == EGenuine ? a.AtEnd() : a.AtEndOrWildcard(); |
|
556 |
} |
|
557 |
||
558 |
private: |
|
559 |
TType iType; |
|
560 |
||
561 |
}; |
|
562 |
||
563 |
/** |
|
564 |
Consumes as much of aCandidate as matches aSearchTerm up to the next '?' or |
|
565 |
'*' wildcard or the end of aSearchTerm. |
|
566 |
It is assumed that the search term begins with a base character. |
|
567 |
Returns true if and only if the whole search term was matched |
|
568 |
with a whole number of UTF16s in the candidate string. |
|
569 |
On return of ETrue the candidate string iterator will have consumed the |
|
570 |
matching characters the search term will have had all its matching characters |
|
571 |
consumed. |
|
572 |
@internalComponent |
|
573 |
*/ |
|
574 |
TBool ConsumeFoldedMatch(TUTF32Iterator& aCandidateString, TUTF32Iterator& aSearchTerm, |
|
575 |
const TEndTester& aEndTester) |
|
576 |
{ |
|
577 |
TBool assumeBase = ETrue; |
|
578 |
TFoldedDecompIterator st(aSearchTerm); |
|
579 |
TFoldedDecompIterator cs(aCandidateString); |
|
580 |
while (!aEndTester(st)) |
|
581 |
{ |
|
582 |
if (cs.AtEnd()) |
|
583 |
return EFalse; |
|
584 |
if (st.Match(cs)) |
|
585 |
{ |
|
586 |
assumeBase = EFalse; |
|
587 |
if (aEndTester(st)) |
|
588 |
{ |
|
589 |
// We have a match... |
|
590 |
if (cs.IsInFoldedSequence()) |
|
591 |
// but it was against only part of a character |
|
592 |
// in the original string, so we fail. |
|
593 |
return EFalse; |
|
594 |
aCandidateString = cs.BaseIterator(); |
|
595 |
aSearchTerm = st.BaseIterator(); |
|
596 |
return ETrue; |
|
597 |
} |
|
598 |
continue; |
|
599 |
} |
|
600 |
// did not match. We need to re-order canonically. |
|
601 |
if (assumeBase) |
|
602 |
// The first characters did not match.. do not bother |
|
603 |
// to re-order. |
|
604 |
return EFalse; |
|
605 |
TFoldedSortedDecompIterator csc; |
|
606 |
TInt cscLength = csc.Set(cs); |
|
607 |
if (cscLength < 2) |
|
608 |
// If there are less than two characters to be reordered, |
|
609 |
// there is no hope of getting a match by re-ordering |
|
610 |
return EFalse; |
|
611 |
TFoldedSortedDecompIterator stc; |
|
612 |
if (cscLength != stc.Set(st)) |
|
613 |
// if the strings have differing numbers of characters, |
|
614 |
// there can be no match |
|
615 |
return EFalse; |
|
616 |
while (!stc.AtEnd()) |
|
617 |
{ |
|
618 |
ASSERT(!csc.AtEnd()); |
|
619 |
if (stc.Current() != csc.Current()) |
|
620 |
// well, we tried. |
|
621 |
return EFalse; |
|
622 |
stc.Next(); |
|
623 |
csc.Next(); |
|
624 |
} |
|
625 |
} |
|
626 |
// If the candidate string is in a folded sequence, then |
|
627 |
// we should not accept the match, as we require all matches |
|
628 |
// to be for a whole number of characters in the original string. |
|
629 |
if (cs.IsInFoldedSequence()) |
|
630 |
return EFalse; |
|
631 |
aCandidateString = cs.BaseIterator(); |
|
632 |
aSearchTerm = st.BaseIterator(); |
|
633 |
return ETrue; |
|
634 |
} |
|
635 |
||
636 |
/** |
|
637 |
@internalComponent |
|
638 |
*/ |
|
639 |
TBool ConsumeFoldedMatchWholeGraphemes(TUTF32Iterator& aCandidateString, TUTF32Iterator& aSearchTerm, |
|
640 |
const TEndTester& aEndTester) |
|
641 |
{ |
|
642 |
if (!::ConsumeFoldedMatch(aCandidateString, aSearchTerm, aEndTester)) |
|
643 |
return EFalse; |
|
644 |
return aCandidateString.AtEnd() || ::IsBaseCharacter(aCandidateString.Current()); |
|
645 |
} |
|
646 |
||
647 |
/** |
|
648 |
@internalComponent |
|
649 |
*/ |
|
650 |
static TBool ConsumeGrapheme(TUTF32Iterator& a) |
|
651 |
{ |
|
652 |
if (a.AtEnd()) |
|
653 |
return EFalse; |
|
654 |
a.Next(); |
|
655 |
while (!a.AtEnd() && !::IsBaseCharacter(a.Current())) |
|
656 |
a.Next(); |
|
657 |
return ETrue; |
|
658 |
} |
|
659 |
||
660 |
/** |
|
661 |
@internalComponent |
|
662 |
*/ |
|
663 |
TBool MatchSectionFolded(TUTF32Iterator& aCandidateString, TUTF32Iterator& aSearchTerm) |
|
664 |
{ |
|
665 |
TEndTester endTester(TEndTester::EWildcard); |
|
666 |
while(::ConsumeFoldedMatchWholeGraphemes(aCandidateString, aSearchTerm, endTester)) |
|
667 |
{ |
|
668 |
if (aSearchTerm.AtEnd() || aSearchTerm.Current() == '*') |
|
669 |
return ETrue; |
|
670 |
ASSERT(aSearchTerm.Current() == '?'); |
|
671 |
aSearchTerm.Next(); |
|
672 |
if (!::ConsumeGrapheme(aCandidateString)) |
|
673 |
return EFalse; |
|
674 |
} |
|
675 |
return EFalse; |
|
676 |
} |
|
677 |
||
678 |
/** |
|
679 |
@internalComponent |
|
680 |
*/ |
|
681 |
TBool FindMatchSectionFolded(TUTF32Iterator& aCandidateString, TUTF32Iterator& aSearchTerm) |
|
682 |
{ |
|
683 |
// match as many graphemes as there are leading ?s |
|
684 |
while (!aSearchTerm.AtEnd() |
|
685 |
&& aSearchTerm.Current() != '*' && aSearchTerm.Current() == '?') |
|
686 |
{ |
|
687 |
if (!::ConsumeGrapheme(aCandidateString)) |
|
688 |
return EFalse; |
|
689 |
aSearchTerm.Next(); |
|
690 |
} |
|
691 |
if (aSearchTerm.AtEnd() || aSearchTerm.Current() == '*') |
|
692 |
return ETrue; |
|
693 |
TChar firstCharOfSearchTerm = ::GetFirstFoldedChar(aSearchTerm.Current()); |
|
694 |
TUTF32Iterator savedST(aSearchTerm); |
|
695 |
while (aCandidateString.LocateFoldedBaseCharacter(firstCharOfSearchTerm)) |
|
696 |
{ |
|
697 |
TUTF32Iterator savedCS = aCandidateString; |
|
698 |
if (::MatchSectionFolded(aCandidateString, aSearchTerm)) |
|
699 |
return ETrue; |
|
700 |
aSearchTerm = savedST; |
|
701 |
aCandidateString = savedCS; |
|
702 |
aCandidateString.Next(); |
|
703 |
} |
|
704 |
return EFalse; |
|
705 |
} |
|
706 |
||
707 |
/** |
|
708 |
@internalComponent |
|
709 |
*/ |
|
710 |
TBool MatchStringFolded(const TText16* aCandidateStringStart, const TText16* aCandidateStringEnd, |
|
711 |
const TText16* aSearchTermStart, const TText16* aSearchTermEnd) |
|
712 |
{ |
|
713 |
TUTF32Iterator candidate(aCandidateStringStart, aCandidateStringEnd); |
|
714 |
TUTF32Iterator searchTerm(aSearchTermStart, aSearchTermEnd); |
|
715 |
// First section of search term must match exactly at the start of the |
|
716 |
// candidate string. |
|
717 |
if (!::MatchSectionFolded(candidate, searchTerm)) |
|
718 |
return EFalse; |
|
719 |
||
720 |
// If there was only one section, it must match the whole candidate string. |
|
721 |
if (searchTerm.AtEnd()) |
|
722 |
return candidate.AtEnd(); |
|
723 |
||
724 |
while (!searchTerm.AtEnd()) |
|
725 |
{ |
|
726 |
searchTerm.Next(); |
|
727 |
if (!::FindMatchSectionFolded(candidate, searchTerm)) |
|
728 |
return EFalse; |
|
729 |
} |
|
730 |
||
731 |
// The last section must match exactly at the end of the candidate string. |
|
732 |
if (candidate.AtEnd()) // shortcut |
|
733 |
return ETrue; |
|
734 |
const TText16* searchTermLastSection = aSearchTermEnd; |
|
735 |
while (searchTermLastSection != aSearchTermStart |
|
736 |
&& searchTermLastSection[-1] != '*') |
|
737 |
--searchTermLastSection; |
|
738 |
if (searchTermLastSection == aSearchTermEnd) |
|
739 |
// last section is null, so trivially matches |
|
740 |
return ETrue; |
|
741 |
// Count graphemes by counting the number of base characters. |
|
742 |
// The first one is assumed to start a grapheme. |
|
743 |
TInt graphemeCount = 1; |
|
744 |
for (const TText16* s = searchTermLastSection + 1; s != aSearchTermEnd; ++s) |
|
745 |
{ |
|
746 |
if (::IsBaseCharacter(*s)) |
|
747 |
++graphemeCount; |
|
748 |
} |
|
749 |
// Count this many graphemes back in the candidate string |
|
750 |
const TText16* candidateLastSection = aCandidateStringEnd; |
|
751 |
while (graphemeCount != 0 |
|
752 |
&& candidateLastSection != aCandidateStringStart) |
|
753 |
{ |
|
754 |
if (::IsBaseCharacter(*--candidateLastSection)) |
|
755 |
--graphemeCount; |
|
756 |
} |
|
757 |
TUTF32Iterator last(candidateLastSection, aCandidateStringEnd); |
|
758 |
TUTF32Iterator st(searchTermLastSection, aSearchTermEnd); |
|
759 |
return ::MatchSectionFolded(last, st); |
|
760 |
} |
|
761 |
||
762 |
/** |
|
763 |
Implementation of MatchF |
|
764 |
(slow if there is a match: MatchStringFolded is better, but does not return |
|
765 |
the position of the match) |
|
766 |
@internalComponent |
|
767 |
*/ |
|
768 |
TInt LocateMatchStringFolded(const TText16* aCandidateStringStart, const TText16* aCandidateStringEnd, |
|
769 |
const TText16* aSearchTermStart, const TText16* aSearchTermEnd) |
|
770 |
{ |
|
771 |
// Quick shortcut for simple non-match |
|
772 |
if (aSearchTermStart != aSearchTermEnd && *aSearchTermStart != '*') |
|
773 |
{ |
|
774 |
if (aCandidateStringStart == aCandidateStringEnd) |
|
775 |
return KErrNotFound; |
|
776 |
// We won't bother with non-characters and surrogate pairs. |
|
777 |
if (*aSearchTermStart != '?' |
|
778 |
&& *aSearchTermStart < 0xD800 |
|
779 |
&& *aCandidateStringStart < 0xD800 |
|
780 |
&& ::GetFirstFoldedChar(*aSearchTermStart) != ::GetFirstFoldedChar(*aCandidateStringStart)) |
|
781 |
return KErrNotFound; |
|
782 |
} |
|
783 |
if (!::MatchStringFolded(aCandidateStringStart, aCandidateStringEnd, |
|
784 |
aSearchTermStart, aSearchTermEnd)) |
|
785 |
return KErrNotFound; |
|
786 |
// find where it matches |
|
787 |
while (aSearchTermStart != aSearchTermEnd && *aSearchTermStart == '*') |
|
788 |
++aSearchTermStart; |
|
789 |
const TText16* end = aSearchTermStart; |
|
790 |
while (end != aSearchTermEnd && *end != '*') |
|
791 |
++end; |
|
792 |
// To preserve existing behaviour, a search term consisting of nothing |
|
793 |
// but stars is considered to match at 0. |
|
794 |
if (end == aSearchTermStart) |
|
795 |
return 0; |
|
796 |
for (const TText16* csSection = aCandidateStringStart; |
|
797 |
csSection <= aCandidateStringEnd; ++csSection) |
|
798 |
{ |
|
799 |
TUTF32Iterator cs(csSection, aCandidateStringEnd); |
|
800 |
TUTF32Iterator st(aSearchTermStart, end); |
|
801 |
if (::MatchSectionFolded(cs, st)) |
|
802 |
{ |
|
803 |
// If this match must match exactly at the end, we must keep |
|
804 |
// going. |
|
805 |
// This could be a lot faster, with some optimizations. |
|
806 |
if (end == aSearchTermEnd) |
|
807 |
{ |
|
808 |
if (!cs.AtEnd()) |
|
809 |
continue; |
|
810 |
} |
|
811 |
return csSection - aCandidateStringStart; |
|
812 |
} |
|
813 |
// Because this function is using TUTF32Iterator, which means the |
|
814 |
// original author want to support surrogate. Take it as a defect and |
|
815 |
// fix it, while do not define a new LocateMatchStringFoldedSurrogate(). |
|
90
947f0dc9f7a8
Revision: 201015
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
816 |
if (TChar::IsSurrogate(*csSection)) |
0 | 817 |
++csSection; |
818 |
} |
|
819 |
// this should never happen! |
|
820 |
ASSERT(0); |
|
821 |
return KErrNotFound; |
|
822 |
} |
|
823 |
||
824 |
/** |
|
825 |
Implementation of FindF |
|
826 |
@internalComponent |
|
827 |
*/ |
|
828 |
TInt FindFolded(TUTF32Iterator& aCandidateString, TUTF32Iterator& aSearchTerm) |
|
829 |
{ |
|
830 |
//Empty aSearchTerm string? - Then return 0 - keep the new implementation functionally |
|
831 |
//compatible with the old one. |
|
832 |
if(aSearchTerm.Length() == 0) |
|
833 |
{ |
|
834 |
return 0; |
|
835 |
} |
|
836 |
const TText16* candidateStartPosition = aCandidateString.CurrentPosition(); |
|
837 |
TChar firstCharOfSearchTerm = ::GetFirstFoldedChar(aSearchTerm.Current()); |
|
838 |
TUTF32Iterator savedST(aSearchTerm); |
|
839 |
while (aCandidateString.LocateFoldedBaseCharacter(firstCharOfSearchTerm)) |
|
840 |
{ |
|
841 |
TUTF32Iterator savedCS = aCandidateString; |
|
842 |
TEndTester endTester(TEndTester::EGenuine); |
|
843 |
if (::ConsumeFoldedMatchWholeGraphemes(aCandidateString, aSearchTerm, endTester)) |
|
844 |
return savedCS.CurrentPosition() - candidateStartPosition; |
|
845 |
aSearchTerm = savedST; |
|
846 |
aCandidateString = savedCS; |
|
847 |
aCandidateString.Next(); |
|
848 |
} |
|
849 |
return KErrNotFound; |
|
850 |
} |
|
851 |
||
852 |
/** |
|
853 |
Implementation of boolean CompareF |
|
854 |
@internalComponent |
|
855 |
*/ |
|
856 |
TBool EqualsFolded(TUTF32Iterator& aLeft, TUTF32Iterator& aRight) |
|
857 |
{ |
|
858 |
TEndTester endTester(TEndTester::EGenuine); |
|
859 |
if (::ConsumeFoldedMatchWholeGraphemes(aLeft, aRight, endTester)) |
|
860 |
return aLeft.AtEnd(); |
|
861 |
return EFalse; |
|
862 |
} |
|
863 |
||
864 |
/** |
|
865 |
Implementation of tri-state CompareF |
|
866 |
@internalComponent |
|
867 |
*/ |
|
868 |
TInt CompareFolded(const TUTF32Iterator& aLeft, const TUTF32Iterator& aRight) |
|
869 |
{ |
|
870 |
TFoldedCanonicalIterator left(aLeft); |
|
871 |
TFoldedCanonicalIterator right(aRight); |
|
872 |
||
873 |
const TUnicodeDataSet* charDataSet = GetLocaleCharSet()->iCharDataSet; |
|
874 |
||
875 |
while (!left.AtEnd()) |
|
876 |
{ |
|
877 |
if (right.AtEnd()) |
|
878 |
return 1; |
|
879 |
TChar cr = right.Current(); |
|
880 |
TChar cl = left.Current(); |
|
881 |
if (cr != cl) |
|
882 |
return cl - cr; |
|
883 |
right.Next(charDataSet); |
|
884 |
left.Next(charDataSet); |
|
885 |
} |
|
886 |
return right.AtEnd()? 0 : -1; |
|
887 |
} |
|
888 |
||
889 |
//////////////////////////////////////////////////////////////////////////////////////////// |
|
890 |
// Composition/Decomposition - Global functions and structures |
|
891 |
//////////////////////////////////////////////////////////////////////////////////////////// |
|
892 |
||
893 |
/** |
|
894 |
@internalComponent |
|
895 |
*/ |
|
896 |
static TUTF32Iterator IndexToNonSingletonDecomposition(TInt aIndex) |
|
897 |
{ |
|
898 |
const TText* start = KNonSingletonDecompositions + aIndex * 2; |
|
899 |
if (*start != KLongD) |
|
900 |
return TUTF32Iterator(start, start + 2, TUTF32Iterator::EStartsWithValidCharacter); |
|
901 |
TInt longDecompIndex = start[1]; |
|
902 |
start = KLongDecompositions + (longDecompIndex & 0xFFF); |
|
903 |
return TUTF32Iterator(start, start + (longDecompIndex >> 12) + 3, TUTF32Iterator::EStartsWithValidCharacter); |
|
904 |
} |
|
905 |
||
906 |
/** |
|
907 |
Come up with a decomposition for the current value pointed at by the iterator |
|
908 |
@internalComponent |
|
909 |
*/ |
|
910 |
static TBool Decompose(const TUTF32Iterator& aUTF32It, TUTF32Iterator& aOutIt) |
|
911 |
{ |
|
912 |
TInt index = ::GetDecompositionIndex(aUTF32It.Current()); |
|
913 |
TInt singletonIndex = index - (sizeof(KNonSingletonDecompositions)/sizeof(KNonSingletonDecompositions[0])/2); |
|
914 |
const TInt SizeOfSingletonTable = sizeof(KSingletonDecompositions)/sizeof(KSingletonDecompositions[0]); |
|
915 |
if(index < 0 || SizeOfSingletonTable <= singletonIndex) |
|
916 |
{ |
|
917 |
aOutIt = aUTF32It.CurrentAsIterator(); |
|
918 |
return EFalse;//There is not any valid decomposition |
|
919 |
} |
|
920 |
if(0 <= singletonIndex) |
|
921 |
{ |
|
922 |
// KSingletonDecompositions contains some items that come from "ShortDecompsLongFolds". |
|
923 |
// "ShortDecompsLongFolds" contains some characters that have no composition, but have "long fold". |
|
924 |
// This basically specific to non-BMP characters with fold also outside BMP. |
|
925 |
// For example: |
|
926 |
// 10400;DESERET CAPITAL LETTER LONG I;Lu;0;L;;;;;N;;;;10428; |
|
927 |
// In Unicode 5.0.0, totally, there are 40 similar characters, which are U+10400-U+10427. |
|
928 |
if (KSingletonDecompositions[singletonIndex] == 0xFFFF) |
|
929 |
return EFalse; |
|
930 |
aOutIt = TUTF32Iterator(KSingletonDecompositions + singletonIndex); |
|
931 |
} |
|
932 |
else |
|
933 |
{ |
|
934 |
aOutIt = ::IndexToNonSingletonDecomposition(index); |
|
935 |
} |
|
936 |
return ETrue;//Valid decomposition |
|
937 |
} |
|
938 |
||
939 |
/** |
|
940 |
@internalComponent |
|
941 |
*/ |
|
942 |
static TUTF32Iterator CharToUTF32Iterator(TChar aChar, TDes16& aBuf) |
|
943 |
{ |
|
944 |
aBuf.Zero(); |
|
945 |
if (aChar < 0x10000) |
|
946 |
aBuf.Append(aChar); |
|
947 |
else |
|
948 |
{ |
|
949 |
aBuf.Append((aChar >> 10) + 0xD7C0); |
|
950 |
aBuf.Append((aChar & 0x3FF) + 0xDC00); |
|
951 |
} |
|
952 |
const TText16* t = aBuf.Ptr(); |
|
953 |
return TUTF32Iterator(t, t + aBuf.Length()); |
|
954 |
} |
|
955 |
||
956 |
/** |
|
957 |
@internalComponent |
|
958 |
*/ |
|
959 |
TBool DecomposeChar(TChar aCh, TPtrC16& aResult) |
|
960 |
{ |
|
961 |
aResult.Set(NULL, 0); |
|
962 |
TBuf16<2> srcBuf; |
|
963 |
TUTF32Iterator it = ::CharToUTF32Iterator(aCh, srcBuf); |
|
964 |
TBool res = ::Decompose(it, it); |
|
965 |
if(res) |
|
966 |
{ |
|
967 |
aResult.Set(it.CurrentPosition(), it.Length()); |
|
968 |
} |
|
969 |
return res; |
|
970 |
} |
|
971 |
||
972 |
/** |
|
973 |
Turn an index into the hash table known to point to a non-singleton |
|
974 |
decomposition into that decomposition. |
|
975 |
@internalComponent |
|
976 |
*/ |
|
977 |
static TUTF32Iterator HashIndexToDecomposition(TInt aIndex) |
|
978 |
{ |
|
979 |
TUint32 v = KUnicodeToIndexHash[aIndex]; |
|
980 |
ASSERT(v != 0); |
|
981 |
TInt index = static_cast<TInt>(v >> 20); |
|
982 |
ASSERT(index < ARRAY_LENGTH(KNonSingletonDecompositions)/2); |
|
983 |
return ::IndexToNonSingletonDecomposition(index); |
|
984 |
} |
|
985 |
||
986 |
/** |
|
987 |
Takes a start and (one past the) end index into KCompostionMapping |
|
988 |
and a number of UTF16 characters (aLengthSoFar). All of the compositions |
|
989 |
within the range must have their first aLengthSoFar UTF16 characters |
|
990 |
matching. |
|
991 |
||
992 |
On entry, if aStart == aEnd then there is no possibility of a match so return |
|
993 |
immediately with EFalse. To continue, aStart must be strictly less than aEnd. |
|
994 |
||
995 |
Afterwards, aStart and aEnd will be narrowed to all those compositions |
|
996 |
where the aLengthSoFar'th UTF16 character matches aNextCharacter. |
|
997 |
No further compositions existing is indicated by aStart == aEnd. |
|
998 |
||
999 |
@return ETrue if the composition at aStart is exactly of length aLengthSoFar + 1. |
|
1000 |
@internalComponent |
|
1001 |
*/ |
|
1002 |
static TBool RefineComposition(TInt& aStart, TInt& aEnd, TInt aLengthSoFar, TInt aNextCharacter) |
|
1003 |
{ |
|
1004 |
if (aStart == aEnd) |
|
1005 |
return EFalse; |
|
1006 |
ASSERT((TUint)aStart < (TUint)aEnd); |
|
1007 |
ASSERT((TUint)aEnd <= (TUint)ARRAY_LENGTH(KCompositionMapping)); |
|
1008 |
TUTF32Iterator startIterator(::HashIndexToDecomposition(KCompositionMapping[aStart])); |
|
1009 |
if (startIterator.Length() == aLengthSoFar) |
|
1010 |
++aStart; |
|
1011 |
||
1012 |
// Find a single example of a decomposition that is suitable |
|
1013 |
TInt mid; |
|
1014 |
TUTF32Iterator midIt; |
|
1015 |
for (;;) |
|
1016 |
{ |
|
1017 |
if (aStart == aEnd) |
|
1018 |
return EFalse; |
|
1019 |
mid = aStart + ((aEnd - aStart) >> 1); |
|
1020 |
midIt = ::HashIndexToDecomposition(KCompositionMapping[mid]); |
|
1021 |
ASSERT(aLengthSoFar < midIt.Length()); |
|
1022 |
TInt midItChar = midIt[aLengthSoFar]; |
|
1023 |
if (midItChar < aNextCharacter) |
|
1024 |
aStart = mid + 1; |
|
1025 |
else if (aNextCharacter < midItChar) |
|
1026 |
aEnd = mid; |
|
1027 |
else |
|
1028 |
{ |
|
1029 |
startIterator = midIt; |
|
1030 |
break; |
|
1031 |
} |
|
1032 |
} |
|
1033 |
||
1034 |
// FInd the first decomposition that does not match |
|
1035 |
TInt start2 = mid + 1; |
|
1036 |
while (start2 != aEnd) |
|
1037 |
{ |
|
1038 |
ASSERT(start2 < aEnd); |
|
1039 |
TInt mid2 = start2 + ((aEnd - start2) >> 1); |
|
1040 |
midIt = ::HashIndexToDecomposition(KCompositionMapping[mid2]); |
|
1041 |
ASSERT(aLengthSoFar < midIt.Length()); |
|
1042 |
TInt midItChar = midIt[aLengthSoFar]; |
|
1043 |
ASSERT(aNextCharacter <= midItChar); |
|
1044 |
if (aNextCharacter < midItChar) |
|
1045 |
aEnd = mid2; |
|
1046 |
else |
|
1047 |
start2 = mid2 + 1; |
|
1048 |
} |
|
1049 |
||
1050 |
// Find the first decomposition that matches |
|
1051 |
while (aStart != mid) |
|
1052 |
{ |
|
1053 |
ASSERT(aStart < mid); |
|
1054 |
TInt mid2 = aStart + ((mid - aStart) >> 1); |
|
1055 |
midIt = ::HashIndexToDecomposition(KCompositionMapping[mid2]); |
|
1056 |
ASSERT(aLengthSoFar < midIt.Length()); |
|
1057 |
TInt midItChar = midIt[aLengthSoFar]; |
|
1058 |
ASSERT(midItChar <= aNextCharacter); |
|
1059 |
if (midItChar < aNextCharacter) |
|
1060 |
aStart = mid2 + 1; |
|
1061 |
else |
|
1062 |
{ |
|
1063 |
startIterator = midIt; |
|
1064 |
mid = mid2; |
|
1065 |
} |
|
1066 |
} |
|
1067 |
||
1068 |
return startIterator.Length() == (aLengthSoFar + 1); |
|
1069 |
} |
|
1070 |
||
1071 |
/** |
|
1072 |
@internalComponent |
|
1073 |
*/ |
|
1074 |
static TBool RefineCompositionUTF32(TInt& aStart, TInt& aEnd, TInt& aLengthSoFar, TChar aChar) |
|
1075 |
{ |
|
1076 |
if (aChar < 0x10000) |
|
1077 |
return ::RefineComposition(aStart, aEnd, aLengthSoFar++, aChar); |
|
1078 |
::RefineComposition(aStart, aEnd, aLengthSoFar++, (aChar >> 10) + 0xD7C0); |
|
1079 |
if (aStart == aEnd) |
|
1080 |
return EFalse; |
|
1081 |
return ::RefineComposition(aStart, aEnd, aLengthSoFar++, (aChar & 0x3FF) + 0xDC00); |
|
1082 |
} |
|
1083 |
||
1084 |
/** |
|
1085 |
Combine as many of the characters presented as possible into a single |
|
1086 |
character. |
|
1087 |
@return The number of characters successfully combined. |
|
1088 |
@param aCombined If a nonzero value is returned, this contains |
|
1089 |
the character that is that number of characters from the start of |
|
1090 |
aDes combined. |
|
1091 |
@internalComponent |
|
1092 |
*/ |
|
1093 |
TInt CombineAsMuchAsPossible(const TDesC16& aDes, TChar& aCombined) |
|
1094 |
{ |
|
1095 |
TInt start = 0; |
|
1096 |
TInt end = sizeof(KCompositionMapping)/sizeof(KCompositionMapping[0]); |
|
1097 |
TInt length = 0; |
|
1098 |
TInt bestIndex = 0; |
|
1099 |
TInt bestLength = 0; |
|
1100 |
const TText16* ptr = aDes.Ptr(); |
|
1101 |
TUTF32Iterator input(ptr, ptr + aDes.Length()); |
|
1102 |
while (!input.AtEnd()) |
|
1103 |
{ |
|
1104 |
if (::RefineCompositionUTF32(start, end, length, input.Current())) |
|
1105 |
{ |
|
1106 |
bestIndex = start; |
|
1107 |
bestLength = length; |
|
1108 |
} |
|
1109 |
input.Next(); |
|
1110 |
} |
|
1111 |
if (bestLength == 0) |
|
1112 |
return 0; |
|
1113 |
aCombined = KUnicodeToIndexHash[KCompositionMapping[bestIndex]] & 0xFFFFF; |
|
1114 |
return bestLength; |
|
1115 |
} |
|
1116 |
||
1117 |
////////////////////////////////////////////////////////////////////////////////////////////// |
|
1118 |
// COLLATION |
|
1119 |
////////////////////////////////////////////////////////////////////////////////////////////// |
|
1120 |
||
1121 |
////////////////////////////////////////////////////////////////////////////////////////////// |
|
1122 |
// TDecompositionIterator class |
|
1123 |
||
1124 |
/** |
|
1125 |
@internalComponent |
|
1126 |
*/ |
|
1127 |
void TDecompositionIterator::Set(const TUTF32Iterator& a) |
|
1128 |
{ |
|
1129 |
iBase = a; |
|
1130 |
if (!iBase.AtEnd()) |
|
1131 |
{ |
|
1132 |
(void)::Decompose(iBase, iDecomposition); |
|
1133 |
} |
|
1134 |
} |
|
1135 |
||
1136 |
/** |
|
1137 |
@internalComponent |
|
1138 |
*/ |
|
1139 |
TDecompositionIterator::TDecompositionIterator(const TUTF32Iterator& a) |
|
1140 |
{ |
|
1141 |
Set(a); |
|
1142 |
} |
|
1143 |
||
1144 |
/** |
|
1145 |
@internalComponent |
|
1146 |
*/ |
|
1147 |
TBool TDecompositionIterator::AtEnd() const |
|
1148 |
{ |
|
1149 |
return iBase.AtEnd(); |
|
1150 |
} |
|
1151 |
||
1152 |
/** |
|
1153 |
@internalComponent |
|
1154 |
*/ |
|
1155 |
TChar TDecompositionIterator::Current() const |
|
1156 |
{ |
|
1157 |
return iDecomposition.Current(); |
|
1158 |
} |
|
1159 |
||
1160 |
/** |
|
1161 |
@internalComponent |
|
1162 |
*/ |
|
1163 |
void TDecompositionIterator::Next() |
|
1164 |
{ |
|
1165 |
ASSERT(!iBase.AtEnd() && !iDecomposition.AtEnd()); |
|
1166 |
iDecomposition.Next(); |
|
1167 |
if (!iDecomposition.AtEnd()) |
|
1168 |
return; |
|
1169 |
iBase.Next(); |
|
1170 |
if (!iBase.AtEnd()) |
|
1171 |
{ |
|
1172 |
(void)::Decompose(iBase, iDecomposition); |
|
1173 |
} |
|
1174 |
} |
|
1175 |
||
1176 |
/** |
|
1177 |
@internalComponent |
|
1178 |
*/ |
|
1179 |
const TText16* TDecompositionIterator::CurrentPosition() const |
|
1180 |
{ |
|
1181 |
return iBase.CurrentPosition(); |
|
1182 |
} |
|
1183 |
||
1184 |
/** |
|
1185 |
Find out the length and minimum combining class of |
|
1186 |
the current run of characters of nonzero combining class. |
|
1187 |
aMinClass and aMinClassPos are not written to if the return |
|
1188 |
value is 0. |
|
1189 |
aEndOfRun is written to with the final position of the iteration |
|
1190 |
if 0 is returned and aEndOfRun is non-null |
|
1191 |
@internalComponent |
|
1192 |
*/ |
|
1193 |
static TInt ReorderingRun(TInt& aMinClass, TDecompositionIterator& aMinClassPos, |
|
1194 |
const TDecompositionIterator& aStart, TBool* aOpenSequence, |
|
1195 |
TInt aMaxDisallowedClass = 0, TDecompositionIterator* aEndOfRun = 0) |
|
1196 |
{ |
|
1197 |
TInt comclass = aStart.AtEnd()? 0 : aStart.Current().GetCombiningClass(); |
|
1198 |
if (comclass == 0) |
|
1199 |
{ |
|
1200 |
if (aEndOfRun) |
|
1201 |
*aEndOfRun = aStart; |
|
1202 |
if (aOpenSequence) |
|
1203 |
*aOpenSequence = aStart.AtEnd(); |
|
1204 |
return 0; |
|
1205 |
} |
|
1206 |
aMinClass = 256; |
|
1207 |
TDecompositionIterator i = aStart; |
|
1208 |
TInt count = 0; |
|
1209 |
while (comclass != 0) |
|
1210 |
{ |
|
1211 |
if (aMaxDisallowedClass < comclass) |
|
1212 |
{ |
|
1213 |
if (comclass < aMinClass) |
|
1214 |
{ |
|
1215 |
aMinClass = comclass; |
|
1216 |
aMinClassPos = i; |
|
1217 |
} |
|
1218 |
++count; |
|
1219 |
} |
|
1220 |
i.Next(); |
|
1221 |
comclass = i.AtEnd()? 0 : i.Current().GetCombiningClass(); |
|
1222 |
} |
|
1223 |
if (count == 0 && aEndOfRun) |
|
1224 |
*aEndOfRun = i; |
|
1225 |
if (aOpenSequence) |
|
1226 |
*aOpenSequence = i.AtEnd(); |
|
1227 |
return count; |
|
1228 |
} |
|
1229 |
||
1230 |
////////////////////////////////////////////////////////////////////////////////////////////// |
|
1231 |
// TCanonicalDecompositionIterator class |
|
1232 |
||
1233 |
/** |
|
1234 |
@internalComponent |
|
1235 |
*/ |
|
1236 |
void TCanonicalDecompositionIterator::Set(const TUTF32Iterator& a) |
|
1237 |
{ |
|
1238 |
iBase.Set(a); |
|
1239 |
iLastPosition = 0; |
|
1240 |
if (ReorderingRun(iCurrentCombiningClass, iCurrent, iBase, &iInOpenSequence) < 2) |
|
1241 |
iCurrentCombiningClass = 0; |
|
1242 |
} |
|
1243 |
||
1244 |
/** |
|
1245 |
@internalComponent |
|
1246 |
*/ |
|
1247 |
TBool TCanonicalDecompositionIterator::AtEnd() const |
|
1248 |
{ |
|
1249 |
return iBase.AtEnd(); |
|
1250 |
} |
|
1251 |
||
1252 |
/** |
|
1253 |
@internalComponent |
|
1254 |
*/ |
|
1255 |
TChar TCanonicalDecompositionIterator::Current() const |
|
1256 |
{ |
|
1257 |
return iCurrentCombiningClass? iCurrent.Current() : iBase.Current(); |
|
1258 |
} |
|
1259 |
||
1260 |
/** |
|
1261 |
@internalComponent |
|
1262 |
*/ |
|
1263 |
void TCanonicalDecompositionIterator::Next() |
|
1264 |
{ |
|
1265 |
iLastPosition = iBase.CurrentPosition(); |
|
1266 |
if (iCurrentCombiningClass == 0) |
|
1267 |
{ |
|
1268 |
iBase.Next(); |
|
1269 |
if (ReorderingRun(iCurrentCombiningClass, iCurrent, iBase, &iInOpenSequence) < 2) |
|
1270 |
iCurrentCombiningClass = 0; |
|
1271 |
return; |
|
1272 |
} |
|
1273 |
// Find the next character in the run with the same combining class |
|
1274 |
iCurrent.Next(); |
|
1275 |
TInt curclass = iCurrent.AtEnd()? 0 : iCurrent.Current().GetCombiningClass(); |
|
1276 |
while (curclass != 0) |
|
1277 |
{ |
|
1278 |
if (curclass == iCurrentCombiningClass) |
|
1279 |
// success |
|
1280 |
return; |
|
1281 |
iCurrent.Next(); |
|
1282 |
curclass = iCurrent.AtEnd()? 0 : iCurrent.Current().GetCombiningClass(); |
|
1283 |
} |
|
1284 |
// There are none left in the current class. Find out what the next one is. |
|
1285 |
if (0 == ReorderingRun(iCurrentCombiningClass, iCurrent, iBase, 0, iCurrentCombiningClass, &iBase)) |
|
1286 |
iCurrentCombiningClass = 0; |
|
1287 |
} |
|
1288 |
||
1289 |
/** |
|
1290 |
@internalComponent |
|
1291 |
*/ |
|
1292 |
const TText16* TCanonicalDecompositionIterator::CurrentPositionIfAtCharacter() const |
|
1293 |
{ |
|
1294 |
if (iCurrentCombiningClass != 0) |
|
1295 |
return 0; |
|
1296 |
const TText16* p = iBase.CurrentPosition(); |
|
1297 |
return iLastPosition == p? 0 : p; |
|
1298 |
} |
|
1299 |
||
1300 |
/** |
|
1301 |
@internalComponent |
|
1302 |
*/ |
|
1303 |
TBool TCanonicalDecompositionIterator::IsInOpenSequence() const |
|
1304 |
{ |
|
1305 |
return iInOpenSequence; |
|
1306 |
} |
|
1307 |
||
1308 |
////////////////////////////////////////////////////////////////////////////////////////////// |
|
1309 |
// TCanonicalDecompositionIteratorCached class |
|
1310 |
||
1311 |
/** |
|
1312 |
@internalComponent |
|
1313 |
*/ |
|
1314 |
void TCanonicalDecompositionIteratorCached::Set(const TUTF32Iterator& a) |
|
1315 |
{ |
|
1316 |
iBase.Set(a); |
|
1317 |
iCacheStart = 0; |
|
1318 |
iCacheSize = 0; |
|
1319 |
} |
|
1320 |
||
1321 |
/** |
|
1322 |
@internalComponent |
|
1323 |
*/ |
|
1324 |
TBool TCanonicalDecompositionIteratorCached::AtEnd() const |
|
1325 |
{ |
|
1326 |
return iCacheSize == 0 && iBase.AtEnd(); |
|
1327 |
} |
|
1328 |
||
1329 |
/** |
|
1330 |
@internalComponent |
|
1331 |
*/ |
|
1332 |
void TCanonicalDecompositionIteratorCached::Next(TInt aOffset) |
|
1333 |
{ |
|
1334 |
ASSERT(0 <= aOffset); |
|
1335 |
ASSERT(0 <= iCacheSize); |
|
1336 |
ASSERT(0 != iCacheSize || !iBase.AtEnd()); |
|
1337 |
if (aOffset <= iCacheSize) |
|
1338 |
{ |
|
1339 |
iCacheSize -= aOffset; |
|
1340 |
iCacheStart = (iCacheStart + aOffset) & (KMaxLookAhead - 1); |
|
1341 |
return; |
|
1342 |
} |
|
1343 |
aOffset -= iCacheSize; |
|
1344 |
iCacheSize = 0; |
|
1345 |
while (aOffset != 0) |
|
1346 |
{ |
|
1347 |
iBase.Next(); |
|
1348 |
--aOffset; |
|
1349 |
} |
|
1350 |
} |
|
1351 |
||
1352 |
/** |
|
1353 |
Get the character at the position of the iterator plus aOffset steps. |
|
1354 |
Returns -1 if we are looking too far ahead. |
|
1355 |
@internalComponent |
|
1356 |
*/ |
|
1357 |
TChar TCanonicalDecompositionIteratorCached::Get(TInt aOffset) |
|
1358 |
{ |
|
1359 |
// should be assert debug: there is a chance this could go off with |
|
1360 |
// bad collation tables |
|
1361 |
ASSERT(aOffset <= KMaxLookAhead); |
|
1362 |
while (iCacheSize <= aOffset) |
|
1363 |
{ |
|
1364 |
if (iBase.AtEnd()) |
|
1365 |
return TChar(static_cast <TUint> (-1)); |
|
1366 |
TInt cachePos = (iCacheStart + iCacheSize) & (KMaxLookAhead - 1); |
|
1367 |
iCache[cachePos].iChar = iBase.Current(); |
|
1368 |
iCache[cachePos].iPos = iBase.CurrentPositionIfAtCharacter(); |
|
1369 |
++iCacheSize; |
|
1370 |
iBase.Next(); |
|
1371 |
} |
|
1372 |
return iCacheSize == aOffset? iBase.Current() : iCache[(iCacheStart + aOffset) & (KMaxLookAhead - 1)].iChar; |
|
1373 |
} |
|
1374 |
||
1375 |
/** |
|
1376 |
If the current position in the original string is representable |
|
1377 |
as a pointer into it and we know what it is, return it. |
|
1378 |
@internalComponent |
|
1379 |
*/ |
|
1380 |
const TText16* TCanonicalDecompositionIteratorCached::CurrentPositionIfAtCharacter() const |
|
1381 |
{ |
|
1382 |
if(iCacheSize == 0) |
|
1383 |
{ |
|
1384 |
return iBase.CurrentPositionIfAtCharacter(); |
|
1385 |
} |
|
1386 |
return iCache[iCacheStart & (KMaxLookAhead - 1)].iPos; |
|
1387 |
} |
|
1388 |