|
1 // Copyright (c) 1997-2009 Nokia Corporation and/or its subsidiary(-ies). |
|
2 // All rights reserved. |
|
3 // This component and the accompanying materials are made available |
|
4 // under the terms of the License "Eclipse Public License v1.0" |
|
5 // which accompanies this distribution, and is available |
|
6 // at the URL "http://www.eclipse.org/legal/epl-v10.html". |
|
7 // |
|
8 // Initial Contributors: |
|
9 // Nokia Corporation - initial contribution. |
|
10 // |
|
11 // Contributors: |
|
12 // |
|
13 // Description: |
|
14 // The Unicode collation system. |
|
15 // |
|
16 // |
|
17 |
|
18 #include "collateimp.h" |
|
19 #include "foldtable.inl" |
|
20 |
|
21 #include <collate.h> |
|
22 #include <unicode.h> |
|
23 #include "u32std.h" |
|
24 |
|
25 // maximum size of string which has its own sort key |
|
26 // 16 instead of 8, in case all supplementary characters |
|
27 static const TInt KKeyedStringBufferSize = 16; |
|
28 |
|
29 |
|
30 inline TText16 GetHighSurrogate(TUint aChar) |
|
31 /** |
|
32 Retrieve the high surrogate of a supplementary character. |
|
33 |
|
34 @param aChar The 32-bit code point value of a Unicode character. |
|
35 |
|
36 @return High surrogate of aChar, if aChar is a supplementary character; |
|
37 aChar itself, if aChar is not a supplementary character. |
|
38 */ |
|
39 { |
|
40 return STATIC_CAST(TText16, 0xD7C0 + (aChar >> 10)); |
|
41 } |
|
42 |
|
43 inline TText16 GetLowSurrogate(TUint aChar) |
|
44 /** |
|
45 Retrieve the low surrogate of a supplementary character. |
|
46 |
|
47 @param aChar The 32-bit code point value of a Unicode character. |
|
48 |
|
49 @return Low surrogate of aChar, if aChar is a supplementary character; |
|
50 zero, if aChar is not a supplementary character. |
|
51 */ |
|
52 { |
|
53 return STATIC_CAST(TText16, 0xDC00 | (aChar & 0x3FF)); |
|
54 } |
|
55 |
|
56 inline TUint JoinSurrogate(TText16 aHighSurrogate, TText16 aLowSurrogate) |
|
57 /** |
|
58 Combine a high surrogate and a low surrogate into a supplementary character. |
|
59 |
|
60 @return The 32-bit code point value of the generated Unicode supplementary |
|
61 character. |
|
62 */ |
|
63 { |
|
64 return ((aHighSurrogate - 0xD7F7) << 10) + aLowSurrogate; |
|
65 } |
|
66 |
|
67 // Creates a one or two collation keys sequence corresponding to the input character. |
|
68 // Returns the number of keys output. |
|
69 static TInt CreateDefaultCollationKeySequence(TInt aChar, TCollationKey* aBuffer) |
|
70 { |
|
71 if (aChar >= 0x3400 && aChar <= 0x9FFF) // CJK |
|
72 { |
|
73 aBuffer[0].iLow = (TUint32)aChar << 16 | 0x0105; |
|
74 aBuffer[0].iHigh = aChar; |
|
75 return 1;//Collation key sequence consists of 1 key |
|
76 } |
|
77 aBuffer[0].iLow = 0xFF800000 | ((aChar << 1) & 0x3F0000) | 0x0104; // no stop bit |
|
78 aBuffer[0].iHigh = 1; |
|
79 aBuffer[1].iLow = (aChar << 16) | 0x80000105; // stop bit |
|
80 aBuffer[1].iHigh = 0; |
|
81 return 2;//Collation key sequence consists of 2 keys |
|
82 } |
|
83 |
|
84 // Finds a character's key in the main index, or returns -1 if it is not there |
|
85 static TInt FindCollationKeyIndex(TInt aChar, const TCollationKeyTable& aTable) |
|
86 { |
|
87 TInt n = aTable.iIndices; |
|
88 const TUint32 *base = aTable.iIndex; |
|
89 const TUint32 *start = base; |
|
90 const TUint32 *end = aTable.iIndex + n - 1; |
|
91 const TUint32 *p = base; |
|
92 TInt currentCharLength = 0; |
|
93 |
|
94 while (n > 0) |
|
95 { |
|
96 TInt pivot = n / 2; |
|
97 p += pivot; |
|
98 if ((p < start) || (p > end)) |
|
99 { |
|
100 break; |
|
101 } |
|
102 TInt c = *p >> 16; |
|
103 if (IsHighSurrogate( (TText16)c )) |
|
104 { |
|
105 if ((p < end) && (IsLowSurrogate( (TText16)((*(p+1))>>16) ))) |
|
106 { |
|
107 currentCharLength = 2; |
|
108 c = JoinSurrogate( (TText16)(*p>>16), (TText16)((*(p+1))>>16) ); |
|
109 } |
|
110 } |
|
111 else if (IsLowSurrogate( (TText16)c )) |
|
112 { |
|
113 if ((p > start) && (IsHighSurrogate( (TText16)((*(p-1))>>16) ))) |
|
114 { |
|
115 p--; |
|
116 pivot = pivot - 1; |
|
117 currentCharLength = 2; |
|
118 c = JoinSurrogate( (TText16)(*p>>16), (TText16)((*(p+1))>>16) ); |
|
119 } |
|
120 } |
|
121 else |
|
122 { |
|
123 currentCharLength = 1; |
|
124 } |
|
125 if (aChar == c) // found it |
|
126 { |
|
127 return *p & 0xFFFF; |
|
128 } |
|
129 if (aChar < c) // it's before |
|
130 { |
|
131 n = pivot; |
|
132 } |
|
133 else // it's after |
|
134 { |
|
135 ASSERT(currentCharLength != 0); |
|
136 base = p + currentCharLength; |
|
137 n -= pivot + currentCharLength; |
|
138 } |
|
139 p = base; |
|
140 } |
|
141 return -1; |
|
142 } |
|
143 |
|
144 static void ProcessKeys(TUint32& aKey1, TUint32& aKey2, TUint aFlags) |
|
145 { |
|
146 if (aFlags & TCollationMethod::EFoldCase) |
|
147 { |
|
148 static const TUint case_fold_table[21] = |
|
149 { 0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x2, 0x3, 0x4, 0x5, 0x6, |
|
150 0xD, 0xE, 0xF, 0x10, 0x11, 0x12, 0x13, 0x14 }; |
|
151 aKey1 = case_fold_table[aKey1]; |
|
152 aKey2 = case_fold_table[aKey2]; |
|
153 } |
|
154 if (aFlags & TCollationMethod::ESwapCase) |
|
155 { |
|
156 static const TUint case_swap_table[21] = |
|
157 { 0, 0x1, 0x8, 0x9, 0xA, 0xB, 0xC, 0x7, 0x2, 0x3, 0x4, 0x5, 0x6, |
|
158 0xD, 0xE, 0xF, 0x10, 0x11, 0x12, 0x13, 0x14 }; |
|
159 aKey1 = case_swap_table[aKey1]; |
|
160 aKey2 = case_swap_table[aKey2]; |
|
161 } |
|
162 if (aFlags & TCollationMethod::ESwapKana) |
|
163 { |
|
164 static const TUint kana_swap_table[21] = |
|
165 { 0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xA, 0xB, 0xC, |
|
166 0x13, 0x14, 0xD, 0xE, 0xF, 0x10, 0x11, 0x12 }; |
|
167 aKey1 = kana_swap_table[aKey1]; |
|
168 aKey2 = kana_swap_table[aKey2]; |
|
169 } |
|
170 } |
|
171 |
|
172 // Returns the position of the character in the string, or aLength if it is not present. |
|
173 // If aChar is found but it is preceded by aEscapeChar (aEscapeChar != 0), then the search continues. |
|
174 static TInt FindCharacter(TInt aChar, TInt aEscapeChar, const TUint16* aString, TInt aLength) |
|
175 { |
|
176 TBool isEscaped = EFalse; |
|
177 for(TInt pos=0;pos!=aLength;++pos,++aString) |
|
178 { |
|
179 if(isEscaped) |
|
180 { |
|
181 isEscaped = EFalse; |
|
182 } |
|
183 else if(*aString == aEscapeChar) |
|
184 { |
|
185 isEscaped = ETrue; |
|
186 } |
|
187 else if(*aString == aChar) |
|
188 { |
|
189 if(!isEscaped) |
|
190 { |
|
191 return pos; |
|
192 } |
|
193 } |
|
194 } |
|
195 return aLength; |
|
196 } |
|
197 |
|
198 /* |
|
199 The standard collation data, containing keys for all the WGL4 characters, plus |
|
200 commonly-used control characters and spaces. Generated by COLTAB. |
|
201 */ |
|
// Raw collation key words of the standard table (used as the key array of
// TheStandardTable). The low 16 bits of a TheIndex entry give a position in
// this array; GetKeyFromTable() copies consecutive words starting at that
// position until it reaches one whose bit 0 (the stop bit) is set.
// This is generated data: regenerate it rather than editing it by hand.
202 static const TUint32 TheKey[] = |
|
203 { |
|
204 0x21e0112,0x21e0113,0x2260112,0x2260112,0x2260113,0x2740112,0x2740113,0x6c60178, |
|
205 0x266017a,0x6c70179,0x6c60178,0x266017a,0x6c90179,0x6c60178,0x266017a,0x6cd0179, |
|
206 0x6c80178,0x266017a,0x6c90179,0x6c80178,0x266017a,0x6cd0179,0x6ca0178,0x266017a, |
|
207 0x6cd0179,0x6cc0178,0x266017a,0x6cd0179,0x6f70110,0x2650112,0x8050111,0x74b0110, |
|
208 0x78d0111,0x74b0110,0x7bd0111,0x78d0110,0x7a10111,0x78d0128,0x7a10129,0x7bd0110, |
|
209 0x2290113,0x7bd0128,0x2290113,0x7ed0128,0x8050111,0x805dd10,0x71f0111,0x805dd28, |
|
210 0x71f0129,0x85ddd10,0x85d0111,0x8750150,0x7e50151,0x9060110,0x7ed0111,0x3, |
|
211 0x201010b,0x202010b,0x203010b,0x204010b,0x205010b,0x206010b,0x207010b,0x208010b, |
|
212 0x209010b,0x2090113,0x209016f,0x209020b,0x209130b,0x209160b,0x209180b,0x2091d0b, |
|
213 0x209240b,0x209280b,0x2092a0b,0x2092f0b,0x209330b,0x209360b,0x209390b,0x2093b0b, |
|
214 0x2093f0b,0x2096b0b,0x20b010b,0x20c010b,0x20d010b,0x20d016f,0x20e010b,0x20f010b, |
|
215 0x210010b,0x211010b,0x214010b,0x21a010b,0x21c010b,0x21e010b,0x21f010b,0x221010b, |
|
216 0x222010b,0x226010b,0x229010b,0x22d010b,0x22e010b,0x22f010b,0x230010b,0x231010b, |
|
217 0x232010b,0x233010b,0x234010b,0x235010b,0x236010b,0x237010b,0x23c010b,0x23d010b, |
|
218 0x23e010b,0x23f010b,0x240010b,0x241010b,0x242010b,0x243010b,0x25e010b,0x25f010b, |
|
219 0x260010b,0x261010b,0x262010b,0x263010b,0x265010b,0x266010b,0x267010b,0x268010b, |
|
220 0x269010b,0x26a010b,0x26c010b,0x26e010b,0x26f010b,0x270010b,0x274010b,0x2ac010b, |
|
221 0x2ad010b,0x2af010b,0x2d6010b,0x2ff010b,0x300010b,0x301010b,0x302010b,0x303010b, |
|
222 0x304010b,0x317010b,0x35c010b,0x35f010b,0x366010b,0x368010b,0x369010b,0x36a010b, |
|
223 0x36b010b,0x36c010b,0x36d010b,0x36e010b,0x36f010b,0x370010b,0x371010b,0x372010b, |
|
224 0x374010b,0x375010b,0x378010b,0x37c010b,0x37d010b,0x381010b,0x382010b,0x38a010b, |
|
225 0x38c010b,0x3a2010b,0x3b9010b,0x3bb010b,0x3bc010b,0x42f010b,0x43d010b,0x44d010b, |
|
226 0x44e010b,0x4d6010b,0x4d8010b,0x4e2010b,0x4e6010b,0x4ea010b,0x4ee010b,0x4f2010b, |
|
227 0x4fa010b,0x502010b,0x50a010b,0x512010b,0x526010b,0x527010b,0x528010b,0x529010b, |
|
228 0x52a010b,0x52b010b,0x52c010b,0x52d010b,0x52e010b,0x52f010b,0x530010b,0x531010b, |
|
229 0x532010b,0x533010b,0x534010b,0x535010b,0x536010b,0x537010b,0x538010b,0x539010b, |
|
230 0x53a010b,0x53b010b,0x53c010b,0x53d010b,0x53e010b,0x53f010b,0x540010b,0x541010b, |
|
231 0x542010b,0x556010b,0x55a010b,0x55e010b,0x562010b,0x566010b,0x567010b,0x568010b, |
|
232 0x569010b,0x56c010b,0x56d010b,0x576010b,0x577010b,0x578010b,0x57e010b,0x586010b, |
|
233 0x588010b,0x590010b,0x596010b,0x597010b,0x59b010b,0x5a4010b,0x5a5010b,0x5b2010b, |
|
234 0x5f0010b,0x5f1010b,0x5f2010b,0x5f6010b,0x5f8010b,0x616010b,0x619010b,0x61b010b, |
|
235 0x61c010b,0x620010b,0x621010b,0x6b4010b,0x6b5010b,0x1309,0x1609,0x1809, |
|
236 0x1d09,0x2209,0x2409,0x2809,0x2f09,0x3009,0x3309,0x3609, |
|
237 0x3909,0x3b09,0x4109,0x2c20109,0x2c30109,0x2c40109,0x2c50109,0x2c60109, |
|
238 0x2cd0109,0x2ce0109,0x2d10109,0x2d50109,0x2fa0109,0x6c50109,0x6c60109,0x6c60151, |
|
239 0x6c70109,0x6c70151,0x6c80109,0x6c80151,0x6c90109,0x6ca0109,0x6cb0109,0x6cc0109, |
|
240 0x6cd0109,0x6ce0109,0x6cf0109,0x6cf0121,0x6cf0151,0x6d30109,0x6d30121,0x6e30109, |
|
241 0x6e30121,0x6f70109,0x6f70121,0x7030109,0x7030121,0x7070109,0x7070121,0x7170109, |
|
242 0x7170121,0x71f0109,0x71f0121,0x74b0109,0x74b0121,0x74f0109,0x7530109,0x7530121, |
|
243 0x7730109,0x7730121,0x77f0109,0x77f0121,0x78d0109,0x78d0121,0x7910109,0x7a10109, |
|
244 0x7a10121,0x7b10109,0x7b10121,0x7bd0109,0x7bd0115,0x7bd0121,0x7c50109,0x7c50121, |
|
245 0x7e50109,0x7e50121,0x7ed0109,0x7ed0121,0x7ed0151,0x8010109,0x8010121,0x8050109, |
|
246 0x8050121,0x8050151,0x80d0109,0x80d0121,0x81d0109,0x81d0121,0x8290109,0x8290121, |
|
247 0x8310109,0x8350109,0x8350121,0x85d0109,0x85d0121,0x85dde11,0x8750109,0x8750121, |
|
248 0x8790109,0x8790121,0x88d0109,0x88d0121,0x8a50109,0x8a50121,0x8b10109,0x8b10121, |
|
249 0x8b90109,0x8b90121,0x8bd0109,0x8bd0121,0x8c90109,0x8c90121,0x8e90109,0x8e90121, |
|
250 0x9360109,0x9360121,0x9370109,0x9370121,0x9380109,0x9380121,0x9390109,0x9390121, |
|
251 0x93a0109,0x93a0121,0x93d0109,0x93d0121,0x93e0109,0x93e0121,0x93f0109,0x93f0121, |
|
252 0x9400109,0x9400121,0x9420109,0x9420121,0x9430109,0x9430121,0x9440109,0x9440111, |
|
253 0x9440121,0x9450109,0x9450121,0x9460109,0x9460121,0x9470109,0x9470121,0x9480109, |
|
254 0x9480121,0x94a0109,0x94a0121,0x94b0109,0x94b0121,0x94c0109,0x94c0121,0x94d0109, |
|
255 0x94d0121,0x94e0109,0x94e0121,0x94f0109,0x94f0121,0x9500109,0x9500121,0x9510109, |
|
256 0x9510121,0x95a0109,0x95a0121,0x9660109,0x9660121,0x96a0109,0x96a0121,0x96e0109, |
|
257 0x96e0121,0x9720109,0x9720121,0x97e0109,0x97e0121,0x9820109,0x9820121,0x98a0109, |
|
258 0x98a0121,0x98e0109,0x98e0121,0x9920109,0x9920121,0x99a0109,0x99a0121,0x99e0109, |
|
259 0x99e0121,0x9a60109,0x9a60121,0x9aa0109,0x9aa0121,0x9ae0109,0x9ae0121,0x9b20109, |
|
260 0x9b20121,0x9ca0109,0x9ca0121,0x9ce0109,0x9ce0121,0x9d20109,0x9d20121,0x9d60109, |
|
261 0x9d60121,0x9e60109,0x9e60121,0x9ea0109,0x9ea0121,0x9f20109,0x9f20121,0x9fe0109, |
|
262 0x9fe0121,0xa020109,0xa020121,0xa0a0109,0xa0a0121,0xa120109,0xa120121,0xa160109, |
|
263 0xa160121,0xa260109,0xa260121,0xa2a0109,0xa2a0121,0xa460109,0xa460121,0xa4e0109, |
|
264 0xa4e0121,0xa660109,0xa660121,0xa6a0109,0xa6a0121,0xa6e0109,0xa6e0121,0xa720109, |
|
265 0xa720121,0xa760109,0xa760121,0xa7a0109,0xa7a0121,0xa820109,0xa820121,0xa860109, |
|
266 0xa860121,0xa8a0109,0xa8a0121, |
|
267 }; |
|
268 |
|
// Character index of the standard table. Each word holds a 16-bit code unit
// in its upper half and a position in the key array in its lower half.
// FindCollationKeyIndex() binary-searches this array, so the entries have to
// stay in ascending code unit order. TheStandardTable records the number of
// entries, so the two must be kept in step.
// This is generated data: regenerate it rather than editing it by hand.
269 static const TUint32 TheIndex[] = |
|
270 { |
|
271 0x37,0x10037,0x20037,0x30037,0x40037,0x50037,0x60037,0x70037, |
|
272 0x80037,0x90038,0xa0039,0xb003a,0xc003b,0xd003c,0xe0037,0xf0037, |
|
273 0x100037,0x110037,0x120037,0x130037,0x140037,0x150037,0x160037,0x170037, |
|
274 0x180037,0x190037,0x1a0037,0x1b0037,0x1c0037,0x1d0037,0x1e0037,0x1f0037, |
|
275 0x200040,0x21005d,0x22006a,0x230080,0x24010d,0x250081,0x26007f,0x270063, |
|
276 0x280070,0x290071,0x2a007b,0x2b0096,0x2c005a,0x2d0053,0x2e0061,0x2f007c, |
|
277 0x300115,0x310116,0x320118,0x33011a,0x34011c,0x35011d,0x36011e,0x37011f, |
|
278 0x380120,0x390121,0x3a005c,0x3b005b,0x3c009a,0x3d009b,0x3e009c,0x3f005f, |
|
279 0x40007a,0x410123,0x420128,0x43012a,0x44012c,0x450132,0x460134,0x470137, |
|
280 0x480139,0x49013d,0x4a0140,0x4b0142,0x4c0145,0x4d0149,0x4e014b,0x4f0150, |
|
281 0x500155,0x510157,0x52015a,0x53015c,0x54015f,0x550163,0x560165,0x570167, |
|
282 0x580169,0x59016b,0x5a016d,0x5b0072,0x5c007e,0x5d0073,0x5e0047,0x5f0043, |
|
283 0x600045,0x610122,0x620127,0x630129,0x64012b,0x650131,0x660133,0x670136, |
|
284 0x680138,0x69013c,0x6a013f,0x6b0141,0x6c0143,0x6d0148,0x6e014a,0x6f014f, |
|
285 0x700154,0x710156,0x720159,0x73015b,0x74015e,0x750162,0x760164,0x770166, |
|
286 0x780168,0x79016a,0x7a016c,0x7b0074,0x7c009e,0x7d0075,0x7e00a0,0xa00042, |
|
287 0xa1005e,0xa2010c,0xa3010e,0xa4010b,0xa5010f,0xa6009f,0xa70076,0xa80049, |
|
288 0xa90078,0xaa0124,0xab006e,0xac009d,0xad0052,0xae0079,0xaf004f,0xb0008a, |
|
289 0xb10097,0xb20119,0xb3011b,0xb40044,0xb50187,0xb60077,0xb70062,0xb8004d, |
|
290 0xb90117,0xba0151,0xbb006f,0xbc000a,0xbd0007,0xbe0010,0xbf0060,0xc60126, |
|
291 0xd00130,0xd70099,0xd80153,0xde016f,0xdf0031,0xe60125,0xf0012f,0xf70098, |
|
292 0xf80152,0xfe016e,0x110012e,0x111012d,0x126013b,0x127013a,0x131013e,0x1320025, |
|
293 0x1330023,0x1380158,0x13f0029,0x1400027,0x1410147,0x1420146,0x1490035,0x14a014e, |
|
294 0x14b014d,0x152002f,0x153002d,0x1660161,0x1670160,0x17f015d,0x1920135,0x2c60087, |
|
295 0x2c70088,0x2c90089,0x2d80046,0x2d9004c,0x2da0048,0x2db004e,0x2dd004b,0x30000fe, |
|
296 0x30100fd,0x3020100,0x3030105,0x3040109,0x30600ff,0x3070106,0x3080103,0x30a0102, |
|
297 0x30b0104,0x30c0101,0x30d010a,0x3270107,0x3280108,0x3840044,0x385004a,0x3870062, |
|
298 0x3910171,0x3920173,0x3930175,0x3940177,0x3950179,0x396017b,0x397017d,0x398017f, |
|
299 0x3990181,0x39a0183,0x39b0185,0x39c0188,0x39d018a,0x39e018c,0x39f018e,0x3a00190, |
|
300 0x3a10192,0x3a30194,0x3a40196,0x3a50198,0x3a6019a,0x3a7019c,0x3a8019e,0x3a901a0, |
|
301 0x3b10170,0x3b20172,0x3b30174,0x3b40176,0x3b50178,0x3b6017a,0x3b7017c,0x3b8017e, |
|
302 0x3b90180,0x3ba0182,0x3bb0184,0x3bc0186,0x3bd0189,0x3be018b,0x3bf018d,0x3c0018f, |
|
303 0x3c10191,0x3c20193,0x3c30193,0x3c40195,0x3c50197,0x3c60199,0x3c7019b,0x3c8019d, |
|
304 0x3c9019f,0x40201ae,0x40401b2,0x40501b8,0x40601bc,0x40801be,0x40901c4,0x40a01ca, |
|
305 0x40b01d6,0x40f01e2,0x41001a2,0x41101a4,0x41201a6,0x41301a8,0x41401ac,0x41501b0, |
|
306 0x41601b4,0x41701b6,0x41801ba,0x41a01c0,0x41b01c2,0x41c01c6,0x41d01c8,0x41e01cc, |
|
307 0x41f01ce,0x42001d0,0x42101d2,0x42201d4,0x42301d8,0x42401da,0x42501dc,0x42601de, |
|
308 0x42701e0,0x42801e4,0x42901e6,0x42a01e8,0x42b01ea,0x42c01ec,0x42d01ee,0x42e01f0, |
|
309 0x42f01f2,0x43001a1,0x43101a3,0x43201a5,0x43301a7,0x43401ab,0x43501af,0x43601b3, |
|
310 0x43701b5,0x43801b9,0x43a01bf,0x43b01c1,0x43c01c5,0x43d01c7,0x43e01cb,0x43f01cd, |
|
311 0x44001cf,0x44101d1,0x44201d3,0x44301d7,0x44401d9,0x44501db,0x44601dd,0x44701df, |
|
312 0x44801e3,0x44901e5,0x44a01e7,0x44b01e9,0x44c01eb,0x44d01ed,0x44e01ef,0x44f01f1, |
|
313 0x45201ad,0x45401b1,0x45501b7,0x45601bb,0x45801bd,0x45901c3,0x45a01c9,0x45b01d5, |
|
314 0x45f01e1,0x49001aa,0x49101a9,0x20000041,0x20010041,0x20020041,0x20030041,0x20040041, |
|
315 0x20050041,0x20060041,0x20070042,0x20080041,0x20090041,0x200a0041,0x200b003f,0x200c0037, |
|
316 0x200d0037,0x200e0037,0x200f0037,0x20100054,0x20110055,0x20120056,0x20130057,0x20140058, |
|
317 0x20150059,0x20170051,0x20180064,0x20190065,0x201a0066,0x201b0067,0x201c006b,0x201d006c, |
|
318 0x201e006d,0x20200083,0x20210084,0x20220085,0x20260002,0x2028003d,0x2029003e,0x202a0037, |
|
319 0x202b0037,0x202c0037,0x202d0037,0x202e0037,0x20300082,0x20320086,0x20330005,0x20390068, |
|
320 0x203a0069,0x203c0000,0x203e0050,0x2044007d,0x207f014c,0x20a30110,0x20a40111,0x20a70112, |
|
321 0x20ac0113,0x2105001c,0x21130144,0x2116002b,0x21220033,0x212601a0,0x212e0114,0x215b000d, |
|
322 0x215c0013,0x215d0016,0x215e0019,0x2190008b,0x2191008d,0x2192008c,0x2193008e,0x2194008f, |
|
323 0x21950090,0x21a80091,0x22020092,0x22060093,0x220f0094,0x22110095,0x221200a1,0x221500a2, |
|
324 0x221900a3,0x221a00a4,0x221e00a5,0x221f00a6,0x222900a7,0x222b00a8,0x224800a9,0x226100aa, |
|
325 0x226400ab,0x226500ac,0x230200ad,0x231000ae,0x232000af,0x232100b0,0x250000b1,0x250200b2, |
|
326 0x250c00b3,0x251000b4,0x251400b5,0x251800b6,0x251c00b7,0x252400b8,0x252c00b9,0x253400ba, |
|
327 0x253c00bb,0x255000bc,0x255100bd,0x255200be,0x255300bf,0x255400c0,0x255500c1,0x255600c2, |
|
328 0x255700c3,0x255800c4,0x255900c5,0x255a00c6,0x255b00c7,0x255c00c8,0x255d00c9,0x255e00ca, |
|
329 0x255f00cb,0x256000cc,0x256100cd,0x256200ce,0x256300cf,0x256400d0,0x256500d1,0x256600d2, |
|
330 0x256700d3,0x256800d4,0x256900d5,0x256a00d6,0x256b00d7,0x256c00d8,0x258000d9,0x258400da, |
|
331 0x258800db,0x258c00dc,0x259000dd,0x259100de,0x259200df,0x259300e0,0x25a000e1,0x25a100e2, |
|
332 0x25aa00e3,0x25ab00e4,0x25ac00e5,0x25b200e6,0x25ba00e7,0x25bc00e8,0x25c400e9,0x25ca00ea, |
|
333 0x25cb00eb,0x25cf00ec,0x25d800ed,0x25d900ee,0x25e600ef,0x263a00f0,0x263b00f1,0x263c00f2, |
|
334 0x264000f3,0x264200f4,0x266000f5,0x266300f6,0x266500f7,0x266600f8,0x266a00f9,0x266b00fa, |
|
335 0xfb01001f,0xfb020021,0xfeff0037,0xfffc00fb,0xfffd00fc, |
|
336 }; |
|
337 |
|
338 static const TCollationKeyTable TheStandardTable = |
|
339 { TheKey, TheIndex, 517, NULL, NULL, 0 }; |
|
340 |
|
341 const TCollationKeyTable* StandardCollationMethod() |
|
342 { |
|
343 return &TheStandardTable; |
|
344 }; |
|
345 |
|
346 inline void Increment(TUint16 const *& aPointer,TBool aNarrow) |
|
347 { |
|
348 aPointer = aNarrow ? (const TUint16*)(((const TUint8*)aPointer) + 1) : aPointer + 1; |
|
349 } |
|
350 |
|
351 ///////////////////////////////////////////////////////////////////////////////////////////////// |
|
352 // TCollationValueIterator |
|
353 ///////////////////////////////////////////////////////////////////////////////////////////////// |
|
354 |
|
355 /** |
|
356 Initializes TCollationValueIterator object with a new character sequence. |
|
357 @param aSourceIt An iterator used to access the input character (non-normalized or |
|
358 normalized) sequence. |
|
359 @internalComponent |
|
360 */ |
|
361 void TCollationValueIterator::SetSourceIt(TUTF32Iterator& aSourceIt) |
|
362 { |
|
363 iCurrentKeyPos = 0; |
|
364 iKey.iKeys = 0; |
|
365 iDecompStrIt.Set(aSourceIt); |
|
366 } |
|
367 |
|
368 /** |
|
369 Gets current raw key. |
|
370 Note: the method may move the iterator one or more positions forward if there are no produced |
|
371 collation keys. |
|
372 @param aKey A reference to a TCollationKey object, initialized with the |
|
373 current collation key after the call, if there is available key. |
|
374 @return ETrue Successfull call, aKey initialized with the current collation key, |
|
375 EFalse - the iteration has come to the end. |
|
376 @internalComponent |
|
377 */ |
|
378 TBool TCollationValueIterator::GetCurrentKey(TCollationKey& aKey) |
|
379 { |
|
380 ASSERT(iCurrentKeyPos <= iKey.iKeys); |
|
381 if(!ProduceCollationKeys()) |
|
382 { |
|
383 return EFalse; |
|
384 } |
|
385 aKey = iKey.iKey[iCurrentKeyPos]; |
|
386 return ETrue; |
|
387 } |
|
388 |
|
389 /** |
|
390 Gets current key at the specified level. |
|
391 Note: the method may move the iterator one or more positions forward if there are no produced |
|
392 collation keys. |
|
393 @param aLevel Desired level of the collation key: 0..3 |
|
394 @param aKey A reference to TUint32 where the retrieved key will be stored. |
|
395 @return ETrue Success, EFalse - end of the iteration. |
|
396 @internalComponent |
|
397 */ |
|
398 TBool TCollationValueIterator::GetCurrentKey(TInt aLevel, TUint32& aKey) |
|
399 { |
|
400 TCollationKey rawKey; |
|
401 if(GetCurrentKey(rawKey)) |
|
402 { |
|
403 //Key values are ignored if their ignore bit is set and the level is less than 3: in other words, the |
|
404 //actual Unicode value is never ignored. This does NOT conform to the system of alternate weightings |
|
405 //described in Unicode Technical Report 10, and will probably have to be changed. |
|
406 aKey = (aLevel < 3 && (rawKey.iLow & TCollationKeyTable::EIgnoreFlag) && !IgnoringNone()) ? 0 : rawKey.Level(aLevel); |
|
407 return ETrue; |
|
408 } |
|
409 return EFalse; |
|
410 } |
|
411 |
|
412 /** |
|
413 The method iterates through the controlled character sequence and tries to find first non-zero |
|
414 corresponding collation key at the specified level. |
|
415 @param aLevel Desired level of the collation key: 0..3 |
|
416 @return Non-zero collation key value or 0 if the iteration has come to the end. |
|
417 @internalComponent |
|
418 */ |
|
419 TUint32 TCollationValueIterator::GetNextNonZeroKey(TInt aLevel) |
|
420 { |
|
421 TUint32 key = 0; |
|
422 while(GetCurrentKey(aLevel, key) && key == 0) |
|
423 { |
|
424 Increment(); |
|
425 } |
|
426 return key; |
|
427 } |
|
428 |
|
429 /** |
|
430 The method determines wheter the specified as a parameter character matches current iterator's |
|
431 character. |
|
432 If there is a match, the iterator will be moved one position forward. |
|
433 Note: the method may move the iterator one or more positions forward if there are no produced |
|
434 collation keys. |
|
435 @param aMatch The character to compare with the current iterator's character. |
|
436 @return ETrue The characters match, EFalse otherwise (or the iteration has come to the end). |
|
437 @internalComponent |
|
438 */ |
|
439 TBool TCollationValueIterator::MatchChar(TChar aMatch) |
|
440 { |
|
441 TUint32 key; |
|
442 if(GetCurrentKey(3, key)) |
|
443 { |
|
444 // Find a match for the quaternary key.. will probably be the unicode value |
|
445 // This is a bit poor. |
|
446 if(aMatch == key) |
|
447 { |
|
448 Increment(); |
|
449 return ETrue; |
|
450 } |
|
451 } |
|
452 return EFalse; |
|
453 } |
|
454 |
|
455 /** |
|
456 Note: the method may move the iterator one or more positions forward if there are no produced |
|
457 collation keys. |
|
458 @return The method returns ETrue if the iterator is at a combining character, EFalse otherwise |
|
459 (or the iterator has come to the end) |
|
460 @internalComponent |
|
461 */ |
|
462 TBool TCollationValueIterator::AtCombiningCharacter() |
|
463 { |
|
464 TCollationKey rawKey; |
|
465 if(!GetCurrentKey(rawKey)) |
|
466 { |
|
467 return EFalse; // iteration ended |
|
468 } |
|
469 return rawKey.IsStarter() ? (TBool)EFalse : (TBool)ETrue; |
|
470 } |
|
471 |
|
472 /** |
|
473 Skips the following combining characters if they are. |
|
474 Note: the method may move the iterator one or more positions forward. |
|
475 @return The number of skipped combining characters. |
|
476 @internalComponent |
|
477 */ |
|
478 TInt TCollationValueIterator::SkipCombiningCharacters() |
|
479 { |
|
480 TInt count; |
|
481 for(count=0;AtCombiningCharacter();++count) |
|
482 { |
|
483 Increment(); |
|
484 } |
|
485 return count; |
|
486 } |
|
487 |
|
488 /** |
|
489 Moves the iterator one step forward making the next collation key available for getting |
|
490 using GetCurrentKey(). |
|
491 @return ETrue Successfull call, there is a collation key available. |
|
492 EFalse - the iteration has come to the end. |
|
493 @internalComponent |
|
494 @see TCollationValueIterator::GetCurrentKey() |
|
495 */ |
|
496 TBool TCollationValueIterator::Increment() |
|
497 { |
|
498 ASSERT(iCurrentKeyPos <= iKey.iKeys); |
|
499 if(!ProduceCollationKeys()) |
|
500 { |
|
501 return EFalse; |
|
502 } |
|
503 ++iCurrentKeyPos; |
|
504 return ETrue; |
|
505 } |
|
506 |
|
507 /** |
|
508 Returns the position in the underlying string of the iteration, |
|
509 if this is well defined. It is not well defined if either we are |
|
510 half way through keys defined as a string in the collation table |
|
511 or if we are half way through a canonically reordered sequence. |
|
512 @return The position in the underlying string if this is well |
|
513 defined, or 0 if it is not. |
|
514 */ |
|
515 const TText16* TCollationValueIterator::CurrentPositionIfAtCharacter() |
|
516 { |
|
517 if (!ProduceCollationKeys()) |
|
518 return iCurrentPosition; |
|
519 return iCurrentKeyPos == 0? iCurrentPosition : 0; |
|
520 } |
|
521 |
|
522 /** |
|
523 Produces the longest possible collation keys sequence using the decomposed character sequence, |
|
524 pointed by iDecompStrIt iterator. But this will happen only if all keys from iKey array are |
|
525 consumed. |
|
526 @return ETrue Successfull call, iKey initialized with the produced collation keys sequence, |
|
527 EFalse - the iteration has come to the end. |
|
528 @internalComponent |
|
529 */ |
|
530 TBool TCollationValueIterator::ProduceCollationKeys() |
|
531 { |
|
532 //iKey.iKeys represents the keys count in iKey array, so load more keys, only if all |
|
533 //collation keys are already consumed. |
|
534 if(iCurrentKeyPos == iKey.iKeys) |
|
535 { |
|
536 iCurrentPosition = iDecompStrIt.CurrentPositionIfAtCharacter(); |
|
537 if(iDecompStrIt.AtEnd()) |
|
538 {//No more characters in the input decomposed canonical string |
|
539 return EFalse; |
|
540 } |
|
541 //Try to get the next collation key sequence. There should be at least one key. |
|
542 GetNextRawKeySequence(); |
|
543 ASSERT(iKey.iKeys > 0); |
|
544 iCurrentKeyPos = 0; |
|
545 } |
|
546 return ETrue; |
|
547 } |
|
548 |
|
549 /** |
|
550 Consume zero or more characters from the input and convert them into zero or more collation keys. |
|
551 @internalComponent |
|
552 */ |
|
553 void TCollationValueIterator::GetNextRawKeySequence() |
|
554 { |
|
555 //Store the first character combining class type for later use. |
|
556 TChar firstChar = iDecompStrIt.Get(0); |
|
557 TBool combining = !::IsBaseCharacter(firstChar); |
|
558 // Initialise. |
|
559 iKey.iCharactersConsumed = 0; |
|
560 iKey.iKeys = 0; |
|
561 // See if the override table has a key for the current collation unit. |
|
562 if(iMethod.iOverrideTable) |
|
563 { |
|
564 GetKeyFromTable(iMethod.iOverrideTable); |
|
565 } |
|
566 // If not, try the main table. |
|
567 if(iKey.iCharactersConsumed == 0) |
|
568 { |
|
569 GetKeyFromTable(iMethod.iMainTable); |
|
570 } |
|
571 //If no key was found use a default value depending on the current character. |
|
572 //For CJK characters: |
|
573 //the Unicode value itself as the primary key and 1 as the secondary and tertiary keys; |
|
574 //the lower 16 bits end up as 0x0105 because the bottom two bits are used for the ignorable bit, |
|
575 //which is clear, and the stop bit, which is set. |
|
576 //For other characters: |
|
577 //Return two keys containing the 21 bits of the character code (anything from 0 to 0x10FFFF), as |
|
578 //explained in Unicode Technical Report 10. |
|
579 if(iKey.iCharactersConsumed == 0) |
|
580 { |
|
581 iKey.iCharactersConsumed = 1; |
|
582 iDecompStrIt.Next(1); |
|
583 iKey.iKeys = ::CreateDefaultCollationKeySequence(firstChar, iKey.iKey); |
|
584 } |
|
585 if(!combining) |
|
586 { |
|
587 iKey.iKey[0].iHigh |= (TUint32)TCollationKey::KFlagIsStarter; |
|
588 } |
|
589 } |
|
590 |
|
591 /** |
|
592 Search for the string aText. |
|
593 Put the key index in aIndex if found, otherwise set aIndex to -1. |
|
594 If the sought string might be a prefix to a key in the table set aPossiblePrefix to TRUE. |
|
595 @internalComponent |
|
596 */ |
|
597 static void GetStringKey(const TCollationKeyTable* aTable,const TText* aText,TInt aLength, |
|
598 TInt& aIndex,TBool& aPossiblePrefix) |
|
599 { |
|
600 aIndex = -1; |
|
601 aPossiblePrefix = EFalse; |
|
602 TInt n = aTable->iStringIndices; |
|
603 const TUint32* base = aTable->iStringIndex; |
|
604 const TUint32* p = base; |
|
605 TInt pivot; |
|
606 while (n > 0) |
|
607 { |
|
608 pivot = n / 2; |
|
609 p += pivot; |
|
610 TUint16 string_index = (TUint16)(*p >> 16); |
|
611 const TText* cur_text = aTable->iString + string_index + 1; |
|
612 TInt cur_length = aTable->iString[string_index]; |
|
613 TInt order = TUnicode::Compare(aText,aLength,cur_text,cur_length); |
|
614 if (order == 0) // found it |
|
615 { |
|
616 aIndex = *p & 0xFFFF; |
|
617 aPossiblePrefix = ETrue; |
|
618 break; |
|
619 } |
|
620 if (order < 1 && !aPossiblePrefix) |
|
621 { |
|
622 if (aLength < cur_length && TUnicode::Compare(aText,aLength,cur_text,aLength) == 0) |
|
623 aPossiblePrefix = ETrue; |
|
624 n = pivot; |
|
625 } |
|
626 else |
|
627 { |
|
628 base = p + 1; |
|
629 n -= pivot + 1; |
|
630 } |
|
631 p = base; |
|
632 } |
|
633 } |
|
634 |
|
635 /** |
|
636 Consumes output from iDecompStrIt, produces list of keys in iKey. |
|
637 @param aTable A const pointer to the collation key table used by the method. |
|
638 @internalComponent |
|
639 */ |
|
640 void TCollationValueIterator::GetKeyFromTable(const TCollationKeyTable* aTable) |
|
641 { |
|
642 ASSERT(aTable != NULL); |
|
643 iKey.iCharactersConsumed = 0; |
|
644 iKey.iKeys = 0; |
|
645 |
|
646 TInt cur_char = iDecompStrIt.Get(0); |
|
647 |
|
648 // Find the longest matching string. |
|
649 TInt index = -1; |
|
650 if(aTable->iStringIndices > 0) |
|
651 { |
|
652 TInt moved = 0; |
|
653 TText text[KKeyedStringBufferSize]; |
|
654 TInt textLen = 0; |
|
655 if (cur_char <= 0xFFFF) |
|
656 { |
|
657 text[textLen++] = static_cast <TText> (cur_char); |
|
658 } |
|
659 else |
|
660 { |
|
661 text[textLen++] = GetHighSurrogate(cur_char); |
|
662 text[textLen++] = GetLowSurrogate(cur_char); |
|
663 } |
|
664 TBool possible_prefix = ETrue; |
|
665 for(TInt i = 1; (i < KKeyedStringBufferSize) && possible_prefix; i++) |
|
666 { |
|
667 ++moved; |
|
668 TInt c = iDecompStrIt.Get(i);//get the next character |
|
669 if(c == -1) |
|
670 { |
|
671 break; |
|
672 } |
|
673 if (c <= 0xFFFF) |
|
674 { |
|
675 text[textLen++] = static_cast <TText> (c); |
|
676 } |
|
677 else |
|
678 { |
|
679 text[textLen++] = GetHighSurrogate(c); |
|
680 text[textLen++] = GetLowSurrogate(c); |
|
681 } |
|
682 TInt cur_index = -1; |
|
683 ::GetStringKey(aTable, text, textLen, cur_index, possible_prefix); |
|
684 if(cur_index != -1) |
|
685 { |
|
686 index = cur_index; |
|
687 iKey.iCharactersConsumed = i + 1; |
|
688 } |
|
689 } |
|
690 if (iKey.iCharactersConsumed < moved) |
|
691 { |
|
692 moved = 0; |
|
693 } |
|
694 while (moved != iKey.iCharactersConsumed) |
|
695 { |
|
696 ++moved; |
|
697 } |
|
698 if(moved > 0) |
|
699 { |
|
700 iDecompStrIt.Next(moved);//adjust the iterator start position |
|
701 } |
|
702 } |
|
703 |
|
704 // Now search the main index. |
|
705 if(index == -1) |
|
706 { |
|
707 index = ::FindCollationKeyIndex(cur_char, *aTable); |
|
708 if(0 <= index) |
|
709 { |
|
710 iKey.iCharactersConsumed = 1; |
|
711 iDecompStrIt.Next(1);//adjust the iterator start position |
|
712 } |
|
713 } |
|
714 |
|
715 // Fill in the key or keys. |
|
716 if(index != -1) |
|
717 { |
|
718 const TUint32* p = &aTable->iKey[index]; |
|
719 TCollationKey* q = iKey.iKey; |
|
720 iKey.iKeys = 0; |
|
721 while(iKey.iKeys < TKeyInfo::EMaxKeys) |
|
722 { |
|
723 q->iLow = *p; |
|
724 q->iHigh = cur_char; |
|
725 iKey.iKeys++; |
|
726 if(*p & 1) |
|
727 { |
|
728 break; |
|
729 } |
|
730 q++; |
|
731 p++; |
|
732 } |
|
733 } |
|
734 } |
|
735 |
|
736 ///////////////////////////////////////////////////////////////////////////////////////////////// |
|
737 // TCollate |
|
738 ///////////////////////////////////////////////////////////////////////////////////////////////// |
|
739 |
|
740 /** |
|
741 Construct a TCollate object based on the collation method specified |
|
742 within aCharSet, if any. If there is none, or aCharSet is null, the |
|
743 standard collation method will be used. |
|
744 aMask and aFlags provide a method for overriding the flags in the collation method: |
|
745 Each flag set to 1 in aMask is a flag that will be overridden and set to the |
|
746 corresponding flag value in aFlags. |
|
747 Ownership of aCharSet is not passed. |
|
748 @param aCharSet Locale-specific character attribute and collation data |
|
749 @param aMask Provides a method for overriding the flags in the collation method |
|
750 @param aFlags Provides a method for overriding the flags in the collation method |
|
751 @internalComponent |
|
752 */ |
|
753 TCollate::TCollate(const LCharSet* aCharSet, TUint aMask, TUint aFlags) |
|
754 { |
|
755 iMethod.iMainTable = NULL; |
|
756 iMethod.iOverrideTable = NULL; |
|
757 iMethod.iFlags = 0; |
|
758 if (aCharSet && aCharSet->iCollationDataSet && aCharSet->iCollationDataSet->iMethod) |
|
759 { |
|
760 iMethod = aCharSet->iCollationDataSet->iMethod[0]; |
|
761 } |
|
762 if (iMethod.iMainTable == NULL) |
|
763 { |
|
764 iMethod.iMainTable = &TheStandardTable; |
|
765 } |
|
766 if (aMask) |
|
767 { |
|
768 iMethod.iFlags &= ~aMask; |
|
769 iMethod.iFlags |= (aMask & aFlags); |
|
770 } |
|
771 } |
|
772 |
|
773 /** |
|
774 Construct a TCollate object based on an already constructed |
|
775 TCollationMethod specified in aMethod. |
|
776 Ownership is not passed. |
|
777 @param aMethod Collation keys table |
|
778 @internalComponent |
|
779 */ |
|
780 TCollate::TCollate(const TCollationMethod& aMethod) : |
|
781 iMethod(aMethod) |
|
782 { |
|
783 if(!iMethod.iMainTable) |
|
784 { |
|
785 iMethod.iMainTable = &TheStandardTable; |
|
786 } |
|
787 } |
|
788 |
|
789 /** |
|
790 Compare the string beginning at aString1 of length aLength1 against the |
|
791 string beginning at aString2 of length aLength2. |
|
792 |
|
793 @param aString1 First string to compare |
|
794 @param aLength1 Length of aString1 |
|
795 @param aString2 Second string to compare |
|
796 @param aLength2 Length of aString2 |
|
797 @param aMaxLevel Determines the tightness of the collation. At level 0, only |
|
798 character identities are distinguished. At level 1 accents are |
|
799 distinguished as well. At level 2 case is distinguished as well. At |
|
800 level 3 all non canonically equivalent Unicode characters are considered |
|
801 different. By default aMaxLevel is 3. |
|
802 @return EStringsIdentical The strings are identical. |
|
803 ELeftComparesLessAndIsNotPrefix For example: aString1 = "aaa", aString2 = "zzzz". |
|
804 ELeftIsPrefixOfRight For example: aString1 = "abc", aString2 = "abcd". |
|
805 ERightIsPrefixOfLeft For example: aString1 = "abcd", aString2 = "abc". |
|
806 ERightComparesLessAndIsNotPrefix For example: aString1 = "zzzz", aString2 = "aaa". |
|
807 @internalComponent |
|
808 */ |
|
809 TCollate::TComparisonResult TCollate::Compare(const TUint16 *aString1, TInt aLength1, |
|
810 const TUint16 *aString2, TInt aLength2, |
|
811 TInt aMaxLevel) const |
|
812 { |
|
813 TUTF32Iterator itL(aString1, aString1 + aLength1); |
|
814 TUTF32Iterator itR(aString2, aString2 + aLength2); |
|
815 return CompareKeySequences(itL, itR, aMaxLevel, 0, 0); |
|
816 } |
|
817 |
|
818 /** |
|
819 Find the string beginning at aString2 of length aLength2 in the string |
|
820 beginning at aString1 of length aLength1. |
|
821 |
|
822 @param aString1 String to search |
|
823 @param aLength1 Length of aString1 |
|
824 @param aString2 String to search for |
|
825 @param aLength2 Length of aString2 |
|
826 @param aMaxLevel Determines the tightness of the collation. At level 0, only |
|
827 character identities are distinguished. At level 1 accents are |
|
828 distinguished as well. At level 2 case is distinguishes as well. At |
|
829 level 3 all valid different Unicode characters are considered different. |
|
830 @param aString2WildChar Wild card character which may be specified for aString2. By default |
|
831 wild card character is not specified and not used. |
|
832 @return KErrNotFound aString2 not found in aString1. |
|
833 Non-negative value telling the position in aString1 where the first occurrence of |
|
834 aString2 was found. |
|
835 @internalComponent |
|
836 */ |
|
837 TInt TCollate::Find(const TUint16 *aString1, TInt aLength1, |
|
838 const TUint16 *aString2, TInt aLength2, |
|
839 TInt aMaxLevel, TUint aString2WildChar) const |
|
840 { |
|
841 TInt dummy(0); |
|
842 return Find(aString1, aLength1, aString2,aLength2, dummy, aMaxLevel,aString2WildChar ); |
|
843 } |
|
844 |
|
845 /** |
|
846 Find the string beginning at aString2 of length aLength2 in the string |
|
847 beginning at aString1 of length aLength1. |
|
848 |
|
849 @param aString1 String to search |
|
850 @param aLength1 Length of aString1 |
|
851 @param aString2 String to search for |
|
852 @param aLength2 Length of aString2 |
|
853 @param aLengthFound A refernce to the length of the match found in the candidate string |
|
854 @param aMaxLevel Determines the tightness of the collation. At level 0, only |
|
855 character identities are distinguished. At level 1 accents are |
|
856 distinguished as well. At level 2 case is distinguishes as well. At |
|
857 level 3 all valid different Unicode characters are considered different. |
|
858 @param aString2WildChar Wild card character which may be specified for aString2. By default |
|
859 wild card character is not specified and not used. |
|
860 @return KErrNotFound aString2 not found in aString1. |
|
861 Non-negative value telling the position in aString1 where the first occurrence of |
|
862 aString2 was found. |
|
863 @internalComponent |
|
864 */ |
|
865 TInt TCollate::Find(const TUint16 *aString1, TInt aLength1, |
|
866 const TUint16 *aString2, TInt aLength2, |
|
867 TInt &aLengthFound, TInt aMaxLevel, TUint aString2WildChar) const |
|
868 { |
|
869 TUTF32Iterator itL(aString1, aString1 + aLength1); |
|
870 TUTF32Iterator itR(aString2, aString2 + aLength2); |
|
871 return FindKeySequence(itL, itR, aMaxLevel, aString2WildChar, 0, aLengthFound); |
|
872 } |
|
873 |
|
874 /** |
|
875 Match the pattern defined by aSearchTerm with aCandidate. |
|
876 Return the index in aCandidate of the start of the first pattern matched - |
|
877 that is, the first character in aSearchTerm after all wild-sequence characters |
|
878 have been matched. Return KErrNotFound if there is no match. |
|
879 |
|
880 For example, if aCandidate is "abcdefghijkl", the following values of aSearchTerm yield the |
|
881 following results: |
|
882 "abc*" gives 0 |
|
883 "abc" gives KErrNotFound |
|
884 "xyz" gives KErrNotFound |
|
885 "*def" gives KErrNotFound |
|
886 "*def*" gives 3 |
|
887 "*d?f*" gives 3 |
|
888 "a*kl" gives 0 |
|
889 "*d*kl" gives 4 |
|
890 |
|
891 To match a pattern anywhere in aCandidate, aSearchTerm must both start and end |
|
892 with aString2WildSequenceChar |
|
893 |
|
894 @param aCandidate String to search |
|
895 @param aCandidateLength Length of aCandidate |
|
896 @param aSearchTerm String to search for |
|
897 @param aSearchTermLength Length of aSearchTerm |
|
898 @param aMaxLevel Determines the tightness of the collation. At level 0, only |
|
899 character identities are distinguished. At level 1 accents are |
|
900 distinguished as well. At level 2 case is distinguishes as well. At |
|
901 level 3 all valid different Unicode characters are considered different. |
|
902 @param aWildChar Wild card character which may be specified for aSearchTerm. By default |
|
903 the wild card character used is '?'. |
|
904 @param aWildSequenceChar Wild card sequence character which may be specified for aSearchTerm. |
|
905 Its default value is '*'. |
|
906 @param aEscapeChar Escape character. If it is non-zero and precdes aWildChar and aWildSequenceChar characters in |
|
907 aCandidate string, then these characters should be treated as normal characters. |
|
908 @return The index in aCandidate of the start of the first pattern matched. |
|
909 |
|
910 @internalComponent. |
|
911 */ |
|
912 TInt TCollate::Match(const TUint16 *aCandidate, TInt aCandidateLength, |
|
913 const TUint16 *aSearchTerm,TInt aSearchTermLength, |
|
914 TInt aMaxLevel, TUint aWildChar, TUint aWildSequenceChar, |
|
915 TUint aEscapeChar) const |
|
916 { |
|
917 ASSERT(0 <= aSearchTermLength); |
|
918 ASSERT(0 <= aCandidateLength); |
|
919 |
|
920 if(aMaxLevel == 3 && (iMethod.iFlags & TCollationMethod::EFoldCase)) |
|
921 { |
|
922 aMaxLevel = 2; |
|
923 } |
|
924 |
|
925 TUTF32Iterator candidate(aCandidate, aCandidate + aCandidateLength); |
|
926 TUTF32Iterator searchTerm(aSearchTerm, aSearchTerm + aSearchTermLength); |
|
927 |
|
928 TInt firstMatch = KErrNotFound; |
|
929 TInt segEnd = ::FindCharacter(aWildSequenceChar, aEscapeChar, aSearchTerm, aSearchTermLength); |
|
930 |
|
931 // Is there any prefix that the candidate string must have? |
|
932 // aSearchTerm looks like "abc*...". Then segEnd will be 3 (the position of '*'). |
|
933 // Check that aCandidate begins with "abc" too. |
|
934 if(segEnd != 0 || aSearchTermLength == 0) |
|
935 { |
|
936 searchTerm = TUTF32Iterator(aSearchTerm, aSearchTerm + segEnd); |
|
937 TComparisonResult order = CompareKeySequences(candidate, searchTerm, aMaxLevel, aWildChar, aEscapeChar); |
|
938 if(order != ERightIsPrefixOfLeft && order != EStringsIdentical) |
|
939 { |
|
940 return KErrNotFound; |
|
941 } |
|
942 if(aSearchTermLength == segEnd) |
|
943 { |
|
944 return order == EStringsIdentical ? 0 : KErrNotFound; |
|
945 } |
|
946 firstMatch = 0; |
|
947 } |
|
948 |
|
949 // search for all remaining segments |
|
950 // For example: aSearchTerm = "abc*def*ghi", aCandidate = "abc...". |
|
951 // aCandidate was already searched for "abc" and segEnd = 3. |
|
952 // Search aCandidate for the remaining segments: "def" and "ghi". |
|
953 while(aSearchTermLength != (segEnd + 1)) |
|
954 { |
|
955 ++segEnd; |
|
956 aSearchTermLength -= segEnd; |
|
957 aSearchTerm += segEnd; |
|
958 segEnd = ::FindCharacter(aWildSequenceChar, aEscapeChar, aSearchTerm, aSearchTermLength); |
|
959 searchTerm = TUTF32Iterator(aSearchTerm, aSearchTerm + segEnd);//searchTerm holds the next aSearchTerm segment |
|
960 //We will store here the current position of candidate string. |
|
961 const TUint16* candidateCurrentPos = candidate.CurrentPosition(); |
|
962 TInt dummy(0); |
|
963 TInt match = FindKeySequence(candidate, searchTerm, aMaxLevel, aWildChar, aEscapeChar, dummy); |
|
964 if (match < 0) |
|
965 { |
|
966 return KErrNotFound; |
|
967 } |
|
968 if (aSearchTermLength == segEnd) |
|
969 { |
|
970 candidate.SetStart(candidateCurrentPos + match); |
|
971 TComparisonResult order = CompareKeySequences(candidate, searchTerm, aMaxLevel, aWildChar, aEscapeChar); |
|
972 if (order == EStringsIdentical) |
|
973 return firstMatch < 0 ? (match + candidateCurrentPos - aCandidate): firstMatch; |
|
974 while (match >= 0) |
|
975 { |
|
976 // We are at the very end of the search term, so this segment must |
|
977 // match the end of the candidate string. |
|
978 candidate.SetStart(candidateCurrentPos + match + 1); |
|
979 candidateCurrentPos = candidate.CurrentPosition(); |
|
980 match = FindKeySequence(candidate, searchTerm, aMaxLevel, aWildChar, aEscapeChar, dummy); |
|
981 candidate.SetStart(candidateCurrentPos + match); |
|
982 order = CompareKeySequences(candidate, searchTerm, aMaxLevel, aWildChar, aEscapeChar); |
|
983 if (order == EStringsIdentical) |
|
984 return firstMatch < 0 ? (match + candidateCurrentPos - aCandidate): firstMatch; |
|
985 } |
|
986 return KErrNotFound; |
|
987 } |
|
988 //Initialize the first match position, if not initialized yet |
|
989 if (firstMatch < 0 && segEnd != 0) |
|
990 { |
|
991 firstMatch = match; |
|
992 } |
|
993 } |
|
994 return firstMatch < 0 ? aCandidateLength : firstMatch; |
|
995 } |
|
996 |
|
997 /** |
|
998 Compare values output from the iterators. After the comparison, if |
|
999 ERightIsPrefixOfLeft or EStringsIdentical is returned, then aLeft |
|
1000 will be pointing at the next character (at MaxLevel) after the match. |
|
1001 If right is shown to be a prefix of left, this means that it has been |
|
1002 checked at all requested levels. If it is reported that the right is a |
|
1003 prefix of the left, then this will mean also that there are no unmatched |
|
1004 combining characters on the left. |
|
1005 |
|
1006 @internalComponent |
|
1007 */ |
|
1008 TCollate::TComparisonResult TCollate::CompareKeySequences(TUTF32Iterator& aLeft, TUTF32Iterator& aRight, |
|
1009 TInt aMaxLevel, TInt aRightStringWildChar, TInt aEscapeChar) const |
|
1010 { |
|
1011 // Clamp the maximum level of the comparison. |
|
1012 if(aMaxLevel < 0) |
|
1013 { |
|
1014 aMaxLevel = 0; |
|
1015 } |
|
1016 if(aMaxLevel > 3) |
|
1017 { |
|
1018 aMaxLevel = 3; |
|
1019 } |
|
1020 //Case folding forces the maximum level to 2. Case folding could only be done at level 3, which |
|
1021 //makes use of the actual Unicode values, if we had access to a case conversion table appropriate for |
|
1022 //the collation method. |
|
1023 if(aMaxLevel == 3 && (iMethod.iFlags & TCollationMethod::EFoldCase)) |
|
1024 { |
|
1025 aMaxLevel = 2; |
|
1026 } |
|
1027 TCollationValueIterator itL(iMethod); |
|
1028 TCollationValueIterator itR(iMethod); |
|
1029 // Perform the comparison. |
|
1030 TComparisonResult order = EStringsIdentical; |
|
1031 TComparisonResult accumulatedOrder = EStringsIdentical; |
|
1032 const TText16* endOfLeft = 0; |
|
1033 for (int cur_level = 0; cur_level <= aMaxLevel; cur_level++) |
|
1034 { |
|
1035 itL.SetSourceIt(aLeft); |
|
1036 itR.SetSourceIt(aRight); |
|
1037 |
|
1038 for (;;) |
|
1039 { |
|
1040 TUint32 c2 = itR.GetNextNonZeroKey(cur_level); |
|
1041 if (c2 == 0) |
|
1042 { |
|
1043 TUint32 more = itL.GetNextNonZeroKey(cur_level); |
|
1044 if (cur_level == 0) |
|
1045 endOfLeft = itL.CurrentPositionIfAtCharacter(); |
|
1046 if (more == 0) |
|
1047 {//No non-zero keys at all |
|
1048 order = EStringsIdentical; |
|
1049 } |
|
1050 else if (!(TCollationMethod::EIgnoreCombining & iMethod.iFlags) |
|
1051 && itL.AtCombiningCharacter()) |
|
1052 { |
|
1053 order = ERightComparesLessAndIsNotPrefix; |
|
1054 } |
|
1055 else |
|
1056 { |
|
1057 order = ERightIsPrefixOfLeft; |
|
1058 } |
|
1059 break; |
|
1060 } |
|
1061 TUint32 c1 = itL.GetNextNonZeroKey(cur_level); |
|
1062 if (c1 == 0) |
|
1063 { |
|
1064 order = ELeftIsPrefixOfRight; |
|
1065 break; |
|
1066 } |
|
1067 |
|
1068 itL.Increment(); |
|
1069 if(cur_level == 0 && aEscapeChar != 0 && itR.MatchChar(aEscapeChar)) |
|
1070 {//Escape character found. Get the next key. |
|
1071 c2 = itR.GetNextNonZeroKey(cur_level); |
|
1072 itR.Increment(); |
|
1073 } |
|
1074 else |
|
1075 { |
|
1076 if(aRightStringWildChar && itR.MatchChar(aRightStringWildChar)) |
|
1077 { |
|
1078 itL.SkipCombiningCharacters(); |
|
1079 itR.SkipCombiningCharacters(); |
|
1080 c1 = c2; |
|
1081 } |
|
1082 else |
|
1083 { |
|
1084 itR.Increment(); |
|
1085 } |
|
1086 } |
|
1087 |
|
1088 // Has an order been determined by key difference? |
|
1089 if (c1 != c2) |
|
1090 { |
|
1091 // Fold to lower case, or switch ordering for case or kana syllabary if necessary. |
|
1092 if (cur_level == 2 && (c1 <= (0x14 * 4) && c2 <= (0x14 * 4))) |
|
1093 { |
|
1094 // Divide keys by 4 to get them back into the range 0..63 |
|
1095 // because keys returned by GetKey are masked but not shifted. |
|
1096 c1 /= 4; |
|
1097 c2 /= 4; |
|
1098 ProcessKeys(c1, c2, iMethod.iFlags); |
|
1099 } |
|
1100 if (c1 != c2) // test equality again because case folding might have made them equal |
|
1101 { |
|
1102 order = c1 > c2 ? ERightComparesLessAndIsNotPrefix : ELeftComparesLessAndIsNotPrefix; |
|
1103 TBool backwards = cur_level == 1 && (iMethod.iFlags & TCollationMethod::EAccentsBackwards); |
|
1104 if (order && !backwards) |
|
1105 { |
|
1106 break; |
|
1107 } |
|
1108 } |
|
1109 } |
|
1110 } |
|
1111 if (accumulatedOrder != order && order != EStringsIdentical) |
|
1112 { |
|
1113 if (accumulatedOrder == ERightIsPrefixOfLeft) |
|
1114 { |
|
1115 return ERightComparesLessAndIsNotPrefix; |
|
1116 } |
|
1117 else if (accumulatedOrder == ELeftIsPrefixOfRight) |
|
1118 { |
|
1119 return ELeftComparesLessAndIsNotPrefix; |
|
1120 } |
|
1121 else |
|
1122 { |
|
1123 // accumulatedOrder == EStringsIdentical |
|
1124 if (order == ELeftComparesLessAndIsNotPrefix || order == ERightComparesLessAndIsNotPrefix) |
|
1125 { |
|
1126 return order; |
|
1127 } |
|
1128 } |
|
1129 accumulatedOrder = order; |
|
1130 } |
|
1131 } |
|
1132 |
|
1133 if (accumulatedOrder == EStringsIdentical || accumulatedOrder == ERightIsPrefixOfLeft) |
|
1134 { |
|
1135 if (endOfLeft) |
|
1136 { |
|
1137 aLeft.SetStart(endOfLeft); |
|
1138 } |
|
1139 else if (accumulatedOrder == ERightIsPrefixOfLeft) |
|
1140 { |
|
1141 accumulatedOrder = ERightComparesLessAndIsNotPrefix; |
|
1142 } |
|
1143 } |
|
1144 return accumulatedOrder; |
|
1145 } |
|
1146 |
|
1147 /** |
|
1148 Finds search term inside candidate string. Returns KErrNotFound if there |
|
1149 is no match, returns the offset into the candidate string at which the |
|
1150 search term was found. If a string was found, the search term iterator is left |
|
1151 pointing at the end of the search term, and the candidate iterator is |
|
1152 left pointing just after the matched keys. aMatchPos returns where in |
|
1153 the candidate string the match was found. |
|
1154 |
|
1155 @internalComponent |
|
1156 */ |
|
1157 TInt TCollate::FindKeySequence(TUTF32Iterator& aCandidate, TUTF32Iterator& aSearchTerm, |
|
1158 TInt aMaxLevel, TInt aWildChar, TInt aEscapeChar, TInt& aLengthFound) const |
|
1159 { |
|
1160 TInt matchOffset = 0; |
|
1161 //Save the start of the candidate string |
|
1162 const TText* candidateStart = aCandidate.CurrentPosition(); |
|
1163 //Create copies of aCandidate and aSearchTerm |
|
1164 TUTF32Iterator candidateCopy(aCandidate); |
|
1165 TUTF32Iterator searchTermCopy(aSearchTerm); |
|
1166 aLengthFound = KErrNotFound; |
|
1167 //Do the search |
|
1168 for(;;) |
|
1169 { |
|
1170 TComparisonResult order = CompareKeySequences(aCandidate, aSearchTerm, aMaxLevel, aWildChar, aEscapeChar); |
|
1171 if(order == ELeftIsPrefixOfRight) |
|
1172 { |
|
1173 return KErrNotFound; |
|
1174 } |
|
1175 if(order == ERightIsPrefixOfLeft || order == EStringsIdentical) |
|
1176 { |
|
1177 aLengthFound = (aCandidate.CurrentPosition() - candidateStart) - matchOffset; |
|
1178 return matchOffset; |
|
1179 } |
|
1180 |
|
1181 aCandidate = candidateCopy; |
|
1182 aCandidate.Next(); |
|
1183 ::SkipCombiningCharacters(aCandidate); |
|
1184 candidateCopy = aCandidate; |
|
1185 |
|
1186 matchOffset = aCandidate.CurrentPosition() - candidateStart; |
|
1187 |
|
1188 aSearchTerm = searchTermCopy; |
|
1189 } |
|
1190 } |