|
1 // Copyright (c) 1999-2009 Nokia Corporation and/or its subsidiary(-ies). |
|
2 // All rights reserved. |
|
3 // This component and the accompanying materials are made available |
|
4 // under the terms of the License "Eclipse Public License v1.0" |
|
5 // which accompanies this distribution, and is available |
|
6 // at the URL "http://www.eclipse.org/legal/epl-v10.html". |
|
7 // |
|
8 // Initial Contributors: |
|
9 // Nokia Corporation - initial contribution. |
|
10 // |
|
11 // Contributors: |
|
12 // |
|
13 // Description: |
|
14 // Reads and parses the Unicode collation value table and writes out a C++ source file |
|
15 // containing the data in a form that can be used by the EPOC collation system. |
|
16 // |
|
17 // The program reads either three separate files or one combined file: |
|
18 // |
|
19 // Three files (by default): |
|
20 // 1. Base keys (maps single Unicode values to single collation key values): must be in the same format as |
|
21 // basekeys.txt, supplied with the Standard Unicode Collation system |
|
22 // |
|
23 // 2. Composite keys (maps single Unicode values to strings of collation keys): must be in the same format as |
|
24 // compkeys.txt, supplied with the Standard Unicode Collation system |
|
25 // |
|
26 // 3. Strings (maps strings of Unicode values to single collation keys OR strings of collation keys): must be in the |
|
27 // same format as compkeys.txt, except that there can be any number of Unicode characters at the start of the line, |
|
28 // space-separated and each exactly 4 hex digits. |
|
29 // |
|
30 // One combined file (with option /a): |
|
31 // 1. All Keys (combine above three files into one file): must be in the same format as allkeys.txt, supplied with the Standard Unicode Collation system (after Unicode 3.0). |
|
32 // |
|
33 // |
|
34 |
|
35 |
|
36 #include <assert.h> |
|
37 #include <ctype.h> |
|
38 |
|
39 #ifdef __MSVCDOTNET__ |
|
40 #include <fstream> |
|
41 #include <iostream> |
|
42 using namespace std; |
|
43 #else //!__MSVCDOTNET__ |
|
44 #include <fstream.h> |
|
45 #include <iostream.h> |
|
46 #endif //__MSVCDOTNET__ |
|
47 |
|
48 #include <stdlib.h> |
|
49 #include <string.h> |
|
50 #include <stdio.h> |
|
51 |
|
/*
Limits on level-1 and level-2 key values, chosen so that the keys can be packed
into KLevel1Bits and KLevel2Bits bits respectively.
Zero is left alone; a non-zero key is reduced by (minimum - 1) before packing,
so the largest key that still fits is minimum + 2^bits - 2.
*/
const unsigned int KLevel1Bits = 8;
const unsigned int KLevel1Min = 0x20;
const unsigned int KLevel1Max = KLevel1Min + ((1u << KLevel1Bits) - 2);	// 0x11E

const unsigned int KLevel2Bits = 6;
const unsigned int KLevel2Min = 1;
const unsigned int KLevel2Max = KLevel2Min + ((1u << KLevel2Bits) - 2);	// 0x3F
|
62 |
|
63 /* |
|
64 Table of characters in the WGL4 set, plus characters in canonical decompositions of |
|
65 those characters, plus commonly used control characters and space characters, |
|
66 given as ranges of Unicode characters. In each pair, the first code is the first in the range, |
|
67 and the second is the first code NOT in the range. |
|
68 |
|
69 The extra characters are added mainly to ensure that control characters and spaces are |
|
70 normally ignored. The extra characters are: |
|
71 |
|
72 0x0000-0x001F: ASCII control characters |
|
73 0x2000-0x2012: spaces, hyphen variants, figure dash |
|
74 0x2028-0x202E: line and paragraph separator, bidirectional control characters |
|
75 0xFEFF : byte-order mark |
|
76 0xFFFC-0xFFFD: object replacement character, replacement character |
|
77 */ |
|
// Half-open [first, limit) ranges, one pair per line, in ascending order.
// InWgl4() binary-searches this table, so the pairs must stay sorted and disjoint.
const unsigned int Wgl4Range[] =
	{
	0x00, 0x7f,		// ASCII (0x7F itself is not included)
	0xa0, 0x180,	// non-breaking space, Latin-1, Latin Extended-A
	0x192, 0x193,	// Latin f with hook
	0x1fa, 0x200,	// A-ring, a-ring, AE, ae, O slash, o slash, all with acute accent
	0x2c6, 0x2c8,	// non-combining circumflex and caron
	0x2c9, 0x2ca,	// non-combining macron
	0x2d8, 0x2dc,	// non-combining breve, dot above, ring above, ogonek
	0x2dd, 0x2de,	// non-combining double acute
	0x300, 0x305,	// combining grave, acute, circumflex, tilde, macron
	0x306, 0x309,	// combining breve, dot above, double dot above
	0x30a, 0x30e,	// combining ring above, double acute, caron, vertical line above
	0x327, 0x329,	// combining cedilla, ogonek
	0x384, 0x38b,	// Greek
	0x38c, 0x38d,	// Greek
	0x38e, 0x3a2,	// Greek
	0x3a3, 0x3cf,	// Greek
	0x401, 0x40d,	// Cyrillic
	0x40e, 0x450,	// Cyrillic
	0x451, 0x45d,	// Cyrillic
	0x45e, 0x460,	// Cyrillic
	0x490, 0x492,	// Cyrillic
	0x1e80, 0x1e86,	// W and w, each with grave, acute and diaeresis
	0x1ef2, 0x1ef4,	// Y with grave, y with grave
	0x2000, 0x2016,	// various spaces and horizontal lines
	0x2017, 0x201f,	// double low line, various quotation marks
	0x2020, 0x2023,	// dagger, double dagger, bullet
	0x2026, 0x2027,	// ellipsis
	0x2028, 0x202F,	// line & paragraph separators and directional formatting
	0x2030, 0x2031,	// per mille
	0x2032, 0x2034,	// prime
	0x2039, 0x203b,	// single angle quotation marks
	0x203c, 0x203d,	// double exclamation mark
	0x203e, 0x203f,	// non-combining overscore
	0x2044, 0x2045,	// fraction slash
	0x207f, 0x2080,	// superscript n
	0x20a3, 0x20a5,	// French Franc, Italian/Turkish Lira
	0x20a7, 0x20a8,	// Spanish Peseta
	0x20ac, 0x20ad,	// Euro symbol
	0x2105, 0x2106,	// care of
	0x2113, 0x2114,	// script l
	0x2116, 0x2117,	// numero
	0x2122, 0x2123,	// trade mark
	0x2126, 0x2127,	// ohm
	0x212e, 0x212f,	// estimated (net weight)
	0x215b, 0x215f,	// 1/8, 3/8, 5/8, 7/8
	0x2190, 0x2196,	// horizontal and vertical arrows
	0x21a8, 0x21a9,	// up down arrow with base
	0x2202, 0x2203,	// partial differential
	0x2206, 0x2207,	// increment (delta)
	0x220f, 0x2210,	// n-ary product (pi)
	0x2211, 0x2213,	// n-ary sum (sigma), minus
	0x2215, 0x2216,	// division (slash)
	0x2219, 0x221b,	// bullet operator, square root
	0x221e, 0x2220,	// infinity, right angle
	0x2229, 0x222a,	// intersection
	0x222b, 0x222c,	// integral
	0x2248, 0x2249,	// almost equal to
	0x2260, 0x2262,	// not equal to, identical to
	0x2264, 0x2266,	// less-than-or-equal-to, greater-than-or-equal-to
	0x2302, 0x2303,	// house
	0x2310, 0x2311,	// reversed not sign
	0x2320, 0x2322,	// top and bottom of integral
	0x2500, 0x2501,	// box drawing
	0x2502, 0x2503,	// box drawing
	0x250c, 0x250d,	// box drawing
	0x2510, 0x2511,	// box drawing
	0x2514, 0x2515,	// box drawing
	0x2518, 0x2519,	// box drawing
	0x251c, 0x251d,	// box drawing
	0x2524, 0x2525,	// box drawing
	0x252c, 0x252d,	// box drawing
	0x2534, 0x2535,	// box drawing
	0x253c, 0x253d,	// box drawing
	0x2550, 0x256d,	// box drawing
	0x2580, 0x2581,	// block element
	0x2584, 0x2585,	// block element
	0x2588, 0x2589,	// block element
	0x258c, 0x258d,	// block element
	0x2590, 0x2594,	// block element
	0x25a0, 0x25a2,	// geometric shapes
	0x25aa, 0x25ad,	// geometric shapes
	0x25b2, 0x25b3,	// geometric shapes
	0x25ba, 0x25bb,	// geometric shapes
	0x25bc, 0x25bd,	// geometric shapes
	0x25c4, 0x25c5,	// geometric shapes
	0x25ca, 0x25cc,	// geometric shapes
	0x25cf, 0x25d0,	// geometric shapes
	0x25d8, 0x25da,	// geometric shapes
	0x25e6, 0x25e7,	// geometric shapes
	0x263a, 0x263d,	// smilies, sun
	0x2640, 0x2641,	// female
	0x2642, 0x2643,	// male
	0x2660, 0x2661,	// spade
	0x2663, 0x2664,	// club
	0x2665, 0x2667,	// heart, diamond
	0x266a, 0x266c,	// quaver, beamed quavers
	0xfb01, 0xfb03,	// fi, fl ligatures
	0xfeff, 0xff00,	// zero-width non-breaking space (byte-order mark)
	0xfffc, 0xfffe	// object replacement character and replacement character
	};

// Number of [first, limit) pairs in Wgl4Range.
const int Wgl4Ranges = sizeof(Wgl4Range) / (2 * sizeof(Wgl4Range[0]));
|
181 |
|
/*
Comparison callback in the form expected by bsearch().
Each argument points to a pair of unsigned ints. A pair whose two values are equal
is treated as the search key; the other pair is treated as a [first, limit) range.
If the second argument is not such a pair, the first argument is taken as the key.
Returns -1 if the key lies below the range, 1 if it lies at or beyond the limit,
and 0 if the key falls inside the range.
*/
int CompareWgl4Ranges(const void* aRange1,const void* aRange2)
	{
	const unsigned int* first = static_cast<const unsigned int*>(aRange1);
	const unsigned int* second = static_cast<const unsigned int*>(aRange2);

	// Work out which argument is the key and which is the range.
	const bool secondIsKey = (second[0] == second[1]);
	const unsigned int* key = secondIsKey ? second : first;
	const unsigned int* range = secondIsKey ? first : second;

	if (key[0] < range[0])
		return -1;
	if (key[0] >= range[1])
		return 1;
	return 0;
	}
|
199 |
|
200 // Determine if a character is in the WGL4 character repertoire. |
|
201 static bool InWgl4(unsigned int aChar) |
|
202 { |
|
203 unsigned int key[2]; |
|
204 key[0] = key[1] = aChar; |
|
205 return bsearch(key,Wgl4Range,Wgl4Ranges,sizeof(Wgl4Range[0]) * 2,CompareWgl4Ranges) != NULL; |
|
206 } |
|
207 |
|
// A single collation key: one value per level plus two flags.
class CollationKey
	{
public:
	enum
		{
		ELevels = 3		// number of key levels stored per key
		};

	// Two keys are equal when every level and both flags match.
	bool operator==(const CollationKey& k) const
		{
		for (int level = 0; level < ELevels; ++level)
			{
			if (iLevel[level] != k.iLevel[level])
				return false;
			}
		return iIgnorable == k.iIgnorable && iStop == k.iStop;
		}

	int iLevel[ELevels];	// the key value at each level
	bool iIgnorable;		// true if this key can normally be ignored
	bool iStop;				// true if this is the last key in a string of keys
	};
|
224 |
|
// Associates one Unicode value with the position of its key (or first key)
// in the key table.
class CollationIndex
	{
public:
	// Comparison callback taking two untyped pointers; defined elsewhere in this file.
	static int Compare(const void* aIndex1,const void* aIndex2);

	int iCode;		// the Unicode value
	int iIndex;		// index into the key table
	};
|
234 |
|
235 class Reader |
|
236 { |
|
237 public: |
|
238 Reader(bool aWgl4,bool aStandard,const char* aLocaleName, const char* aUidString); |
|
239 ~Reader(); |
|
240 void ReadBaseKeys(const char* aFileName); |
|
241 void ReadCompKeys(const char* aFileName); |
|
242 void ReadStrings(const char* aFileName); |
|
243 void ReadAllKeys(const char* aFileName); |
|
244 void WriteOutput(const char* aFileName, bool aCopyrightMessage); |
|
245 int CompareStringIndices(int aIndex1,int aIndex2) const; |
|
246 |
|
247 private: |
|
248 Reader(const Reader&); |
|
249 int Hex(const char *aString, int &aCharConsumed, bool aTolerate = false); |
|
250 void GetCollationKey(const char* aString, int& aCharConsumed, CollationKey* aKey=NULL); |
|
251 void GetMultipleCollationKeys(const char* aString); |
|
252 unsigned int PackKey(const CollationKey& aValue); |
|
253 int PackIndex(const CollationIndex& aValue, unsigned int result[2]); |
|
254 bool ParseLine(const char* aLine, int aCode[16], int& aCodeCount, int& aKeyStart, int& aKeyCount); |
|
255 void AddKeyOneToOne(const char* aLine, const int aCode, const int aKeyStart); |
|
256 void AddKeyOneToMuch(const char* aLine, const int aCode, const int aKeyStart); |
|
257 void AddKeyMuchToMuch(const char* aLine, const int aCode[16], const int aCodeCount, const int aKeyStart); |
|
258 |
|
259 enum |
|
260 { |
|
261 EMaxCollationKeys = 0x110000 * 2, /*more elements considering composite keys */ |
|
262 EMaxCollationIndices = 0x110000, |
|
263 EMaxStringElements = 65536, |
|
264 EMaxStringIndices = 65536 |
|
265 }; |
|
266 CollationKey iCollationKey[EMaxCollationKeys]; |
|
267 int iKeys; |
|
268 CollationIndex iCollationIndex[EMaxCollationIndices]; |
|
269 int iIndices; |
|
270 int iStringElement[EMaxStringElements]; |
|
271 int iStringElements; |
|
272 unsigned int iStringIndex[EMaxStringIndices]; |
|
273 int iStringIndices; |
|
274 const char* iInputFileName; |
|
275 int iLineNumber; |
|
276 bool iSuppressCanonseqWarning; // have we issued the canonseq warning yet? |
|
277 bool iWgl4; // true if writing keys for wgl4 characters only |
|
278 bool iStandard; // true if reading standard files, not tailoring files |
|
279 const char* iLocaleName; |
|
280 const char* iUidString; |
|
281 char* iCPlusPlusIdentifier; // iLocaleName in title case with difficult characters removed |
|
282 }; |
|
283 |
|
// Return true if c is one of 0-9, a-f or A-F.
bool isValidHexDigit(char c)
	{
	const bool decimalDigit = (c >= '0' && c <= '9');
	const bool lowerCaseLetter = (c >= 'a' && c <= 'f');
	const bool upperCaseLetter = (c >= 'A' && c <= 'F');
	return decimalDigit || lowerCaseLetter || upperCaseLetter;
	}
|
294 |
|
// Write the command-line usage summary to standard output.
// The final line is deliberately written without a trailing newline.
void PrintUsage()
	{
	static const char* const KUsageText[] =
		{
		"Usage: coltab [/u<uid>] [/c] [/a] [/h<topic>] <locale>\n",
		"By Default (without /a option), for the locales 'standard' and 'wgl4' coltab reads basekeys.txt & compkeys.txt\n",
		"For any other locale name <name> coltab reads <name>_basekeys.txt,\n",
		"<name>_compkeys.txt and <name>_strings.txt.\n",
		"Use the /a option, for the locales 'standard' and 'wgl4' coltab reads allkeys.txt\n",
		"For any other locale name <name> coltab reads <name>_allkeys.txt.\n",
		"The output file is always ls_<name>.cpp.\n",
		"Use the /u option to specify the UID that the collation table should have.\n",
		"A hex number must follow /u immediately, for example /u800ACBDE\n",
		"this hex number must not exceed eight digits. If this is not specified,\n",
		"the output file will have to be edited to make it compilable.\n",
		"Specify /c to prefix the output with a Nokia copyright message.\n",
		"Specify /h for in-depth help."
		};
	const int lineCount = sizeof(KUsageText) / sizeof(KUsageText[0]);
	for (int i = 0; i < lineCount; ++i)
		cout << KUsageText[i];
	}
|
311 |
|
312 void UsageError() |
|
313 { |
|
314 PrintUsage(); |
|
315 exit(1); |
|
316 } |
|
317 |
|
318 void PrintHelp(char* aTopic) |
|
319 { |
|
320 int topic = 0; |
|
321 while ('0' <= *aTopic && *aTopic <= '9') |
|
322 { |
|
323 topic = topic * 10 + (*aTopic - '0'); |
|
324 ++aTopic; |
|
325 } |
|
326 switch(topic) |
|
327 { |
|
328 case 1: |
|
329 cout << "How Coltab interprets CANONSEQ:\n\n"\ |
|
330 "If the CANONSEQ specifier is used in a line, Coltab will ignore the mapping.\n"\ |
|
331 "This because, on the Symbian platform, any canonically composed character is\n"\ |
|
332 "decomposed before the key mapping is applied, so characters with canonical\n"\ |
|
333 "decompositions do not need keys. In files supplied by the Unicode Consortium,\n"\ |
|
334 "all mappings for composed characters are flagged by CANONSEQ, so it is useful\n"\ |
|
335 "if Coltab can just ignore these so that Unicode Consortium files can be used\n"\ |
|
336 "unedited.\n\n"\ |
|
337 "This can cause problems if a localizer copies a line from a Unicode file into,\n"\ |
|
338 "say, the <lang>_strings.txt file, in order to give a mapping for an accented\n"\ |
|
339 "character. The localizer replaces the composed character code with the\n"\ |
|
340 "decomposition and changes the keys but forgets to remove the CANONSEQ\n"\ |
|
341 "specifier. In this case the key would be ignored. Coltab provides a warning so\n"\ |
|
342 "that this can be put right.\n\n"\ |
|
343 "Coltab will only warn about the first CANONSEQ in each file, and does not warn\n"\ |
|
344 "if the 'standard' or 'wgl4' options are used."; |
|
345 exit(1); |
|
346 break; |
|
347 case 2: |
|
348 cout << "How to ensure coltab's output files are compilable.\n\n"\ |
|
349 "By default, Coltab's files for locales need to be edited before they are\n"\ |
|
350 "compilable. The UID for the collation method needs to be filled in. This UID\n"\ |
|
351 "is added so that the collation table can be searched for later. At present,\n"\ |
|
352 "this UID is not necessary for the correct functioning of the Symbian platform\n"\ |
|
353 "and so a value of 0 can be safely used.\n\n"\ |
|
354 "To insert this value into the file directly, use the /u option, for example\n"\ |
|
355 "coltab /u0 french\n"\ |
|
356 "If the /u option is used, the file should be compilable as is. If it is not,\n"\ |
|
357 "please raise it as a defect with Symbian's internationalization team,\n"\ |
|
358 "supplying the files that caused the problem if this is possible.\n"\ |
|
359 "If the 'standard' or 'wgl4' options are used, no UID is output, so the /u\n"\ |
|
360 "option is not required."; |
|
361 exit(1); |
|
362 break; |
|
363 case 3: |
|
364 cout << "How to ensure collation key values are inside the supported range. \n\n"\ |
|
365 "According to Unicode Standard, the range suppored by tool COLTAB:\n"\ |
|
366 " Level 0 (primary): 0000 - FFFF, \n"\ |
|
367 " Level 1 (Secondary): 0020 - 011E, \n"\ |
|
368 " Level 2 (Tertiary): 0001 - 003F. \n"\ |
|
369 "Please edit your collation files and make sure key values are inside the above range"; |
|
370 exit(1); |
|
371 break; |
|
372 default: |
|
373 PrintUsage(); |
|
374 cout << "\n\nSpecify /h1 for help on the use of CANONSEQ\n"; |
|
375 cout << "Specify /h2 for help on making compilable files that do not need editing\n"; |
|
376 exit(1); |
|
377 break; |
|
378 } |
|
379 } |
|
380 |
|
// Compute 0xD7C0 + (aCode >> 10) and return it narrowed to a short.
// For aCode in 0x10000..0x10FFFF this is the UTF-16 high surrogate (0xD800..0xDBFF).
short HighSurrogate(int aCode)
	{
	const int KHighSurrogateBase = 0xD7C0;
	const int topBits = aCode >> 10;
	return static_cast<short>(KHighSurrogateBase + topBits);
	}
|
385 |
|
// Compute 0xDC00 | (aCode & 0x3FF) and return it narrowed to a short.
// This is the UTF-16 low surrogate (0xDC00..0xDFFF) carrying the low ten bits of aCode.
short LowSurrogate(int aCode)
	{
	const int KLowSurrogateBase = 0xDC00;
	const int lowTenBits = aCode & 0x3FF;
	return static_cast<short>(KLowSurrogateBase | lowTenBits);
	}
|
390 |
|
391 int main(int argc,char** argv) |
|
392 { |
|
393 bool copyright = false; |
|
394 bool wgl4 = false; |
|
395 bool allKeys = false; |
|
396 const char* prefix = ""; |
|
397 const char* infix = ""; |
|
398 const char* locale = ""; |
|
399 char* localeArg = 0; |
|
400 char* uidArg = 0; |
|
401 for (int i = 1; i < argc; ++i) |
|
402 { |
|
403 if (argv[i][0] == '/' || argv[i][0] == '-') |
|
404 { |
|
405 switch (argv[i][1]) |
|
406 { |
|
407 case 'u': |
|
408 case 'U': |
|
409 { |
|
410 uidArg = argv[i] + 2; |
|
411 const char* uidCheck = uidArg; |
|
412 while (*uidCheck) |
|
413 { |
|
414 if (!isValidHexDigit(*uidCheck)) |
|
415 UsageError(); |
|
416 ++uidCheck; |
|
417 } |
|
418 if (uidCheck == uidArg || 8 < uidCheck - uidArg) |
|
419 UsageError(); |
|
420 break; |
|
421 } |
|
422 case 'c': |
|
423 case 'C': |
|
424 copyright = true; |
|
425 break; |
|
426 case 'a': |
|
427 allKeys = true; |
|
428 break; |
|
429 case 'h': |
|
430 case 'H': |
|
431 PrintHelp(argv[i] + 2); |
|
432 break; |
|
433 default: |
|
434 UsageError(); |
|
435 break; |
|
436 } |
|
437 } |
|
438 else if (!localeArg) |
|
439 localeArg = argv[i]; |
|
440 else |
|
441 UsageError(); |
|
442 } |
|
443 if (!localeArg) |
|
444 UsageError(); |
|
445 bool standard = false; |
|
446 if (!_stricmp(localeArg, "standard")) |
|
447 { |
|
448 locale = "Standard"; |
|
449 standard = true; |
|
450 } |
|
451 else if (!_stricmp(localeArg, "wgl4")) |
|
452 { |
|
453 locale = "Wgl4"; |
|
454 wgl4 = true; |
|
455 standard = true; |
|
456 } |
|
457 else |
|
458 { |
|
459 locale = prefix = localeArg; |
|
460 infix = "_"; |
|
461 } |
|
462 |
|
463 Reader* reader = new Reader(wgl4, standard, locale, uidArg); |
|
464 if (!reader) |
|
465 { |
|
466 cout << "out of memory\n"; |
|
467 exit(1); |
|
468 } |
|
469 char* filename = new char[strlen(prefix) + strlen(infix) + 64]; |
|
470 if (allKeys == false) |
|
471 { |
|
472 sprintf(filename,"%s%scompkeys.txt",prefix,infix); |
|
473 reader->ReadCompKeys(filename); |
|
474 if (!standard) |
|
475 { |
|
476 sprintf(filename,"%s%sstrings.txt",prefix,infix); |
|
477 reader->ReadStrings(filename); |
|
478 } |
|
479 sprintf(filename,"%s%sbasekeys.txt",prefix,infix); |
|
480 reader->ReadBaseKeys(filename); |
|
481 } |
|
482 else |
|
483 { |
|
484 sprintf(filename,"%s%sAllKeys.txt",prefix,infix); |
|
485 reader->ReadAllKeys(filename); |
|
486 } |
|
487 sprintf(filename,"ls_%s.cpp", localeArg); |
|
488 reader->WriteOutput(filename, copyright); |
|
489 |
|
490 delete reader; |
|
491 delete [] filename; |
|
492 return 0; |
|
493 } |
|
494 |
|
495 Reader::Reader(bool aWgl4, bool aStandard, |
|
496 const char* aLocaleName, const char* aUidString): |
|
497 iKeys(0), |
|
498 iIndices(0), |
|
499 iStringElements(0), |
|
500 iStringIndices(0), |
|
501 iInputFileName(NULL), |
|
502 iLineNumber(0), |
|
503 iSuppressCanonseqWarning(false), |
|
504 iWgl4(aWgl4), |
|
505 iStandard(aStandard), |
|
506 iLocaleName(aLocaleName), |
|
507 iUidString(aUidString) |
|
508 { |
|
509 if (iStandard) |
|
510 { |
|
511 iCPlusPlusIdentifier = new char[9]; |
|
512 strcpy(iCPlusPlusIdentifier, "Standard"); |
|
513 return; |
|
514 } |
|
515 char* p = iCPlusPlusIdentifier = new char[strlen(aLocaleName) + 2]; |
|
516 int current = toupper(aLocaleName[0]); |
|
517 if (current < 'A' || 'Z' < current) |
|
518 *p++ = 'C'; |
|
519 else |
|
520 { |
|
521 *p++ = static_cast<char>(current); |
|
522 ++aLocaleName; |
|
523 } |
|
524 bool inUnderScore = false; |
|
525 while (*aLocaleName) |
|
526 { |
|
527 current = tolower(*aLocaleName++); |
|
528 if (current < 'a' || 'z' < current) |
|
529 { |
|
530 if (!inUnderScore) |
|
531 { |
|
532 inUnderScore = true; |
|
533 *p++ = '_'; |
|
534 } |
|
535 } |
|
536 else |
|
537 { |
|
538 inUnderScore = false; |
|
539 *p++ = static_cast<char>(current); |
|
540 } |
|
541 } |
|
542 *p = 0; |
|
543 } |
|
544 |
|
545 Reader::~Reader() |
|
546 { |
|
547 delete [] iCPlusPlusIdentifier; |
|
548 } |
|
549 |
|
550 // Get a hex number of exactly four digits from aString. Return -1 if none is found and aTolerate is true. |
|
551 int Reader::Hex(const char *aString, int &aCharConsumed, bool aTolerate) |
|
552 { |
|
553 char *end; |
|
554 unsigned long x = strtoul(aString,&end,16); |
|
555 aCharConsumed = end - aString; |
|
556 if ((aCharConsumed != 4) && (aCharConsumed != 5) && (aCharConsumed != 6)) |
|
557 { |
|
558 if (!aTolerate) |
|
559 { |
|
560 cout << "bad hex number on line " << iLineNumber << " of file " << iInputFileName << '\n'; |
|
561 exit(1); |
|
562 } |
|
563 return -1; |
|
564 } |
|
565 return x; |
|
566 } |
|
567 |
|
568 // Get a collation value from a string of the form [.xxxx.xxxx.xxxx.xxxx] |
|
569 void Reader::GetCollationKey(const char* aString, int& aCharConsumed, CollationKey* aKey) |
|
570 { |
|
571 aCharConsumed = 0; |
|
572 const char *end = strchr(aString, ']'); |
|
573 if (end != NULL){ |
|
574 aCharConsumed = end - aString; |
|
575 } |
|
576 |
|
577 if (aString[0] != '[' || (aCharConsumed != 21 && aCharConsumed != 22 && aCharConsumed != 23)) |
|
578 { |
|
579 cout << "syntax error on line " << iLineNumber << " of file " << iInputFileName << '\n'; |
|
580 exit(1); |
|
581 } |
|
582 if (aKey == NULL) |
|
583 { |
|
584 if (iKeys >= EMaxCollationKeys) |
|
585 { |
|
586 cout << "too many keys"; |
|
587 exit(1); |
|
588 } |
|
589 aKey = &iCollationKey[iKeys++]; |
|
590 } |
|
591 aKey->iIgnorable = aString[1] == '*'; // asterisk means that this character is normally ignored |
|
592 int charConsumed = 0; |
|
593 for (int i = 0; i < CollationKey::ELevels; i++) |
|
594 aKey->iLevel[i] = Hex(aString + 2 + i * 5, charConsumed); |
|
595 |
|
596 if (aKey->iLevel[1] > 0 && (aKey->iLevel[1] < KLevel1Min || aKey->iLevel[1] > KLevel1Max)) |
|
597 { |
|
598 aKey->iLevel[1] = KLevel1Max; |
|
599 cout << "illegal level-1 key value on line " << iLineNumber << "; outside the range " << KLevel1Min << ".." << KLevel1Max << "\n"; |
|
600 cout << "Error: illegal key value in file, please see coltab /h3 for details.\n"; |
|
601 exit(1); |
|
602 } |
|
603 |
|
604 if (aKey->iLevel[2] > 0 && (aKey->iLevel[2] < KLevel2Min || aKey->iLevel[2] > KLevel2Max)) |
|
605 { |
|
606 cout << "illegal level-2 key value on line " << iLineNumber << "; outside the range " << KLevel2Min << ".." << KLevel2Max << "\n"; |
|
607 cout << "Error: illegal key value in file, please see coltab /h3 for details.\n"; |
|
608 exit(1); |
|
609 } |
|
610 |
|
611 aKey->iStop = true; |
|
612 } |
|
613 |
|
614 void Reader::GetMultipleCollationKeys(const char* aString) |
|
615 { |
|
616 int keyCount = 0; |
|
617 int charConsumed =0; |
|
618 while (aString[0] == '[') |
|
619 { |
|
620 GetCollationKey(aString, charConsumed); |
|
621 |
|
622 keyCount++; |
|
623 iCollationKey[iKeys - 1].iStop = false; |
|
624 int length = strlen(aString); |
|
625 if (length <= charConsumed + 1) |
|
626 break; |
|
627 aString += charConsumed + 1; |
|
628 |
|
629 if (aString[0] == ' ') //a space is put between collation keys in keys files provided by previous Unicode Standard (i.e 3.1) |
|
630 aString++; |
|
631 |
|
632 } |
|
633 iCollationKey[iKeys - 1].iStop = true; |
|
634 } |
|
635 |
|
636 /* |
|
637 Partially parse a line, returning its key code and the start of its first block of key data. |
|
638 Return false if it is not a data line, or not relevant. |
|
639 */ |
|
640 bool Reader::ParseLine(const char* aLine, int aCode[16], int& aCodeCount, int& aKeyStart, int& aKeyCount) |
|
641 { |
|
642 int lineLength = strlen(aLine); |
|
643 int charConsumed = 0; |
|
644 aCodeCount = 0; |
|
645 aCode[0] = Hex(aLine,charConsumed,true); |
|
646 |
|
647 /* |
|
648 A data line must start with a hex number and be at least 27 characters long. |
|
649 Canonically decomposable Unicode characters are skipped. |
|
650 Skip non-WGL4 characters if doing WGL4 only. |
|
651 */ |
|
652 if (aCode[0] != -1) |
|
653 { |
|
654 aCodeCount = 1; |
|
655 if (!strcmp(aLine + lineLength - 8,"CANONSEQ")) |
|
656 { |
|
657 if (!iSuppressCanonseqWarning) |
|
658 { |
|
659 cout << "Warning: CANONSEQ used in file " << iInputFileName |
|
660 << " on line " << iLineNumber << ".\nWarning: All mappings specifying CANONSEQ are ignored.\n" |
|
661 << "Warning: Use coltab /h1 for more details."; |
|
662 iSuppressCanonseqWarning = true; |
|
663 } |
|
664 aCodeCount = 0; |
|
665 } |
|
666 else if (lineLength < 27 || |
|
667 (iWgl4 && !InWgl4((unsigned int)aCode))) |
|
668 aCodeCount = 0; |
|
669 } |
|
670 |
|
671 if (aCode[0] != -1) |
|
672 { |
|
673 // find '[' |
|
674 aKeyStart = charConsumed; |
|
675 while (aKeyStart < lineLength && aLine[aKeyStart] != '[') |
|
676 aKeyStart++; |
|
677 |
|
678 // read all hex before '[' |
|
679 int index = charConsumed + 1; |
|
680 while (index < aKeyStart) |
|
681 { |
|
682 aCode[aCodeCount] = Hex(aLine+index, charConsumed, true); |
|
683 if (aCode[aCodeCount] == -1) |
|
684 break; |
|
685 |
|
686 index += charConsumed + 1; |
|
687 aCodeCount++; |
|
688 } |
|
689 |
|
690 // find number of collation keys |
|
691 aKeyCount = 0; |
|
692 index = aKeyStart; |
|
693 while (index < lineLength && aLine[index] != '%' && aLine[index] != '#') |
|
694 { |
|
695 if (aLine[index] == '[') |
|
696 aKeyCount++; |
|
697 index++; |
|
698 } |
|
699 } |
|
700 |
|
701 return aCodeCount > 0; |
|
702 } |
|
703 |
|
704 void Reader::AddKeyOneToOne(const char* aLine, const int aCode, const int aKeyStart) |
|
705 { |
|
706 if (iIndices >= EMaxCollationIndices) |
|
707 { |
|
708 cout << "too many Unicode values"; |
|
709 exit(1); |
|
710 } |
|
711 CollationIndex& index = iCollationIndex[iIndices++]; |
|
712 index.iCode = aCode; |
|
713 index.iIndex = -1; |
|
714 |
|
715 /* |
|
716 First try to find the key in the array of keys found so far. |
|
717 Search backwards to use the fact that runs of the same key occur together. |
|
718 */ |
|
719 CollationKey key; |
|
720 int charConsumed = 0; |
|
721 GetCollationKey(aLine + aKeyStart, charConsumed, &key); |
|
722 for (int i = iKeys - 1; i >= 0 && index.iIndex == -1; i--) |
|
723 if (iCollationKey[i] == key) |
|
724 index.iIndex = i; |
|
725 |
|
726 // If that fails, add a new key. |
|
727 if (index.iIndex == -1) |
|
728 { |
|
729 index.iIndex = iKeys++; |
|
730 if (iKeys > EMaxCollationKeys) |
|
731 { |
|
732 cout << "too many keys"; |
|
733 exit(1); |
|
734 } |
|
735 iCollationKey[index.iIndex] = key; |
|
736 } |
|
737 } |
|
738 /* |
|
739 Read 1-to-1 mapping. Sample: |
|
740 02B9 ; [*02A5.0020.0002.02B9] % MODIFIER LETTER PRIME |
|
741 |
|
742 aCombinedFile = true: aFileName is combined file, which contains base keys, comp keys, and string keys. |
|
743 */ |
|
744 void Reader::ReadBaseKeys(const char* aFileName) |
|
745 { |
|
746 iSuppressCanonseqWarning = iStandard || iWgl4; |
|
747 iLineNumber = 0; |
|
748 iInputFileName = aFileName; |
|
749 ifstream input_file; |
|
750 |
|
751 #ifdef __MSVCDOTNET__ |
|
752 input_file.open(iInputFileName, ios::in); |
|
753 #else //!__MSVCDOTNET__ |
|
754 input_file.open(iInputFileName, ios::in | ios::nocreate); |
|
755 #endif //__MSVCDOTNET__ |
|
756 |
|
757 if (input_file.fail()) |
|
758 { |
|
759 cout << "cannot open input file '" << iInputFileName << "'\n"; |
|
760 exit(1); |
|
761 } |
|
762 cout << "reading base keys from '" << iInputFileName << "'\n"; |
|
763 |
|
764 char line[1024]; |
|
765 for (;;) |
|
766 { |
|
767 input_file.getline(line,sizeof(line)); |
|
768 if (input_file.eof()) |
|
769 break; |
|
770 iLineNumber++; |
|
771 // line number counting |
|
772 if (iLineNumber % 100 == 0) |
|
773 { |
|
774 cout << "line " << iLineNumber << '\n'; |
|
775 cout.flush(); |
|
776 } |
|
777 int code[16]; |
|
778 int codeCount = 0; |
|
779 int key_start = 0; |
|
780 int keyCount = 0; |
|
781 if (ParseLine(line, code, codeCount, key_start, keyCount)) |
|
782 { |
|
783 if (codeCount != 1 || keyCount != 1) |
|
784 continue; // goto next line |
|
785 AddKeyOneToOne(line, code[0], key_start); |
|
786 } |
|
787 } |
|
788 |
|
789 input_file.close(); |
|
790 } |
|
791 |
|
792 void Reader::AddKeyOneToMuch(const char* aLine, const int aCode, const int aKeyStart) |
|
793 { |
|
794 if (iIndices >= EMaxCollationIndices) |
|
795 { |
|
796 cout << "too many Unicode values"; |
|
797 exit(1); |
|
798 } |
|
799 CollationIndex& index = iCollationIndex[iIndices++]; |
|
800 index.iCode = aCode; |
|
801 index.iIndex = iKeys; |
|
802 GetMultipleCollationKeys(aLine + aKeyStart); |
|
803 } |
|
804 /* |
|
805 Read 1-to-much mapping. |
|
806 3303 ; [.279F.0020.001C.3303][.1114.0020.001C.3303][.27C7.0020.001F.3303] # SQUARE AARU; QQKN |
|
807 */ |
|
808 void Reader::ReadCompKeys(const char* aFileName) |
|
809 { |
|
810 iSuppressCanonseqWarning = iStandard || iWgl4; |
|
811 iLineNumber = 0; |
|
812 iInputFileName = aFileName; |
|
813 ifstream input_file; |
|
814 |
|
815 #ifdef __MSVCDOTNET__ |
|
816 input_file.open(iInputFileName, ios::in); |
|
817 #else //!__MSVCDOTNET__ |
|
818 input_file.open(iInputFileName, ios::in | ios::nocreate); |
|
819 #endif //__MSVCDOTNET__ |
|
820 |
|
821 if (input_file.fail()) |
|
822 { |
|
823 cout << "there are no composite keys; '" << iInputFileName << "' not found\n"; |
|
824 return; |
|
825 } |
|
826 cout << "reading composite keys from '" << iInputFileName << "'\n"; |
|
827 |
|
828 char line[1024]; |
|
829 for (;;) |
|
830 { |
|
831 input_file.getline(line,sizeof(line)); |
|
832 if (input_file.eof()) |
|
833 break; |
|
834 iLineNumber++; |
|
835 // line number counting |
|
836 if (iLineNumber % 100 == 0) |
|
837 { |
|
838 cout << "line " << iLineNumber << '\n'; |
|
839 cout.flush(); |
|
840 } |
|
841 int code[16]; |
|
842 int codeCount = 0; |
|
843 int key_start = 0; |
|
844 int keyCount = 0; |
|
845 if (ParseLine(line, code, codeCount, key_start, keyCount)) |
|
846 { |
|
847 if (codeCount != 1 || keyCount < 2) |
|
848 continue; // goto next line |
|
849 AddKeyOneToMuch(line, code[0], key_start); |
|
850 } |
|
851 } |
|
852 |
|
853 input_file.close(); |
|
854 } |
|
855 |
|
856 |
|
857 void Reader::AddKeyMuchToMuch(const char* aLine, const int aCode[16], const int aCodeCount, const int aKeyStart) |
|
858 { |
|
859 |
|
860 // Store the index to the Unicode string and the key sequence. |
|
861 if (iStringIndices > EMaxStringIndices) |
|
862 { |
|
863 cout << "too many string indices"; |
|
864 exit(1); |
|
865 } |
|
866 iStringIndex[iStringIndices++] = (iStringElements << 16) | iKeys; |
|
867 |
|
868 // Reserve space for the length. |
|
869 if (iStringElements >= EMaxStringElements) |
|
870 { |
|
871 cout << "too many string elements"; |
|
872 exit(1); |
|
873 } |
|
874 iStringElements++; |
|
875 |
|
876 // Read the Unicode string. |
|
877 int length = 0; // in unit of int16 |
|
878 int charCount = 0; // in unit of char. for debug. |
|
879 |
|
880 for (int i=0; i<aCodeCount; i++) |
|
881 { |
|
882 if (iStringElements >= EMaxStringElements) |
|
883 { |
|
884 cout << "too many string elements"; |
|
885 exit(1); |
|
886 } |
|
887 |
|
888 if (aCode[i] > 0xFFFF) |
|
889 { |
|
890 // UCS4 --> UTF-16 |
|
891 iStringElement[iStringElements++] = 0xD7C0 + (aCode[i] >> 10); |
|
892 iStringElement[iStringElements++] = 0xDC00 | (aCode[i] & 0x3FF); |
|
893 length += 2; |
|
894 } |
|
895 else |
|
896 { |
|
897 iStringElement[iStringElements++] = aCode[i]; |
|
898 length++; |
|
899 } |
|
900 charCount++; |
|
901 } |
|
902 |
|
903 iStringElement[iStringElements - length - 1] = (unsigned int)length; |
|
904 |
|
905 // Read the key sequence. |
|
906 GetMultipleCollationKeys(aLine + aKeyStart); |
|
907 } |
|
908 /* |
|
909 Read much-to-much mapping. Sample: |
|
910 004F 0338 [.08EA.0020.0008.00D8] % capital O-stroke |
|
911 0E40 0E08 ; [.1E2B.0020.0002.0E08][.1E5E.0020.001F.0E40] # <THAI CHARACTER SARA E, THAI CHARACTER CHO CHAN> |
|
912 */ |
|
913 void Reader::ReadStrings(const char* aFileName) |
|
914 { |
|
915 iSuppressCanonseqWarning = iStandard || iWgl4; |
|
916 iLineNumber = 0; |
|
917 iInputFileName = aFileName; |
|
918 ifstream input_file; |
|
919 |
|
920 #ifdef __MSVCDOTNET__ |
|
921 input_file.open(iInputFileName, ios::in); |
|
922 #else //!__MSVCDOTNET__ |
|
923 input_file.open(iInputFileName, ios::in | ios::nocreate); |
|
924 #endif //__MSVCDOTNET__ |
|
925 |
|
926 if (input_file.fail()) |
|
927 { |
|
928 cout << "there are no strings; '" << iInputFileName << "' not found\n"; |
|
929 return; |
|
930 } |
|
931 cout << "reading strings from '" << iInputFileName << "'\n"; |
|
932 |
|
933 char line[1024]; |
|
934 for (;;) |
|
935 { |
|
936 input_file.getline(line,sizeof(line)); |
|
937 if (input_file.eof()) |
|
938 break; |
|
939 iLineNumber++; |
|
940 // line number counting |
|
941 if (iLineNumber % 100 == 0) |
|
942 { |
|
943 cout << "line " << iLineNumber << '\n'; |
|
944 cout.flush(); |
|
945 } |
|
946 int code[16]; |
|
947 int codeCount = 0; |
|
948 int key_start = 0; |
|
949 int keyCount = 0; |
|
950 if (ParseLine(line, code, codeCount, key_start, keyCount)) |
|
951 { |
|
952 if (codeCount < 2 || keyCount < 1) |
|
953 continue; // goto next line |
|
954 AddKeyMuchToMuch(line, code, codeCount, key_start); |
|
955 } |
|
956 } |
|
957 |
|
958 input_file.close(); |
|
959 } |
|
960 |
|
961 /* |
|
962 Read combined key table. Sample: |
|
963 1-to-1 mapping: |
|
964 02B9 ; [*02A5.0020.0002.02B9] % MODIFIER LETTER PRIME |
|
965 |
|
966 1-to-much mapping: |
|
967 3303 ; [.279F.0020.001C.3303][.1114.0020.001C.3303][.27C7.0020.001F.3303] # SQUARE AARU; QQKN |
|
968 |
|
969 much-to-much mapping: |
|
970 004F 0338 [.08EA.0020.0008.00D8] % capital O-stroke |
|
971 0E40 0E08 ; [.1E2B.0020.0002.0E08][.1E5E.0020.001F.0E40] # <THAI CHARACTER SARA E, THAI CHARACTER CHO CHAN> |
|
972 */ |
|
973 void Reader::ReadAllKeys(const char* aFileName) |
|
974 { |
|
975 iSuppressCanonseqWarning = iStandard || iWgl4; |
|
976 iLineNumber = 0; |
|
977 iInputFileName = aFileName; |
|
978 ifstream input_file; |
|
979 |
|
980 #ifdef __MSVCDOTNET__ |
|
981 input_file.open(iInputFileName, ios::in); |
|
982 #else //!__MSVCDOTNET__ |
|
983 input_file.open(iInputFileName, ios::in | ios::nocreate); |
|
984 #endif //__MSVCDOTNET__ |
|
985 |
|
986 if (input_file.fail()) |
|
987 { |
|
988 cout << "there are no keys; '" << iInputFileName << "' not found\n"; |
|
989 return; |
|
990 } |
|
991 cout << "reading all keys from '" << iInputFileName << "'\n"; |
|
992 |
|
993 char line[1024]; |
|
994 for (;;) |
|
995 { |
|
996 if (input_file.eof()) |
|
997 break; |
|
998 input_file.getline(line,sizeof(line)); |
|
999 iLineNumber++; |
|
1000 |
|
1001 int code[16]; |
|
1002 int codeCount = 0; |
|
1003 int key_start = 0; |
|
1004 int keyCount = 0; |
|
1005 if (ParseLine(line, code, codeCount, key_start, keyCount)) |
|
1006 { |
|
1007 if (codeCount == 1 && keyCount == 1) |
|
1008 AddKeyOneToOne(line, code[0], key_start); |
|
1009 else if (codeCount == 1 && keyCount > 1) |
|
1010 AddKeyOneToMuch(line, code[0], key_start); |
|
1011 else if (codeCount > 1 && keyCount > 0) |
|
1012 AddKeyMuchToMuch(line, code, codeCount, key_start); |
|
1013 else |
|
1014 cout << "ignore line: " << line << "\n"; |
|
1015 } |
|
1016 } |
|
1017 |
|
1018 input_file.close(); |
|
1019 } |
|
1020 |
|
1021 |
|
1022 // Pack the 3 collation key levels into a single 32-bit integer. |
|
1023 unsigned int Reader::PackKey(const CollationKey& aValue) |
|
1024 { |
|
1025 unsigned int level0 = aValue.iLevel[0]; |
|
1026 unsigned int level1 = aValue.iLevel[1]; |
|
1027 if (level1 > 0) |
|
1028 level1 -= (KLevel1Min - 1); |
|
1029 unsigned int level2 = aValue.iLevel[2]; |
|
1030 if (level2 > 0) |
|
1031 level2 -= (KLevel2Min - 1); |
|
1032 unsigned int key = level0 << 16 | level1 << 8 | level2 << 2; |
|
1033 if (aValue.iIgnorable) |
|
1034 key |= 2; |
|
1035 if (aValue.iStop) |
|
1036 key |= 1; |
|
1037 return key; |
|
1038 } |
|
1039 |
|
1040 // Pack a collation index value into a single 32-bit integer. |
|
1041 int Reader::PackIndex(const CollationIndex& aValue, unsigned int result[2]) |
|
1042 { |
|
1043 unsigned int code = aValue.iCode; |
|
1044 unsigned int index = aValue.iIndex; |
|
1045 if (code <= 0xFFFF) |
|
1046 { |
|
1047 result[0] = (code << 16 | index); |
|
1048 return 1; |
|
1049 } |
|
1050 else |
|
1051 { |
|
1052 result[0] = (::HighSurrogate(code) << 16 | index); |
|
1053 result[1] = (::LowSurrogate(code) << 16 | index); |
|
1054 return 2; |
|
1055 } |
|
1056 } |
|
1057 |
|
1058 const Reader* TheReader; |
|
1059 static int CompareStringIndices(const void* aIndex1,const void* aIndex2) |
|
1060 { |
|
1061 return TheReader->CompareStringIndices(*(unsigned int*)aIndex1 >> 16,*(unsigned int*)aIndex2 >> 16); |
|
1062 } |
|
1063 |
|
/*
Compare two strings of character values element by element.

Returns 0 if the strings are identical, otherwise the difference between the first pair of
elements that differ. When one string is shorter, its missing elements are treated as -1,
so a string sorts before any longer string that starts with it.
*/
int CompareUnicodeStrings(const int *aString1,int aLength1,const int *aString2,int aLength2)
	{
	const int longest = (aLength1 > aLength2) ? aLength1 : aLength2;
	for (int pos = 0; pos < longest; ++pos)
		{
		const int left = (pos < aLength1) ? aString1[pos] : -1;
		const int right = (pos < aLength2) ? aString2[pos] : -1;
		if (left != right)
			return left - right;
		}
	return 0;
	}
|
1075 |
|
1076 int Reader::CompareStringIndices(int aIndex1,int aIndex2) const |
|
1077 { |
|
1078 return CompareUnicodeStrings(iStringElement + aIndex1 + 1,iStringElement[aIndex1], |
|
1079 iStringElement + aIndex2 + 1,iStringElement[aIndex2]); |
|
1080 } |
|
1081 |
|
1082 void Reader::WriteOutput(const char* aFileName, bool aCopyright) |
|
1083 { |
|
1084 int i; |
|
1085 ofstream output_file; |
|
1086 output_file.open(aFileName); |
|
1087 if (output_file.fail()) |
|
1088 { |
|
1089 cout << "cannot open output file '" << aFileName << "'\n"; |
|
1090 exit(1); |
|
1091 } |
|
1092 cout << "writing output to '" << aFileName << "'\n"; |
|
1093 |
|
1094 char *locale = NULL; |
|
1095 if (iStandard) |
|
1096 locale = _strdup("Standard"); |
|
1097 else |
|
1098 locale = _strdup(iLocaleName); |
|
1099 |
|
1100 if (!iStandard) |
|
1101 { |
|
1102 _strlwr(locale); |
|
1103 locale[0] = (char)toupper(locale[0]); |
|
1104 if (aCopyright) |
|
1105 { |
|
1106 char* capsFileName = new char[strlen(aFileName) + 1]; |
|
1107 strcpy(capsFileName, aFileName); |
|
1108 _strupr(capsFileName); |
|
1109 output_file << "/*\n" << capsFileName << "\n\nCopyright (C) 2000-2009 Nokia Corporation and/or its subsidiary(-ies). All rights reserved.\n*/\n"; |
|
1110 delete [] capsFileName; |
|
1111 output_file << "\n/*\nThe LCharSet object used by the " << locale << " locale.\n"; |
|
1112 output_file << "Generated by COLTAB.\n*/\n"; |
|
1113 } |
|
1114 |
|
1115 output_file << "\n#include \"ls_std.h\"\n#include <collate.h>\n"; |
|
1116 output_file << "\nconst TUint KUid" << iCPlusPlusIdentifier << "CollationMethod = "; |
|
1117 if (iUidString) |
|
1118 output_file << "0x" << iUidString << ";\n"; |
|
1119 else |
|
1120 { |
|
1121 output_file << "/* FILL THIS IN */;\n"; |
|
1122 cout << "Warning: File will need editing\nWarning: see coltab /h2 for details.\n"; |
|
1123 } |
|
1124 } |
|
1125 |
|
1126 /* |
|
1127 Write the unique collation keys. |
|
1128 Each one has the format, going from highest to lowest bit: |
|
1129 |
|
1130 16 bits: level-0 key |
|
1131 8 bits: level-1 key |
|
1132 6 bits: level-2 key |
|
1133 1 bit: set if this key is optionally ignorable |
|
1134 1 bit: set if this is the last key in the string of keys for a single Unicode value |
|
1135 |
|
1136 */ |
|
1137 if (iKeys != 0) |
|
1138 { |
|
1139 output_file << "\nstatic const TUint32 The" << iCPlusPlusIdentifier << "Key[] = \n\t{"; |
|
1140 CollationKey* ck = iCollationKey; |
|
1141 output_file << "\t // " << iKeys << " keys"; |
|
1142 output_file << hex; |
|
1143 for (i = 0; i < iKeys; i++, ck++) |
|
1144 { |
|
1145 unsigned int key = PackKey(*ck); |
|
1146 if (i % 8 == 0) |
|
1147 output_file << "\n\t"; |
|
1148 output_file << "0x"; |
|
1149 output_file << key << ","; |
|
1150 } |
|
1151 output_file << dec; |
|
1152 output_file << "\n\t};\n\n"; |
|
1153 } |
|
1154 |
|
1155 if (iIndices != 0) |
|
1156 { |
|
1157 // Sort then write the collation index values - these relate Unicode values to collation keys. |
|
1158 qsort(iCollationIndex,iIndices,sizeof(CollationIndex),CollationIndex::Compare); |
|
1159 output_file << "static const TUint32 The" << iCPlusPlusIdentifier << "Index[] = \n\t{"; |
|
1160 CollationIndex* ci = iCollationIndex; |
|
1161 int entry=0; |
|
1162 output_file << "\t // " << iIndices << " indices"; |
|
1163 output_file << hex; |
|
1164 for (i = 0; i < iIndices; i++, ci++, entry++) |
|
1165 { |
|
1166 unsigned int key[2]; |
|
1167 int bytecount = PackIndex(*ci, key); |
|
1168 |
|
1169 if (entry % 8 == 0) |
|
1170 output_file << "\n\t"; |
|
1171 output_file << "0x"; |
|
1172 output_file << key[0] << ","; |
|
1173 |
|
1174 if (bytecount == 2) |
|
1175 { |
|
1176 entry++; |
|
1177 if (entry % 8 == 0) |
|
1178 output_file << "\n\t"; |
|
1179 output_file << "0x"; |
|
1180 output_file << key[1] << ","; |
|
1181 } |
|
1182 } |
|
1183 output_file << dec; |
|
1184 output_file << "\n\t};"; |
|
1185 output_file << "\t // " << entry << " entries"; |
|
1186 output_file << "\n\n"; |
|
1187 iIndices = entry; //One surrogate pair occupies 2 entries |
|
1188 } |
|
1189 |
|
1190 if (iStringElements) |
|
1191 { |
|
1192 // Write the Unicode strings; these are preceded by their lengths. |
|
1193 output_file << "static const TUint16 The" << iCPlusPlusIdentifier << "StringElement[] = \n\t{"; |
|
1194 output_file << hex; |
|
1195 for (i = 0; i < iStringElements; i++) |
|
1196 { |
|
1197 if (i % 8 == 0) |
|
1198 output_file << "\n\t"; |
|
1199 output_file << "0x" << iStringElement[i] << ","; |
|
1200 } |
|
1201 output_file << dec; |
|
1202 if (iStringElements==0) |
|
1203 output_file << "0"; |
|
1204 output_file << "\n\t};\n\n"; |
|
1205 |
|
1206 /* |
|
1207 Sort then write the string index values - these relate Unicode strings to collation keys. |
|
1208 Each one has the string index in the upper word and the key index in the lower word. |
|
1209 */ |
|
1210 TheReader = this; |
|
1211 qsort(iStringIndex,iStringIndices,sizeof(iStringIndex[0]),::CompareStringIndices); |
|
1212 output_file << "static const TUint32 The" << iCPlusPlusIdentifier << "StringIndex[] = \n\t{"; |
|
1213 output_file << hex; |
|
1214 for (i = 0; i < iStringIndices; i++) |
|
1215 { |
|
1216 if (i % 8 == 0) |
|
1217 output_file << "\n\t"; |
|
1218 output_file << "0x" << iStringIndex[i] << ","; |
|
1219 } |
|
1220 output_file << dec; |
|
1221 if (iStringIndices ==0) |
|
1222 output_file << "0"; |
|
1223 output_file << "\n\t};\n\n"; |
|
1224 } |
|
1225 |
|
1226 // Write the collation table structure. |
|
1227 output_file << "static const TCollationKeyTable The" << iCPlusPlusIdentifier << "Table = \n\t{ "; |
|
1228 if (iKeys) |
|
1229 output_file << "The" << iCPlusPlusIdentifier << "Key"; |
|
1230 else |
|
1231 output_file << "0"; |
|
1232 if (iIndices) |
|
1233 output_file << ", The" << iCPlusPlusIdentifier << "Index, " << iIndices; |
|
1234 else |
|
1235 output_file << ", 0, 0"; |
|
1236 if (iStringElements) |
|
1237 output_file << ", The" << iCPlusPlusIdentifier << "StringElement, The" << iCPlusPlusIdentifier << "StringIndex, " << iStringIndices << " };\n"; |
|
1238 else |
|
1239 output_file << ", 0, 0, 0 };\n"; |
|
1240 |
|
1241 if (!iStandard) |
|
1242 output_file << "\nstatic const TCollationMethod TheCollationMethod[] = \n"\ |
|
1243 " {\n"\ |
|
1244 " {\n"\ |
|
1245 " KUid" << iCPlusPlusIdentifier << "CollationMethod, // the method for the locale\n"\ |
|
1246 " NULL, // use the standard table as the main table\n"\ |
|
1247 " &The" << iCPlusPlusIdentifier << "Table, // the locale values override the standard values\n"\ |
|
1248 " 0 // the flags are standard\n"\ |
|
1249 " },\n"\ |
|
1250 " {\n"\ |
|
1251 " KUidBasicCollationMethod, // the standard unlocalised method\n"\ |
|
1252 " NULL, // null means use the standard table\n"\ |
|
1253 " NULL, // there's no override table\n"\ |
|
1254 " 0 // the flags are standard\n"\ |
|
1255 " }\n"\ |
|
1256 " };\n"\ |
|
1257 "\n"\ |
|
1258 "static const TCollationDataSet TheCollationDataSet =\n"\ |
|
1259 " {\n"\ |
|
1260 " TheCollationMethod,\n"\ |
|
1261 " 2\n"\ |
|
1262 " };"\ |
|
1263 "\n\n"\ |
|
1264 "// The one and only locale character set object.\n"\ |
|
1265 "const LCharSet TheCharSet =\n"\ |
|
1266 " {\n"\ |
|
1267 " NULL,\n"\ |
|
1268 " &TheCollationDataSet\n"\ |
|
1269 " };\n"; |
|
1270 |
|
1271 output_file.close(); |
|
1272 delete [] locale; |
|
1273 } |
|
1274 |
|
1275 int CollationIndex::Compare(const void* aIndex1,const void* aIndex2) |
|
1276 { |
|
1277 return ((CollationIndex*)aIndex1)->iCode - ((CollationIndex*)aIndex2)->iCode; |
|
1278 } |