|
1 /* |
|
2 * Copyright (c) 1999 Nokia Corporation and/or its subsidiary(-ies). |
|
3 * All rights reserved. |
|
4 * This component and the accompanying materials are made available |
|
5 * under the terms of "Eclipse Public License v1.0" |
|
6 * which accompanies this distribution, and is available |
|
7 * at the URL "http://www.eclipse.org/legal/epl-v10.html". |
|
8 * |
|
9 * Initial Contributors: |
|
10 * Nokia Corporation - initial contribution. |
|
11 * |
|
12 * Contributors: |
|
13 * |
|
14 * Description: |
|
15 * |
|
16 */ |
|
17 |
|
18 |
|
19 |
|
20 /* |
|
21 |
|
22 Reads and parses the Unicode collation value table and writes out a C++ source file |
|
23 containing the data in a form that can be used by the EPOC collation system. |
|
24 |
|
25 The program reads three files: |
|
26 |
|
27 1. Base keys (maps single Unicode values to single collation key values): must be in the same format as |
|
28 basekeys.txt, supplied with the Standard Unicode Collation system |
|
29 |
|
30 2. Composite keys (maps single Unicode values to strings of collation keys): must be in the same format as |
|
31 compkeys.txt, supplied with the Standard Unicode Collation system |
|
32 |
|
33 3. Strings (maps strings of Unicode values to single collation keys OR strings of collation keys): must be in the |
|
34 same format as compkeys.txt, except that there can be any number of Unicode characters at the start of the line, |
|
35 space-separated and each exactly 4 hex digits. |
|
36 */ |
|
37 |
|
38 #include <assert.h> |
|
39 #include <ctype.h> |
|
40 #include <fstream.h> |
|
41 #include <stdlib.h> |
|
42 #include <string.h> |
|
43 #include <stdio.h> |
|
44 |
|
/*
Bit widths and legal ranges of the level-1 and level-2 keys, chosen so that the keys can be packed.
A key of zero is kept as zero; any other key is reduced by (minimum - 1) before packing,
so each maximum is the largest key whose reduced form still fits in the bits available.
*/
const unsigned int KLevel1Bits = 8;
const unsigned int KLevel1Min = 0x20;
const unsigned int KLevel1Max = KLevel1Min + ((1 << KLevel1Bits) - 1) - 1;
const unsigned int KLevel2Bits = 6;
const unsigned int KLevel2Min = 1;
const unsigned int KLevel2Max = KLevel2Min + ((1 << KLevel2Bits) - 1) - 1;
|
55 |
|
/*
Table of characters in the WGL4 set, plus characters in canonical decompositions of
those characters, plus commonly used control characters and space characters,
given as ranges of Unicode characters. In each pair, the first code is the first in the range,
and the second is the first code NOT in the range.

The pairs must stay sorted in ascending order and must not overlap, because InWgl4
looks characters up with bsearch.

The extra characters are added mainly to ensure that control characters and spaces are
normally ignored. The extra characters are:

0x0000-0x001F: ASCII control characters
0x2000-0x2012: spaces, hyphen variants, figure dash
0x2028-0x202E: line and paragraph separator, bidirectional control characters
0xFEFF : byte-order mark
0xFFFC-0xFFFD: object replacement character, replacement character
*/
const unsigned short Wgl4Range[] =
	{
	0x00, 0x7f, // All ASCII
	0xa0, 0x180, // Non-breaking space, Latin-1, Latin Extended-A
	0x192,0x193, // Latin f with hook
	0x1fa,0x200, // A-ring, a-ring, AE, ae, O slash, o slash all with acute accent
	0x2c6,0x2c8, // non-combining circumflex and caron
	0x2c9,0x2ca, // non-combining macron
	0x2d8,0x2dc, // non-combining breve, dot above, ring above, ogonek
	0x2dd,0x2de, // non-combining double acute
	0x300,0x305, // combining grave, acute, circumflex, tilde, macron
	0x306,0x309, // combining breve, dot above, double dot above
	0x30a,0x30e, // combining ring above, double acute, caron, vertical line above
	0x327,0x329, // combining cedilla, ogonek
	0x384,0x38b, // Greek
	0x38c,0x38d, // Greek
	0x38e,0x3a2, // Greek
	0x3a3,0x3cf, // Greek
	0x401,0x40d, // Cyrillic
	0x40e,0x450, // Cyrillic
	0x451,0x45d, // Cyrillic
	0x45e,0x460, // Cyrillic
	0x490,0x492, // Cyrillic
	0x1e80,0x1e86, // Both W and w with each of grave, acute and diaeresis
	0x1ef2,0x1ef4, // Y with grave, y with grave
	0x2000,0x2016, // various space and horizontal lines
	0x2017,0x201f, // double vertical line, double low line, various quotation marks
	0x2020,0x2023, // dagger, double dagger, bullet
	0x2026,0x2027, // ellipsis
	0x2028,0x202F, // line & paragraph separators and directional formatting
	0x2030,0x2031, // per mille
	0x2032,0x2034, // prime
	0x2039,0x203b, // single angle quotation marks
	0x203c,0x203d, // double exclamation mark
	0x203e,0x203f, // non-combining overscore
	0x2044,0x2045, // fraction slash
	0x207f,0x2080, // superscript n
	0x20a3,0x20a5, // French Franc, Italian/Turkish Lira
	0x20a7,0x20a8, // Spanish Peseta
	0x20ac,0x20ad, // Euro symbol
	0x2105,0x2106, // care of
	0x2113,0x2114, // script l
	0x2116,0x2117, // numero
	0x2122,0x2123, // trade mark
	0x2126,0x2127, // ohm
	0x212e,0x212f, // estimated (net weight)
	0x215b,0x215f, // 1/8, 3/8, 5/8, 7/8
	0x2190,0x2196, // horizontal and vertical arrows
	0x21a8,0x21a9, // up down arrow with base
	0x2202,0x2203, // partial differential
	0x2206,0x2207, // increment (delta)
	0x220f,0x2210, // n-ary product (pi)
	0x2211,0x2213, // n-ary sum (sigma), minus
	0x2215,0x2216, // division (slash)
	0x2219,0x221b, // bullet operator, square root
	0x221e,0x2220, // infinity, right angle
	0x2229,0x222a, // intersection
	0x222b,0x222c, // integral
	0x2248,0x2249, // almost equal to
	0x2260,0x2262, // not equal to, identical to
	0x2264,0x2266, // less-than-or-equal-to, greater-than-or-equal-to
	0x2302,0x2303, // house
	0x2310,0x2311, // reversed not sign
	0x2320,0x2322, // top and bottom of integral
	0x2500,0x2501, // box drawing
	0x2502,0x2503, // box drawing
	0x250c,0x250d, // box drawing
	0x2510,0x2511, // box drawing
	0x2514,0x2515, // box drawing
	0x2518,0x2519, // box drawing
	0x251c,0x251d, // box drawing
	0x2524,0x2525, // box drawing
	0x252c,0x252d, // box drawing
	0x2534,0x2535, // box drawing
	0x253c,0x253d, // box drawing
	0x2550,0x256d, // box drawing
	0x2580,0x2581, // block element
	0x2584,0x2585, // block element
	0x2588,0x2589, // block element
	0x258c,0x258d, // block element
	0x2590,0x2594, // block element
	0x25a0,0x25a2, // geometric shapes
	0x25aa,0x25ad, // geometric shapes
	0x25b2,0x25b3, // geometric shapes
	0x25ba,0x25bb, // geometric shapes
	0x25bc,0x25bd, // geometric shapes
	0x25c4,0x25c5, // geometric shapes
	0x25ca,0x25cc, // geometric shapes
	0x25cf,0x25d0, // geometric shapes
	0x25d8,0x25da, // geometric shapes
	0x25e6,0x25e7, // geometric shapes
	0x263a,0x263d, // smilies, sun
	0x2640,0x2641, // female
	0x2642,0x2643, // male
	0x2660,0x2661, // spade
	0x2663,0x2664, // club
	0x2665,0x2667, // heart
	0x266a,0x266c, // quaver, beamed quavers
	0xfb01,0xfb03, // fi, fl ligatures
	0xfeff,0xff00, // zero-width non-breaking space
	0xfffc, 0xfffe // object replacement character and replacement character
	};
// Number of (first, end) pairs in Wgl4Range, i.e. half the number of array elements.
const int Wgl4Ranges = sizeof(Wgl4Range) / sizeof(Wgl4Range[0]) / 2;
|
174 |
|
/*
Comparison callback used to search the WGL4 range table.
Each argument points to a pair of unsigned shorts. If the second pair has two equal values it is
taken to be the search key and the roles of the arguments are exchanged; otherwise the first
argument is the key and the second is a table entry (first code, first code not in range).
Returns -1 if the key's code lies below the range, 1 if it lies at or beyond the end of the
range, and 0 if the range contains it.
*/
int CompareWgl4Ranges(const void* aRange1,const void* aRange2)
	{
	const unsigned short* first = static_cast<const unsigned short*>(aRange1);
	const unsigned short* second = static_cast<const unsigned short*>(aRange2);

	const bool key_is_second = second[0] == second[1];
	const unsigned short* key = key_is_second ? second : first;
	const unsigned short* range = key_is_second ? first : second;

	if (key[0] < range[0])
		return -1;
	return key[0] >= range[1] ? 1 : 0;
	}
|
192 |
|
193 // Determine if a character is in the WGL4 character repertoire. |
|
194 static bool InWgl4(unsigned short aChar) |
|
195 { |
|
196 unsigned short key[2]; |
|
197 key[0] = key[1] = aChar; |
|
198 return bsearch(key,Wgl4Range,Wgl4Ranges,sizeof(Wgl4Range[0]) * 2,CompareWgl4Ranges) != NULL; |
|
199 } |
|
200 |
|
// A single collation key: one value per level plus two flags.
class CollationKey
	{
public:
	enum
		{
		ELevels = 3		// number of key levels held in iLevel
		};

	// Keys are equal only when every level and both flags are the same.
	bool operator==(const CollationKey& k) const
		{
		for (int level = 0; level < ELevels; level++)
			{
			if (iLevel[level] != k.iLevel[level])
				return false;
			}
		return iIgnorable == k.iIgnorable && iStop == k.iStop;
		}

	int iLevel[ELevels];	// the key values, indexed by level
	bool iIgnorable;		// true if this key can normally be ignored
	bool iStop;				// true if this is the last key in a string of keys
	};
|
217 |
|
// The collation index for a single Unicode value: it ties a code to a position in the key table.
class CollationIndex
	{
public:
	// qsort-style callback; both arguments point to CollationIndex objects, ordered by iCode.
	static int Compare(const void* aIndex1,const void* aIndex2);

	int iCode; // Unicode value
	int iIndex; // index into the key table
	};
|
227 |
|
/*
Reads the collation input files into fixed-size in-memory tables and writes them out as C++ source.
The object holds several large arrays as direct members, so main allocates it with new.
*/
class Reader
	{
public:
	Reader(bool aWgl4,bool aStandard,const char* aLocaleName);
	~Reader();
	// Each Read function parses one input file and appends what it finds to the tables.
	void ReadBaseKeys(const char* aFileName);
	void ReadCompKeys(const char* aFileName);
	void ReadStrings(const char* aFileName);
	// Write all the tables to aFileName as C++ source.
	void WriteOutput(const char* aFileName);
	// Compare the two Unicode strings starting at the given offsets in iStringElement.
	int CompareStringIndices(int aIndex1,int aIndex2) const;

private:
	int Hex(const char *aString,bool aTolerate = false);
	void GetCollationKey(const char* aString,CollationKey* aKey = NULL);
	void GetMultipleCollationKeys(const char* aString);
	unsigned int PackKey(const CollationKey& aValue);
	unsigned int PackIndex(const CollationIndex& aValue);
	bool ParseLine(const char* aLine,int& aCode,int& aKeyStart);

	// Capacities of the tables below.
	enum
		{
		EMaxCollationKeys = 65536,
		EMaxCollationIndices = 65536,
		EMaxStringElements = 65536,
		EMaxStringIndices = 65536
		};
	CollationKey iCollationKey[EMaxCollationKeys];			// the key table
	int iKeys;												// number of entries used in iCollationKey
	CollationIndex iCollationIndex[EMaxCollationIndices];	// Unicode value -> key table index
	int iIndices;											// number of entries used in iCollationIndex
	unsigned short iStringElement[EMaxStringElements];		// Unicode strings, each preceded by its length
	int iStringElements;									// number of entries used in iStringElement
	unsigned int iStringIndex[EMaxStringIndices];			// string offset in the upper 16 bits, key index in the lower 16
	int iStringIndices;										// number of entries used in iStringIndex
	const char* iInputFileName;								// file currently being read; used in error messages
	int iLineNumber;										// line currently being read; used in error messages
	bool iWgl4; // true if writing keys for wgl4 characters only
	bool iStandard; // true if reading standard files, not tailoring files
	const char* iLocaleName;								// locale name used when naming the generated tables
	};
|
268 |
|
269 void UsageError() |
|
270 { |
|
271 cout << "Usage: coltab <locale>\n"; |
|
272 cout << "For the locales 'standard' and 'wgl4' coltab reads basekeys.txt & compkeys.txt\n"; |
|
273 cout << "For any other locale name <name> coltab reads <name>_basekeys.txt,\n"; |
|
274 cout << "<name>_compkeys.txt and <name>_strings.txt.\n"; |
|
275 cout << "The output file is always ls_<name>.cpp."; |
|
276 exit(1); |
|
277 } |
|
278 |
|
279 int main(int argc,char** argv) |
|
280 { |
|
281 if (argc != 2) |
|
282 UsageError(); |
|
283 bool wgl4 = false; |
|
284 const char* prefix = ""; |
|
285 const char* infix = ""; |
|
286 const char* locale = ""; |
|
287 bool standard = false; |
|
288 if (!_stricmp(argv[1],"standard")) |
|
289 { |
|
290 locale = "Standard"; |
|
291 standard = true; |
|
292 } |
|
293 else if (!_stricmp(argv[1],"wgl4")) |
|
294 { |
|
295 locale = "Wgl4"; |
|
296 wgl4 = true; |
|
297 standard = true; |
|
298 } |
|
299 else |
|
300 { |
|
301 locale = prefix = argv[1]; |
|
302 infix = "_"; |
|
303 } |
|
304 |
|
305 Reader* reader = new Reader(wgl4,standard,locale); |
|
306 if (!reader) |
|
307 { |
|
308 cout << "out of memory\n"; |
|
309 exit(1); |
|
310 } |
|
311 char* filename = new char[strlen(prefix) + 64]; |
|
312 sprintf(filename,"%s%scompkeys.txt",prefix,infix); |
|
313 reader->ReadCompKeys(filename); |
|
314 if (!standard) |
|
315 { |
|
316 sprintf(filename,"%s%sstrings.txt",prefix,infix); |
|
317 reader->ReadStrings(filename); |
|
318 } |
|
319 sprintf(filename,"%s%sbasekeys.txt",prefix,infix); |
|
320 reader->ReadBaseKeys(filename); |
|
321 sprintf(filename,"ls_%s.cpp",argv[1]); |
|
322 reader->WriteOutput(filename); |
|
323 |
|
324 delete reader; |
|
325 delete [] filename; |
|
326 return 0; |
|
327 } |
|
328 |
|
329 Reader::Reader(bool aWgl4,bool aStandard,const char* aLocaleName): |
|
330 iKeys(0), |
|
331 iIndices(0), |
|
332 iStringElements(0), |
|
333 iStringIndices(0), |
|
334 iInputFileName(NULL), |
|
335 iLineNumber(0), |
|
336 iWgl4(aWgl4), |
|
337 iStandard(aStandard), |
|
338 iLocaleName(aLocaleName) |
|
339 { |
|
340 } |
|
341 |
|
342 Reader::~Reader() |
|
343 { |
|
344 } |
|
345 |
|
346 // Get a hex number of exactly four digits from aString. Return -1 if none is found and aTolerate is true. |
|
347 int Reader::Hex(const char *aString,bool aTolerate) |
|
348 { |
|
349 char *end; |
|
350 unsigned long x = strtoul(aString,&end,16); |
|
351 if (end != aString + 4) |
|
352 { |
|
353 if (!aTolerate) |
|
354 { |
|
355 cout << "bad hex number on line " << iLineNumber << " of file " << iInputFileName << '\n'; |
|
356 exit(1); |
|
357 } |
|
358 return -1; |
|
359 } |
|
360 return x; |
|
361 } |
|
362 |
|
363 // Get a collation value from a string of the form [.xxxx.xxxx.xxxx.xxxx] |
|
364 void Reader::GetCollationKey(const char* aString,CollationKey* aKey) |
|
365 { |
|
366 if (aString[0] != '[' || aString[21] != ']') |
|
367 { |
|
368 cout << "syntax error on line " << iLineNumber << " of file " << iInputFileName << '\n'; |
|
369 exit(1); |
|
370 } |
|
371 if (aKey == NULL) |
|
372 { |
|
373 if (iKeys >= EMaxCollationKeys) |
|
374 { |
|
375 cout << "too many keys"; |
|
376 exit(1); |
|
377 } |
|
378 aKey = &iCollationKey[iKeys++]; |
|
379 } |
|
380 aKey->iIgnorable = aString[1] == '*'; // asterisk means that this character is normally ignored |
|
381 for (int i = 0; i < CollationKey::ELevels; i++) |
|
382 aKey->iLevel[i] = Hex(aString + 2 + i * 5); |
|
383 |
|
384 if (aKey->iLevel[1] > 0 && (aKey->iLevel[1] < KLevel1Min || aKey->iLevel[1] > KLevel1Max)) |
|
385 { |
|
386 cout << "illegal level-1 key value on line " << iLineNumber << "; outside the range " << KLevel1Min << ".." << KLevel1Max; |
|
387 exit(1); |
|
388 } |
|
389 if (aKey->iLevel[2] > 0 && (aKey->iLevel[2] < KLevel2Min || aKey->iLevel[2] > KLevel2Max)) |
|
390 { |
|
391 cout << "illegal level-2 key value on line " << iLineNumber << "; outside the range " << KLevel2Min << ".." << KLevel2Max; |
|
392 exit(1); |
|
393 } |
|
394 |
|
395 aKey->iStop = true; |
|
396 } |
|
397 |
|
398 void Reader::GetMultipleCollationKeys(const char* aString) |
|
399 { |
|
400 while (aString[0] == '[') |
|
401 { |
|
402 GetCollationKey(aString); |
|
403 iCollationKey[iKeys - 1].iStop = false; |
|
404 if (strlen(aString) <= 23) |
|
405 break; |
|
406 aString += 23; |
|
407 } |
|
408 iCollationKey[iKeys - 1].iStop = true; |
|
409 } |
|
410 |
|
411 /* |
|
412 Partially parse a line, returning its key code and the start of its first block of key data. |
|
413 Return false if it is not a data line, or not relevant. |
|
414 */ |
|
415 bool Reader::ParseLine(const char* aLine,int& aCode,int& aKeyStart) |
|
416 { |
|
417 int line_length = strlen(aLine); |
|
418 aCode = Hex(aLine,true); |
|
419 |
|
420 /* |
|
421 A data line must start with a hex number and be at least 27 characters long. |
|
422 Canonically decomposable Unicode characters are skipped. |
|
423 Skip non-WGL4 characters if doing WGL4 only. |
|
424 */ |
|
425 if (aCode != -1) |
|
426 { |
|
427 if (line_length < 27 || |
|
428 !strcmp(aLine + line_length - 8,"CANONSEQ") || |
|
429 (iWgl4 && !InWgl4((unsigned short)aCode))) |
|
430 aCode = -1; |
|
431 } |
|
432 |
|
433 if (aCode != -1) |
|
434 { |
|
435 aKeyStart = 4; |
|
436 while (aKeyStart < line_length && aLine[aKeyStart] != '[') |
|
437 aKeyStart++; |
|
438 } |
|
439 |
|
440 return aCode != -1; |
|
441 } |
|
442 |
|
443 void Reader::ReadBaseKeys(const char* aFileName) |
|
444 { |
|
445 iLineNumber = 0; |
|
446 iInputFileName = aFileName; |
|
447 ifstream input_file; |
|
448 input_file.open(iInputFileName,ios::in | ios::nocreate); |
|
449 if (input_file.fail()) |
|
450 { |
|
451 cout << "cannot open input file '" << iInputFileName << "'\n"; |
|
452 exit(1); |
|
453 } |
|
454 cout << "reading base keys from '" << iInputFileName << "'\n"; |
|
455 |
|
456 char line[1024]; |
|
457 for (;;) |
|
458 { |
|
459 input_file.getline(line,sizeof(line)); |
|
460 if (input_file.eof()) |
|
461 break; |
|
462 iLineNumber++; |
|
463 if (iLineNumber % 100 == 0) |
|
464 { |
|
465 cout << "line " << iLineNumber << '\n'; |
|
466 cout.flush(); |
|
467 } |
|
468 int code = 0; |
|
469 int key_start = 0; |
|
470 if (ParseLine(line,code,key_start)) |
|
471 { |
|
472 if (iIndices >= EMaxCollationIndices) |
|
473 { |
|
474 cout << "too many Unicode values"; |
|
475 exit(1); |
|
476 } |
|
477 CollationIndex& index = iCollationIndex[iIndices++]; |
|
478 index.iCode = code; |
|
479 index.iIndex = -1; |
|
480 |
|
481 /* |
|
482 First try to find the key in the array of keys found so far. |
|
483 Search backwards to use the fact that runs of the same key occur together. |
|
484 */ |
|
485 CollationKey key; |
|
486 GetCollationKey(line + key_start,&key); |
|
487 for (int i = iKeys - 1; i >= 0 && index.iIndex == -1; i--) |
|
488 if (iCollationKey[i] == key) |
|
489 index.iIndex = i; |
|
490 |
|
491 // If that fails, add a new key. |
|
492 if (index.iIndex == -1) |
|
493 { |
|
494 index.iIndex = iKeys++; |
|
495 if (iKeys > EMaxCollationKeys) |
|
496 { |
|
497 cout << "too many keys"; |
|
498 exit(1); |
|
499 } |
|
500 iCollationKey[index.iIndex] = key; |
|
501 } |
|
502 } |
|
503 } |
|
504 |
|
505 input_file.close(); |
|
506 } |
|
507 |
|
508 void Reader::ReadCompKeys(const char* aFileName) |
|
509 { |
|
510 iLineNumber = 0; |
|
511 iInputFileName = aFileName; |
|
512 ifstream input_file; |
|
513 input_file.open(iInputFileName,ios::in | ios::nocreate); |
|
514 if (input_file.fail()) |
|
515 { |
|
516 cout << "there are no composite keys; '" << iInputFileName << "' not found\n"; |
|
517 return; |
|
518 } |
|
519 cout << "reading composite keys from '" << iInputFileName << "'\n"; |
|
520 |
|
521 char line[1024]; |
|
522 for (;;) |
|
523 { |
|
524 input_file.getline(line,sizeof(line)); |
|
525 if (input_file.eof()) |
|
526 break; |
|
527 iLineNumber++; |
|
528 if (iLineNumber % 100 == 0) |
|
529 { |
|
530 cout << "line " << iLineNumber << '\n'; |
|
531 cout.flush(); |
|
532 } |
|
533 int code = 0; |
|
534 int key_start = 0; |
|
535 if (ParseLine(line,code,key_start)) |
|
536 { |
|
537 if (iIndices >= EMaxCollationIndices) |
|
538 { |
|
539 cout << "too many Unicode values"; |
|
540 exit(1); |
|
541 } |
|
542 CollationIndex& index = iCollationIndex[iIndices++]; |
|
543 index.iCode = code; |
|
544 index.iIndex = iKeys; |
|
545 GetMultipleCollationKeys(line + key_start); |
|
546 } |
|
547 } |
|
548 |
|
549 input_file.close(); |
|
550 } |
|
551 |
|
552 |
|
553 void Reader::ReadStrings(const char* aFileName) |
|
554 { |
|
555 iLineNumber = 0; |
|
556 iInputFileName = aFileName; |
|
557 ifstream input_file; |
|
558 input_file.open(iInputFileName,ios::in | ios::nocreate); |
|
559 if (input_file.fail()) |
|
560 { |
|
561 cout << "there are no strings; '" << iInputFileName << "' not found\n"; |
|
562 return; |
|
563 } |
|
564 cout << "reading strings from '" << iInputFileName << "'\n"; |
|
565 |
|
566 char line[1024]; |
|
567 for (;;) |
|
568 { |
|
569 input_file.getline(line,sizeof(line)); |
|
570 if (input_file.eof()) |
|
571 break; |
|
572 iLineNumber++; |
|
573 if (iLineNumber % 100 == 0) |
|
574 { |
|
575 cout << "line " << iLineNumber << '\n'; |
|
576 cout.flush(); |
|
577 } |
|
578 int code = 0; |
|
579 int key_start = 0; |
|
580 if (ParseLine(line,code,key_start)) |
|
581 { |
|
582 // Store the index to the Unicode string and the key sequence. |
|
583 if (iStringIndices > EMaxStringIndices) |
|
584 { |
|
585 cout << "too many string indices"; |
|
586 exit(1); |
|
587 } |
|
588 iStringIndex[iStringIndices++] = (iStringElements << 16) | iKeys; |
|
589 |
|
590 // Reserve space for the length. |
|
591 if (iStringElements >= EMaxStringElements) |
|
592 { |
|
593 cout << "too many string elements"; |
|
594 exit(1); |
|
595 } |
|
596 iStringElements++; |
|
597 |
|
598 // Read the Unicode string. |
|
599 int index = 0; |
|
600 int length = 0; |
|
601 while (index < key_start) |
|
602 { |
|
603 if (iStringElements >= EMaxStringElements) |
|
604 { |
|
605 cout << "too many string elements"; |
|
606 exit(1); |
|
607 } |
|
608 iStringElement[iStringElements++] = (unsigned short)Hex(line + index); |
|
609 index += 5; |
|
610 length++; |
|
611 } |
|
612 iStringElement[iStringElements - length - 1] = (unsigned short)length; |
|
613 |
|
614 // Read the key sequence. |
|
615 GetMultipleCollationKeys(line + key_start); |
|
616 } |
|
617 } |
|
618 |
|
619 input_file.close(); |
|
620 } |
|
621 |
|
622 // Pack the 3 collation key levels into a single 32-bit integer. |
|
623 unsigned int Reader::PackKey(const CollationKey& aValue) |
|
624 { |
|
625 unsigned int level0 = aValue.iLevel[0]; |
|
626 unsigned int level1 = aValue.iLevel[1]; |
|
627 if (level1 > 0) |
|
628 level1 -= (KLevel1Min - 1); |
|
629 unsigned int level2 = aValue.iLevel[2]; |
|
630 if (level2 > 0) |
|
631 level2 -= (KLevel2Min - 1); |
|
632 unsigned int key = level0 << 16 | level1 << 8 | level2 << 2; |
|
633 if (aValue.iIgnorable) |
|
634 key |= 2; |
|
635 if (aValue.iStop) |
|
636 key |= 1; |
|
637 return key; |
|
638 } |
|
639 |
|
640 // Pack a collation index value into a single 32-bit integer. |
|
641 unsigned int Reader::PackIndex(const CollationIndex& aValue) |
|
642 { |
|
643 unsigned int code = aValue.iCode; |
|
644 unsigned int index = aValue.iIndex; |
|
645 return code << 16 | index; |
|
646 } |
|
647 |
|
648 const Reader* TheReader; |
|
649 static int CompareStringIndices(const void* aIndex1,const void* aIndex2) |
|
650 { |
|
651 return TheReader->CompareStringIndices(*(unsigned int*)aIndex1 >> 16,*(unsigned int*)aIndex2 >> 16); |
|
652 } |
|
653 |
|
/*
Compare two strings of 16-bit codes element by element.
Returns the difference between the first pair of elements that differ; a string that has run
out of elements counts as having the value -1 there, so a prefix sorts before a longer string.
Returns 0 if the strings are identical. A length of zero or less means an empty string.
*/
int CompareUnicodeStrings(const unsigned short *aString1,int aLength1,const unsigned short *aString2,int aLength2)
	{
	const int length1 = aLength1 > 0 ? aLength1 : 0;
	const int length2 = aLength2 > 0 ? aLength2 : 0;
	const int shared = length1 < length2 ? length1 : length2;

	// Compare the part both strings have.
	for (int pos = 0; pos < shared; pos++)
		{
		const int difference = (int)aString1[pos] - (int)aString2[pos];
		if (difference != 0)
			return difference;
		}

	// One string is a prefix of the other (or they are equal).
	if (length1 < length2)
		return -1 - (int)aString2[shared];
	if (length1 > length2)
		return (int)aString1[shared] + 1;
	return 0;
	}
|
665 |
|
666 int Reader::CompareStringIndices(int aIndex1,int aIndex2) const |
|
667 { |
|
668 return CompareUnicodeStrings(iStringElement + aIndex1 + 1,iStringElement[aIndex1], |
|
669 iStringElement + aIndex2 + 1,iStringElement[aIndex2]); |
|
670 } |
|
671 |
|
672 void Reader::WriteOutput(const char* aFileName) |
|
673 { |
|
674 ofstream output_file; |
|
675 output_file.open(aFileName); |
|
676 if (output_file.fail()) |
|
677 { |
|
678 cout << "cannot open output file '" << aFileName << "'\n"; |
|
679 exit(1); |
|
680 } |
|
681 cout << "writing output to '" << aFileName << "'\n"; |
|
682 |
|
683 char *locale = NULL; |
|
684 if (iStandard) |
|
685 locale = _strdup("Standard"); |
|
686 else |
|
687 locale = _strdup(iLocaleName); |
|
688 |
|
689 if (!iStandard) |
|
690 { |
|
691 _strupr(locale); |
|
692 output_file << "/*\nLS_" << locale << ".CPP\n\nCopyright (C) 2000 Symbian Ltd. All rights reserved.\n*/\n"; |
|
693 _strlwr(locale); |
|
694 locale[0] = (char)toupper(locale[0]); |
|
695 output_file << "\n/*\nThe LCharSet object used by the " << locale << " locale.\n"; |
|
696 output_file << "Generated by COLTAB.\n*/\n"; |
|
697 |
|
698 output_file << "\n#include \"ls_std.h\"\n#include <collate.h>\n"; |
|
699 output_file << "\nconst TUint KUid" << locale << "CollationMethod = /* FILL THIS IN */;\n"; |
|
700 } |
|
701 |
|
702 /* |
|
703 Write the unique collation keys. |
|
704 Each one has the format, going from highest to lowest bit: |
|
705 |
|
706 16 bits: level-0 key |
|
707 8 bits: level-1 key |
|
708 6 bits: level-2 key |
|
709 1 bit: set if this key is optionally ignorable |
|
710 1 bit: set if this is the last key in the string of keys for a single Unicode value |
|
711 |
|
712 */ |
|
713 if (iKeys != 0) |
|
714 { |
|
715 output_file << "\nstatic const TUint32 The" << locale << "Key[] = \n\t{"; |
|
716 CollationKey* ck = iCollationKey; |
|
717 output_file << hex; |
|
718 for (int i = 0; i < iKeys; i++, ck++) |
|
719 { |
|
720 unsigned int key = PackKey(*ck); |
|
721 if (i % 8 == 0) |
|
722 output_file << "\n\t"; |
|
723 output_file << "0x" << key << ","; |
|
724 } |
|
725 output_file << dec; |
|
726 output_file << "\n\t};\n\n"; |
|
727 } |
|
728 |
|
729 if (iIndices != 0) |
|
730 { |
|
731 // Sort then write the collation index values - these relate Unicode values to collation keys. |
|
732 qsort(iCollationIndex,iIndices,sizeof(CollationIndex),CollationIndex::Compare); |
|
733 output_file << "static const TUint32 The" << locale << "Index[] = \n\t{"; |
|
734 CollationIndex* ci = iCollationIndex; |
|
735 output_file << hex; |
|
736 for (int i = 0; i < iIndices; i++, ci++) |
|
737 { |
|
738 unsigned int key = PackIndex(*ci); |
|
739 if (i % 8 == 0) |
|
740 output_file << "\n\t"; |
|
741 output_file << "0x" << key << ","; |
|
742 } |
|
743 output_file << dec; |
|
744 output_file << "\n\t};\n\n"; |
|
745 } |
|
746 |
|
747 if (iStringElements) |
|
748 { |
|
749 // Write the Unicode strings; these are preceded by their lengths. |
|
750 output_file << "static const TUint16 The" << locale << "StringElement[] = \n\t{"; |
|
751 output_file << hex; |
|
752 for (int i = 0; i < iStringElements; i++) |
|
753 { |
|
754 if (i % 8 == 0) |
|
755 output_file << "\n\t"; |
|
756 output_file << "0x" << iStringElement[i] << ","; |
|
757 } |
|
758 output_file << dec; |
|
759 if (iStringElements==0) |
|
760 output_file << "0"; |
|
761 output_file << "\n\t};\n\n"; |
|
762 /* |
|
763 Sort then write the string index values - these relate Unicode strings to collation keys. |
|
764 Each one has the string index in the upper word and the key index in the lower word. |
|
765 */ |
|
766 TheReader = this; |
|
767 qsort(iStringIndex,iStringIndices,sizeof(iStringIndex[0]),::CompareStringIndices); |
|
768 output_file << "static const TUint32 The" << locale << "StringIndex[] = \n\t{"; |
|
769 output_file << hex; |
|
770 for (i = 0; i < iStringIndices; i++) |
|
771 { |
|
772 if (i % 8 == 0) |
|
773 output_file << "\n\t"; |
|
774 output_file << "0x" << iStringIndex[i] << ","; |
|
775 } |
|
776 output_file << dec; |
|
777 if (iStringIndices ==0) |
|
778 output_file << "0"; |
|
779 output_file << "\n\t};\n\n"; |
|
780 } |
|
781 |
|
782 // Write the collation table structure. |
|
783 output_file << "static const TCollationKeyTable The" << locale << "Table = \n\t{ "; |
|
784 if (iKeys) |
|
785 output_file << "The" << locale << "Key"; |
|
786 else |
|
787 output_file << "0"; |
|
788 if (iIndices) |
|
789 output_file << ", The" << locale << "Index, " << iIndices; |
|
790 else |
|
791 output_file << ", 0, 0"; |
|
792 if (iStringElements) |
|
793 output_file << ", The" << locale << "StringElement, The" << locale << "StringIndex, " << iStringIndices << " };\n"; |
|
794 else |
|
795 output_file << ", 0, 0, 0 };\n"; |
|
796 |
|
797 if (!iStandard) |
|
798 output_file << "\nstatic const TCollationMethod TheCollationMethod[] = \n"\ |
|
799 " {\n"\ |
|
800 " {\n"\ |
|
801 " KUid" << locale << "CollationMethod, // the method for the locale\n"\ |
|
802 " NULL, // use the standard table as the main table\n"\ |
|
803 " &The" << locale << "Table, // the locale values override the standard values\n"\ |
|
804 " 0 // the flags are standard\n"\ |
|
805 " },\n"\ |
|
806 " {\n"\ |
|
807 " KUidBasicCollationMethod, // the standard unlocalised method\n"\ |
|
808 " NULL, // null means use the standard table\n"\ |
|
809 " NULL, // there's no override table\n"\ |
|
810 " 0 // the flags are standard\n"\ |
|
811 " }\n"\ |
|
812 " };\n"\ |
|
813 "\n"\ |
|
814 "static const TCollationDataSet TheCollationDataSet =\n"\ |
|
815 " {\n"\ |
|
816 " TheCollationMethod,\n"\ |
|
817 " 2\n"\ |
|
818 " };"\ |
|
819 "\n\n"\ |
|
820 "// The one and only locale character set object.\n"\ |
|
821 "const LCharSet TheCharSet =\n"\ |
|
822 " {\n"\ |
|
823 " NULL,\n"\ |
|
824 " &TheCollationDataSet\n"\ |
|
825 " };\n"; |
|
826 |
|
827 output_file.close(); |
|
828 delete [] locale; |
|
829 } |
|
830 |
|
831 int CollationIndex::Compare(const void* aIndex1,const void* aIndex2) |
|
832 { |
|
833 return ((CollationIndex*)aIndex1)->iCode - ((CollationIndex*)aIndex2)->iCode; |
|
834 } |