|
1 /* |
|
2 * Copyright (c) 2009 Nokia Corporation and/or its subsidiary(-ies). |
|
3 * All rights reserved. |
|
4 * This component and the accompanying materials are made available |
|
5 * under the terms of "Eclipse Public License v1.0" |
|
6 * which accompanies this distribution, and is available |
|
7 * at the URL "http://www.eclipse.org/legal/epl-v10.html". |
|
8 * |
|
9 * Initial Contributors: |
|
10 * Nokia Corporation - initial contribution. |
|
11 * |
|
12 * Contributors: |
|
13 * |
|
14 * Description: GB18030 converter implementation |
|
15 * |
|
16 */ |
|
17 |
|
18 |
|
19 #include <e32std.h> |
|
20 #include <charconv.h> |
|
21 #include <convutils.h> |
|
22 #include <convdata.h> |
|
23 #include "gb2312.h" |
|
24 #include "gbk.h" |
|
25 #include "gb18030_4byte.h" |
|
26 #include "gb18030_diff_gbk.h" |
|
27 #include <ecom/implementationproxy.h> |
|
28 #include <charactersetconverter.h> |
|
29 |
|
30 class CGB18030ConverterImpl : public CCharacterSetConverterPluginInterface |
|
31 { |
|
32 |
|
33 public: |
|
34 virtual const TDesC8& ReplacementForUnconvertibleUnicodeCharacters(); |
|
35 |
|
36 virtual TInt ConvertFromUnicode( |
|
37 CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, |
|
38 const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, |
|
39 TDes8& aForeign, |
|
40 const TDesC16& aUnicode, |
|
41 CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters); |
|
42 |
|
43 virtual TInt ConvertToUnicode( |
|
44 CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, |
|
45 TDes16& aUnicode, |
|
46 const TDesC8& aForeign, |
|
47 TInt& aState, |
|
48 TInt& aNumberOfUnconvertibleCharacters, |
|
49 TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter); |
|
50 |
|
51 virtual TBool IsInThisCharacterSetL( |
|
52 TBool& aSetToTrue, |
|
53 TInt& aConfidenceLevel, |
|
54 const TDesC8& aSample); |
|
55 |
|
56 static CGB18030ConverterImpl* NewL(); |
|
57 virtual ~CGB18030ConverterImpl(); |
|
58 |
|
59 private: |
|
60 CGB18030ConverterImpl(); |
|
61 TInt ConstructL(); |
|
62 SCnvConversionData * completeGb18030_2byteConversionData; // a merged conversion data including Gb18030-diff-g2312, GB2312, Gb18030-diff-gbk and Gbk |
|
63 TUint8 * workingMemory; |
|
64 }; |
|
65 |
|
// The gb18030 plug-in is implemented with the cnvutils framework; the gb2312 and gbk conversion data are re-used to save memory.
// 1) foreign->unicode:
//    1.1) 1 byte->unicode bmp: use the gb2312 mapping table;
//
//    1.2) 2 byte->unicode bmp: use the gb18030-2byte mapping table (gb18030_diff_gb2312ConversionData + gb2312ConversionData + gb18030_diff_gbkConversionData + gbkConversionData);
//
//    1.3) 4 byte->unicode bmp: use the gb18030-4byte mapping table;
//
//    1.4) 4 byte->unicode non-bmp: calculate with a formula.
//
// 2) unicode->foreign:
//    2.1) first check the gb18030-2byte mapping table (gb18030_diff_gb2312ConversionData + gb2312ConversionData + gb18030_diff_gbkConversionData + gbkConversionData);
//
//    2.2) if not found in 2.1), check the gb18030-4byte mapping table;
//
//    2.3) if not found in 2.2), calculate with a formula (gb18030-4byte non-BMP);
//
|
83 |
|
84 |
|
85 // GB18030-diff-gb2312 defines 1 foreign-to-Unicode range and 2 unicode-to-Foreign range |
|
86 // GB2312.CTL defines 21 foreign-to-Unicode ranges and 21 Unicode-to-foreign ranges |
|
87 // GB18030-diff-gbk defines 1 foreign-to-Unicode ranges and 2 Unicode-to-foreign range |
|
88 // GBK.CTL defines 2 foreign-to-Unicode ranges and 2 Unicode-to-foreign range |
|
89 const TInt KNumberOfBytesOfWorkingMemory=(1+2+21+21+1+2+2+2)*sizeof(SCnvConversionData::SOneDirectionData::SRange); //totally 1040 bytes |
|
90 |
|
91 const TDesC8& CGB18030ConverterImpl::ReplacementForUnconvertibleUnicodeCharacters() |
|
92 { |
|
93 return CnvGb18030_diff_gbk::ReplacementForUnconvertibleUnicodeCharacters(); |
|
94 } |
|
95 |
|
96 _LIT(KLitPanicText, "GB18030"); |
|
97 enum TPanic |
|
98 { |
|
99 EPanicNothingToConvert1=1, |
|
100 EPanicNothingToConvert2, |
|
101 EPanicNothingToConvert3, |
|
102 EPanicNothingToConvert4, |
|
103 EPanicNothingToConvert5, |
|
104 EPanicNothingToConvert6, |
|
105 EPanicOddNumberOfBytes1, |
|
106 EPanicOddNumberOfBytes2, |
|
107 EPanicOddNumberOfBytes3, |
|
108 EPanicOddNumberOfBytes4, |
|
109 EPanicOddNumberOfBytes5, |
|
110 EPanicOddNumberOfBytes6, |
|
111 EPanicBadHighBit1, |
|
112 EPanicBadHighBit2, |
|
113 EPanicBadHighBit3, |
|
114 EPanicBadHighBit4, |
|
115 EPanicBadHighBit5, |
|
116 EPanicBadHighBit6, |
|
117 EPanicBadHighBit7, |
|
118 EPanicBadPointers1, |
|
119 EPanicBadPointers2, |
|
120 EPanicBadPointers3, |
|
121 EPanicBadPointers4, |
|
122 EPanicBadPointers5, |
|
123 EPanicBadPointers6, |
|
124 EPanicBadPointers7, |
|
125 EPanicBadPointers8, |
|
126 EPanicBadPointers9, |
|
127 EPanicBadPointers10, |
|
128 EPanicBadPointers11, |
|
129 EPanicBadPointers12, |
|
130 EPanicBadPointers13, |
|
131 EPanicBadPointers14, |
|
132 EPanicBadPointers15, |
|
133 EPanicBadPointers16, |
|
134 EPanicBadPointers17, |
|
135 EPanicBadPointers18, |
|
136 EPanicBadPointers19, |
|
137 EPanicBadPointers20, |
|
138 EPanicBadPointers21, |
|
139 EPanicBadPointers22, |
|
140 EPanicBadPointers23, |
|
141 EPanicBadPointers24, |
|
142 EPanicBadPointers25, |
|
143 EPanicBadPointers26, |
|
144 EPanicBadPointers27, |
|
145 EPanicBadPointers28, |
|
146 EPanicBadPointers29, |
|
147 EPanicBadPointers30, |
|
148 EPanicBadPointers31, |
|
149 EPanicBadPointers32, |
|
150 EPanicBadPointers33, |
|
151 EPanicBadPointers34, |
|
152 EPanicBadPointers35, |
|
153 EPanicBadPointers36, |
|
154 EPanicBadCalculation1, |
|
155 EPanicBadCalculation2, |
|
156 EPanicNumberOfBytesIsNotMultipleOfThree1, |
|
157 EPanicNumberOfBytesIsNotMultipleOfThree2, |
|
158 EPanicSingleShift2Expected, |
|
159 EPanicSingleShift3Expected, |
|
160 EPanicTooManyBytesOfWorkingMemoryUsed1, |
|
161 EPanicTooManyBytesOfWorkingMemoryUsed2 |
|
162 }; |
|
163 |
|
164 LOCAL_C void Panic(TPanic aPanic) |
|
165 { |
|
166 User::Panic(KLitPanicText, aPanic); |
|
167 } |
|
168 |
|
// Number of elements in a statically sized array (must not be used on pointers).
#define ARRAY_LENGTH(aArray) (sizeof(aArray)/sizeof(*(aArray)))
|
170 |
|
171 LOCAL_C void Step12DummyConvertFromIntermediateBufferInPlace(TInt, TDes8&, TInt& aNumberOfCharactersThatDroppedOut) |
|
172 { |
|
173 aNumberOfCharactersThatDroppedOut=0; |
|
174 } |
|
175 |
|
176 // Perform the actual conversion (unicode -> gb18030 4byte non-BMP) using formula in this function |
|
177 LOCAL_C void Step3ConvertFromIntermediateBufferInPlace(TInt aStartPositionInDescriptor, TDes8& aDescriptor, TInt& aNumberOfCharactersThatDroppedOut) |
|
178 { |
|
179 aNumberOfCharactersThatDroppedOut = 0; // no drop out, because all GB18030 outside BMP are exactly 4-bytes |
|
180 |
|
181 const TInt descriptorLength=aDescriptor.Length(); |
|
182 TUint8* pVeryFrom = CONST_CAST(TUint8*, aDescriptor.Ptr()); |
|
183 const TUint8* pEnd = pVeryFrom + descriptorLength; |
|
184 TUint8* pFrom = pVeryFrom + aStartPositionInDescriptor; |
|
185 FOREVER |
|
186 { |
|
187 if (pFrom + 4 > pEnd) |
|
188 { |
|
189 __ASSERT_DEBUG(pFrom==pEnd, Panic(EPanicBadPointers25)); |
|
190 break; |
|
191 } |
|
192 TUint characterCode = 0; |
|
193 for (TInt i=0; i<4; i++) |
|
194 { |
|
195 characterCode <<= 8; |
|
196 characterCode += pFrom[i]; |
|
197 } |
|
198 |
|
199 // to gb18030 |
|
200 characterCode -= 0x10000; |
|
201 TUint b4 = characterCode % 10 + 0x30; |
|
202 characterCode /= 10; |
|
203 TUint b3 = characterCode % 126 + 0x81; |
|
204 characterCode /= 126; |
|
205 TUint b2 = characterCode % 10 + 0x30; |
|
206 TUint b1 = characterCode / 10 + 0x90; |
|
207 |
|
208 *pFrom++ = b1; |
|
209 *pFrom++ = b2; |
|
210 *pFrom++ = b3; |
|
211 *pFrom++ = b4; |
|
212 } |
|
213 aDescriptor.SetLength(pFrom-pVeryFrom); |
|
214 } |
|
215 |
|
216 // gb2312-1byte ->unicode (0x00 - 0x7F) |
|
217 LOCAL_C TInt Step0NumberOfBytesAbleToConvertToUnicode(const TDesC8& aDescriptor) |
|
218 { |
|
219 const TInt descriptorLength=aDescriptor.Length(); |
|
220 const TUint8* pointerToPreviousByte=aDescriptor.Ptr()-1; |
|
221 const TUint8* const pointerToLastByte=pointerToPreviousByte+aDescriptor.Length(); |
|
222 |
|
223 TInt numOfBytes = 0; |
|
224 FOREVER |
|
225 { |
|
226 if (pointerToPreviousByte>=pointerToLastByte) |
|
227 { |
|
228 break; |
|
229 } |
|
230 // byte 1 |
|
231 TUint b1 = pointerToPreviousByte[1]; |
|
232 if (b1 <= 0x7F) |
|
233 { |
|
234 pointerToPreviousByte++; |
|
235 numOfBytes++; |
|
236 } |
|
237 else |
|
238 break; |
|
239 } |
|
240 return numOfBytes; |
|
241 } |
|
242 |
|
243 // gb18030-2byte --> unicode (0x8140 - 0xFE7E, 0x8180 - 0xFEFE) |
|
244 LOCAL_C TInt Step1NumberOfBytesAbleToConvertToUnicode(const TDesC8& aDescriptor) |
|
245 { |
|
246 const TInt descriptorLength=aDescriptor.Length(); |
|
247 const TUint8* pointerToPreviousByte=aDescriptor.Ptr()-1; |
|
248 const TUint8* const pointerToLastByte=pointerToPreviousByte+aDescriptor.Length(); |
|
249 |
|
250 TInt numOfBytes = 0; |
|
251 FOREVER |
|
252 { |
|
253 if (pointerToPreviousByte>=pointerToLastByte) |
|
254 { |
|
255 break; |
|
256 } |
|
257 // byte 1 |
|
258 TUint b1 = pointerToPreviousByte[1]; |
|
259 if (b1 <= 0x80 || b1 > 0xFE) |
|
260 break; |
|
261 |
|
262 // byte 2 |
|
263 if (pointerToPreviousByte+1 >= pointerToLastByte) |
|
264 break; |
|
265 TUint b2 = pointerToPreviousByte[2]; |
|
266 if (b2 >= 0x40 && b2 <= 0xFE && b2 != 0x7F) // all gb18030 2-byte code |
|
267 { |
|
268 pointerToPreviousByte = pointerToPreviousByte + 2; |
|
269 numOfBytes = numOfBytes + 2; |
|
270 } |
|
271 else if (b2 < 0x30 || b2 > 0x39) |
|
272 { |
|
273 if (numOfBytes <= 0) |
|
274 return CCnvCharacterSetConverter::EErrorIllFormedInput; |
|
275 else |
|
276 break; |
|
277 } |
|
278 else |
|
279 break; |
|
280 } |
|
281 return numOfBytes; |
|
282 } |
|
283 |
|
284 |
|
285 // gb18030 4-bytes bmp --> unicode (0x81308130 - 0x8439FE39) |
|
286 LOCAL_C TInt Step2NumberOfBytesAbleToConvertToUnicode(const TDesC8& aDescriptor) |
|
287 { |
|
288 const TUint8* pointerToPreviousByte=aDescriptor.Ptr()-1; |
|
289 const TUint8* const pointerToLastByte=pointerToPreviousByte+aDescriptor.Length(); |
|
290 __ASSERT_DEBUG(pointerToPreviousByte<=pointerToLastByte, Panic(EPanicBadPointers25)); |
|
291 |
|
292 TInt numOfBytes = 0; |
|
293 FOREVER |
|
294 { |
|
295 if (pointerToPreviousByte>=pointerToLastByte) |
|
296 { |
|
297 break; |
|
298 } |
|
299 |
|
300 // byte 1 |
|
301 TUint b1 = pointerToPreviousByte[1]; |
|
302 if ((b1 < 0x81) || (b1 > 0x84)){ |
|
303 break; |
|
304 } |
|
305 |
|
306 // byte 2 |
|
307 if (pointerToPreviousByte+1 >= pointerToLastByte) |
|
308 break; |
|
309 TUint b2 = pointerToPreviousByte[2]; |
|
310 if (b2 >= 0x40 && b2 <= 0xFE && b2 != 0x7F) // all gb18030 2-byte code |
|
311 break; |
|
312 else if (b2 < 0x30 || b2 > 0x39) |
|
313 { |
|
314 if (numOfBytes == 0) |
|
315 return CCnvCharacterSetConverter::EErrorIllFormedInput; |
|
316 else |
|
317 break; |
|
318 } |
|
319 |
|
320 |
|
321 // byte 3 |
|
322 if (pointerToPreviousByte+2 >= pointerToLastByte) |
|
323 break; |
|
324 TUint b3 = pointerToPreviousByte[3]; |
|
325 if (b3 < 0x81 || b3 > 0xFE) |
|
326 { |
|
327 if (numOfBytes == 0) |
|
328 return CCnvCharacterSetConverter::EErrorIllFormedInput; |
|
329 else |
|
330 break; |
|
331 } |
|
332 |
|
333 // byte 4 |
|
334 if (pointerToPreviousByte+3 >= pointerToLastByte) |
|
335 break; |
|
336 TUint b4 = pointerToPreviousByte[4]; |
|
337 if (b4 < 0x30 || b4 > 0x39) |
|
338 { |
|
339 if (numOfBytes == 0) |
|
340 return CCnvCharacterSetConverter::EErrorIllFormedInput; |
|
341 else |
|
342 break; |
|
343 } |
|
344 else |
|
345 { |
|
346 numOfBytes = numOfBytes + 4; |
|
347 pointerToPreviousByte = pointerToPreviousByte+4; |
|
348 } |
|
349 } |
|
350 |
|
351 return numOfBytes; |
|
352 } |
|
353 |
|
354 // gb18030 4-bytes non-bmp --> unicode (0x90308130~0xE339FE39) |
|
355 LOCAL_C TInt Step3NumberOfBytesAbleToConvertToUnicode(const TDesC8& aDescriptor) |
|
356 { |
|
357 const TUint8* pointerToPreviousByte=aDescriptor.Ptr()-1; |
|
358 const TUint8* const pointerToLastByte=pointerToPreviousByte+aDescriptor.Length(); |
|
359 __ASSERT_DEBUG(pointerToPreviousByte<=pointerToLastByte, Panic(EPanicBadPointers25)); |
|
360 |
|
361 TInt numOfBytes = 0; |
|
362 FOREVER |
|
363 { |
|
364 if (pointerToPreviousByte>=pointerToLastByte) |
|
365 { |
|
366 break; |
|
367 } |
|
368 |
|
369 // byte 1 |
|
370 TUint b1 = pointerToPreviousByte[1]; |
|
371 if (b1 < 0x90 || b1 > 0xE3) |
|
372 break; |
|
373 |
|
374 // byte 2 |
|
375 if (pointerToPreviousByte+1 >= pointerToLastByte) |
|
376 break; |
|
377 TUint b2 = pointerToPreviousByte[2]; |
|
378 if (b2 >= 0x40 && b2 <= 0xFE && b2 != 0x7F) |
|
379 break; |
|
380 else if (b2 < 0x30 || b2 > 0x39) |
|
381 { |
|
382 if (numOfBytes == 0) |
|
383 return CCnvCharacterSetConverter::EErrorIllFormedInput; |
|
384 else |
|
385 break; |
|
386 } |
|
387 |
|
388 // byte 3 |
|
389 if (pointerToPreviousByte+2 >= pointerToLastByte) |
|
390 break; |
|
391 TUint b3 = pointerToPreviousByte[3]; |
|
392 if (b3 < 0x81 || b3 > 0xFE) |
|
393 { |
|
394 if (numOfBytes == 0) |
|
395 return CCnvCharacterSetConverter::EErrorIllFormedInput; |
|
396 else |
|
397 break; |
|
398 } |
|
399 |
|
400 // byte 4 |
|
401 if (pointerToPreviousByte+3 >= pointerToLastByte) |
|
402 break; |
|
403 TUint b4 = pointerToPreviousByte[4]; |
|
404 if (b4 < 0x30 || b4 > 0x39) |
|
405 { |
|
406 if (numOfBytes == 0) |
|
407 return CCnvCharacterSetConverter::EErrorIllFormedInput; |
|
408 else |
|
409 break; |
|
410 } |
|
411 else |
|
412 { |
|
413 numOfBytes = numOfBytes + 4; |
|
414 pointerToPreviousByte = pointerToPreviousByte + 4; |
|
415 } |
|
416 } |
|
417 return numOfBytes; |
|
418 } |
|
419 |
|
420 void Step012DummyConvertToIntermediateBufferInPlace(TDes8&) |
|
421 { |
|
422 } |
|
423 |
|
424 // Perform the actual conversion (gb18030 4byte non-BMP -> unicode) using formula in this function |
|
425 LOCAL_C void Step3ConvertToIntermediateBufferInPlace(TDes8& aDescriptor) |
|
426 { |
|
427 const TInt descriptorLength=aDescriptor.Length(); |
|
428 __ASSERT_DEBUG(descriptorLength%4 == 0, Panic(EPanicNothingToConvert5)); |
|
429 TUint8* pointerToTargetByte=CONST_CAST(TUint8*, aDescriptor.Ptr()); |
|
430 const TUint8* pointerToSourceByte=pointerToTargetByte; |
|
431 const TUint8* const pointerToLastByte=pointerToSourceByte+descriptorLength; |
|
432 |
|
433 FOREVER |
|
434 { |
|
435 if (pointerToLastByte - pointerToSourceByte < 4) |
|
436 break; |
|
437 |
|
438 // conversion |
|
439 TUint8 b1 = pointerToSourceByte[0]; |
|
440 TUint8 b2 = pointerToSourceByte[1]; |
|
441 TUint8 b3 = pointerToSourceByte[2]; |
|
442 TUint8 b4 = pointerToSourceByte[3]; |
|
443 |
|
444 TUint characterCode = 0x10000 + (b1 - 0x90) * 12600 + |
|
445 (b2 - 0x30) * 1260 + |
|
446 (b3 - 0x81) * 10 + |
|
447 (b4 - 0x30); |
|
448 |
|
449 pointerToTargetByte[0] = ((characterCode >> 24) & 0xFF); |
|
450 pointerToTargetByte[1] = ((characterCode >> 16) & 0xFF); |
|
451 pointerToTargetByte[2] = ((characterCode >> 8) & 0xFF); |
|
452 pointerToTargetByte[3] = (characterCode & 0xFF); |
|
453 |
|
454 pointerToSourceByte = pointerToSourceByte + 4; |
|
455 pointerToTargetByte = pointerToTargetByte + 4; |
|
456 } |
|
457 |
|
458 aDescriptor.SetLength(descriptorLength); |
|
459 } |
|
460 |
|
461 |
|
462 // A dummy "direct" mapping table for non-Bmp chars in step 3 |
|
463 // Use 32-bit Unicode value as intermediate coding |
|
464 LOCAL_D const SCnvConversionData::SVariableByteData::SRange step3ForeignVariableByteDataRanges[]= |
|
465 { |
|
466 { |
|
467 0x00, // from 0x10000 |
|
468 0x00, // to 0x10FFFF |
|
469 3, // total 4 bytes |
|
470 0 |
|
471 }, |
|
472 }; |
|
473 LOCAL_D const SCnvConversionData::SOneDirectionData::SRange step3ForeignToUnicodeDataRanges[]= |
|
474 { |
|
475 { |
|
476 0x10000, // from 0x10000 |
|
477 0x10ffff, // to 0x10FFFF |
|
478 SCnvConversionData::SOneDirectionData::SRange::EDirect, |
|
479 0, |
|
480 0, |
|
481 { |
|
482 0 // map from intermediate to unicode with offset = 0 |
|
483 } |
|
484 }, |
|
485 }; |
|
486 LOCAL_D const SCnvConversionData::SOneDirectionData::SRange step3UnicodeToForeignDataRanges[]= |
|
487 { |
|
488 { |
|
489 0x10000, //from 0x10000 |
|
490 0x10FFFF, //to 0x10FFFF |
|
491 SCnvConversionData::SOneDirectionData::SRange::EDirect, |
|
492 4, // output byte count = 4 |
|
493 0, |
|
494 { |
|
495 0 // offset = 0 |
|
496 } |
|
497 }, |
|
498 }; |
|
499 GLDEF_D const SCnvConversionData step3ConversionData= |
|
500 { |
|
501 SCnvConversionData::EFixedBigEndian, |
|
502 { |
|
503 ARRAY_LENGTH(step3ForeignVariableByteDataRanges), |
|
504 step3ForeignVariableByteDataRanges |
|
505 }, |
|
506 { |
|
507 ARRAY_LENGTH(step3ForeignToUnicodeDataRanges), |
|
508 step3ForeignToUnicodeDataRanges |
|
509 }, |
|
510 { |
|
511 ARRAY_LENGTH(step3UnicodeToForeignDataRanges), |
|
512 step3UnicodeToForeignDataRanges |
|
513 }, |
|
514 NULL, |
|
515 NULL |
|
516 }; |
|
517 |
|
518 |
|
519 // An internal mapping table to reslove the conflict introduced in symbian GB2312-80 plug-in. |
|
520 // It will be merged into the gb18030-2byte Conversion Data. |
|
521 // It includes mapping: (0xA1A4 -> 0x00B7, 0xA1AA -> 0x2014, 0xA844 <- 0x2015, 0x8139A739 <- 0x30FB) |
|
522 LOCAL_D const SCnvConversionData::SVariableByteData::SRange gb18030_diff_gb2312ForeignVariableByteDataRanges[]= |
|
523 { |
|
524 { |
|
525 0xA1, //from 0xA1A4 |
|
526 0xA1, //to 0xA1AA |
|
527 1, |
|
528 0 |
|
529 }, |
|
530 }; |
|
531 LOCAL_D const SCnvConversionData::SOneDirectionData::SRange::UData::SKeyedTable1616::SEntry keyedTable1616_foreignToUnicode_1[]= |
|
532 { |
|
533 { |
|
534 0xA1A4, |
|
535 0x00B7 |
|
536 }, |
|
537 { |
|
538 0xA1AA, |
|
539 0x2014 |
|
540 } |
|
541 }; |
|
542 LOCAL_D const SCnvConversionData::SOneDirectionData::SRange gb18030_diff_gb2312ForeignToUnicodeDataRanges[]= |
|
543 { |
|
544 { |
|
545 0xA1A4, |
|
546 0xA1AA, |
|
547 SCnvConversionData::SOneDirectionData::SRange::EKeyedTable1616, |
|
548 0, |
|
549 0, |
|
550 { |
|
551 UData_SKeyedTable1616(keyedTable1616_foreignToUnicode_1) |
|
552 } |
|
553 }, |
|
554 }; |
|
555 LOCAL_D const SCnvConversionData::SOneDirectionData::SRange::UData::SKeyedTable1616::SEntry keyedTable1616_unicodeToForeign_1[]= |
|
556 { |
|
557 { |
|
558 0x2015, |
|
559 0xA844 |
|
560 } |
|
561 }; |
|
562 LOCAL_D const SCnvConversionData::SOneDirectionData::SRange::UData::SKeyedTable3232::SEntry keyedTable3232_unicodeToForeign_1[]= |
|
563 { |
|
564 { |
|
565 0x30FB, |
|
566 0x8139A739 |
|
567 } |
|
568 }; |
|
569 |
|
570 LOCAL_D const SCnvConversionData::SOneDirectionData::SRange gb18030_diff_gb2312UnicodeToForeignDataRanges[]= |
|
571 { |
|
572 { |
|
573 0x2015, |
|
574 0x2015, |
|
575 SCnvConversionData::SOneDirectionData::SRange::EKeyedTable1616, |
|
576 2, // output byte count = 2 |
|
577 0, |
|
578 { |
|
579 UData_SKeyedTable1616(keyedTable1616_unicodeToForeign_1) |
|
580 } |
|
581 }, |
|
582 { |
|
583 0x30FB, |
|
584 0x30FB, |
|
585 SCnvConversionData::SOneDirectionData::SRange::EKeyedTable3232, |
|
586 4, // output byte count = 4 |
|
587 0, |
|
588 { |
|
589 UData_SKeyedTable3232(keyedTable3232_unicodeToForeign_1) |
|
590 } |
|
591 }, |
|
592 }; |
|
593 GLDEF_D const SCnvConversionData gb18030_diff_gb2312ConversionData= |
|
594 { |
|
595 SCnvConversionData::EFixedBigEndian, |
|
596 { |
|
597 ARRAY_LENGTH(gb18030_diff_gb2312ForeignVariableByteDataRanges), |
|
598 gb18030_diff_gb2312ForeignVariableByteDataRanges |
|
599 }, |
|
600 { |
|
601 ARRAY_LENGTH(gb18030_diff_gb2312ForeignToUnicodeDataRanges), |
|
602 gb18030_diff_gb2312ForeignToUnicodeDataRanges |
|
603 }, |
|
604 { |
|
605 ARRAY_LENGTH(gb18030_diff_gb2312UnicodeToForeignDataRanges), |
|
606 gb18030_diff_gb2312UnicodeToForeignDataRanges |
|
607 }, |
|
608 NULL, |
|
609 NULL |
|
610 }; |
|
611 |
|
612 LOCAL_D const SCnvConversionData::SVariableByteData::SRange foreignVariableByteDataRanges[]= |
|
613 { |
|
614 { |
|
615 0x00, |
|
616 0x7f, |
|
617 0, |
|
618 0 |
|
619 }, |
|
620 { |
|
621 0x80, |
|
622 0xff, |
|
623 1, |
|
624 0 |
|
625 } |
|
626 }; |
|
627 |
|
628 LOCAL_C void SetUpCompleteGb18030_2byteConversionData(SCnvConversionData& aCompleteGb18030_2byteConversionData, TUint8* aWorkingMemory) |
|
629 { |
|
630 const SCnvConversionData& gb2312ConversionData=CnvGb2312::ConversionData(); |
|
631 const SCnvConversionData& gb18030_diff_gbkConversionData=CnvGb18030_diff_gbk::ConversionData(); |
|
632 const SCnvConversionData& gbkConversionData=CnvGbk::ConversionData(); |
|
633 // create a SCnvConversionData that is the combination of gb18030_diff_gb2312ConversionData, gb2312ConversionData, gb18030_diff_gbkConversionData and gbkConversionData; |
|
634 aCompleteGb18030_2byteConversionData.iEndiannessOfForeignCharacters=SCnvConversionData::EFixedBigEndian; |
|
635 aCompleteGb18030_2byteConversionData.iForeignVariableByteData.iNumberOfRanges=ARRAY_LENGTH(foreignVariableByteDataRanges); |
|
636 aCompleteGb18030_2byteConversionData.iForeignVariableByteData.iRangeArray=foreignVariableByteDataRanges; |
|
637 TInt numberOfBytesOfWorkingMemoryUsed=0; |
|
638 |
|
639 // set up the foreign-to-Unicode data |
|
640 const TInt numberOfForeignToUnicodeDataRanges=gb18030_diff_gb2312ConversionData.iForeignToUnicodeData.iNumberOfRanges + gb2312ConversionData.iForeignToUnicodeData.iNumberOfRanges + gb18030_diff_gbkConversionData.iForeignToUnicodeData.iNumberOfRanges + gbkConversionData.iForeignToUnicodeData.iNumberOfRanges; |
|
641 aCompleteGb18030_2byteConversionData.iForeignToUnicodeData.iNumberOfRanges=numberOfForeignToUnicodeDataRanges; |
|
642 SCnvConversionData::SOneDirectionData::SRange* foreignToUnicodeDataRangeArray=REINTERPRET_CAST(SCnvConversionData::SOneDirectionData::SRange*, aWorkingMemory+numberOfBytesOfWorkingMemoryUsed); |
|
643 numberOfBytesOfWorkingMemoryUsed+=(numberOfForeignToUnicodeDataRanges*sizeof(SCnvConversionData::SOneDirectionData::SRange)); |
|
644 __ASSERT_ALWAYS(numberOfBytesOfWorkingMemoryUsed<=KNumberOfBytesOfWorkingMemory, Panic(EPanicTooManyBytesOfWorkingMemoryUsed1)); |
|
645 aCompleteGb18030_2byteConversionData.iForeignToUnicodeData.iRangeArray=foreignToUnicodeDataRangeArray; |
|
646 Mem::Copy(foreignToUnicodeDataRangeArray, gb18030_diff_gb2312ConversionData.iForeignToUnicodeData.iRangeArray, gb18030_diff_gb2312ConversionData.iForeignToUnicodeData.iNumberOfRanges*sizeof(SCnvConversionData::SOneDirectionData::SRange)); |
|
647 Mem::Copy(foreignToUnicodeDataRangeArray + gb18030_diff_gb2312ConversionData.iForeignToUnicodeData.iNumberOfRanges, gb2312ConversionData.iForeignToUnicodeData.iRangeArray, gb2312ConversionData.iForeignToUnicodeData.iNumberOfRanges*sizeof(SCnvConversionData::SOneDirectionData::SRange)); |
|
648 Mem::Copy(foreignToUnicodeDataRangeArray + gb18030_diff_gb2312ConversionData.iForeignToUnicodeData.iNumberOfRanges + gb2312ConversionData.iForeignToUnicodeData.iNumberOfRanges, gb18030_diff_gbkConversionData.iForeignToUnicodeData.iRangeArray, gb18030_diff_gbkConversionData.iForeignToUnicodeData.iNumberOfRanges*sizeof(SCnvConversionData::SOneDirectionData::SRange)); |
|
649 Mem::Copy(foreignToUnicodeDataRangeArray + gb18030_diff_gb2312ConversionData.iForeignToUnicodeData.iNumberOfRanges + gb2312ConversionData.iForeignToUnicodeData.iNumberOfRanges + gb18030_diff_gbkConversionData.iForeignToUnicodeData.iNumberOfRanges, gbkConversionData.iForeignToUnicodeData.iRangeArray, gbkConversionData.iForeignToUnicodeData.iNumberOfRanges*sizeof(SCnvConversionData::SOneDirectionData::SRange)); |
|
650 |
|
651 // set up the Unicode-to-foreign data |
|
652 const TInt numberOfUnicodeToForeignDataRanges=gb18030_diff_gb2312ConversionData.iUnicodeToForeignData.iNumberOfRanges + gb2312ConversionData.iUnicodeToForeignData.iNumberOfRanges + gb18030_diff_gbkConversionData.iUnicodeToForeignData.iNumberOfRanges + gbkConversionData.iUnicodeToForeignData.iNumberOfRanges; |
|
653 aCompleteGb18030_2byteConversionData.iUnicodeToForeignData.iNumberOfRanges=numberOfUnicodeToForeignDataRanges; |
|
654 SCnvConversionData::SOneDirectionData::SRange* unicodeToForeignDataRangeArray=REINTERPRET_CAST(SCnvConversionData::SOneDirectionData::SRange*, aWorkingMemory+numberOfBytesOfWorkingMemoryUsed); |
|
655 numberOfBytesOfWorkingMemoryUsed+=(numberOfUnicodeToForeignDataRanges*sizeof(SCnvConversionData::SOneDirectionData::SRange)); |
|
656 __ASSERT_ALWAYS(numberOfBytesOfWorkingMemoryUsed<=KNumberOfBytesOfWorkingMemory, Panic(EPanicTooManyBytesOfWorkingMemoryUsed2)); |
|
657 aCompleteGb18030_2byteConversionData.iUnicodeToForeignData.iRangeArray=unicodeToForeignDataRangeArray; |
|
658 Mem::Copy(unicodeToForeignDataRangeArray, gb18030_diff_gb2312ConversionData.iUnicodeToForeignData.iRangeArray, gb18030_diff_gb2312ConversionData.iUnicodeToForeignData.iNumberOfRanges*sizeof(SCnvConversionData::SOneDirectionData::SRange)); |
|
659 Mem::Copy(unicodeToForeignDataRangeArray + gb18030_diff_gb2312ConversionData.iUnicodeToForeignData.iNumberOfRanges, gb2312ConversionData.iUnicodeToForeignData.iRangeArray, gb2312ConversionData.iUnicodeToForeignData.iNumberOfRanges*sizeof(SCnvConversionData::SOneDirectionData::SRange)); |
|
660 Mem::Copy(unicodeToForeignDataRangeArray + gb18030_diff_gb2312ConversionData.iUnicodeToForeignData.iNumberOfRanges + gb2312ConversionData.iUnicodeToForeignData.iNumberOfRanges, gb18030_diff_gbkConversionData.iUnicodeToForeignData.iRangeArray, gb18030_diff_gbkConversionData.iUnicodeToForeignData.iNumberOfRanges*sizeof(SCnvConversionData::SOneDirectionData::SRange)); |
|
661 Mem::Copy(unicodeToForeignDataRangeArray + gb18030_diff_gb2312ConversionData.iUnicodeToForeignData.iNumberOfRanges + gb2312ConversionData.iUnicodeToForeignData.iNumberOfRanges + gb18030_diff_gbkConversionData.iUnicodeToForeignData.iNumberOfRanges, gbkConversionData.iUnicodeToForeignData.iRangeArray, gbkConversionData.iUnicodeToForeignData.iNumberOfRanges*sizeof(SCnvConversionData::SOneDirectionData::SRange)); |
|
662 } |
|
663 |
|
664 |
|
665 TInt CGB18030ConverterImpl::ConvertFromUnicode( |
|
666 CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, |
|
667 const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, |
|
668 TDes8& aForeign, |
|
669 const TDesC16& aUnicode, |
|
670 CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters) |
|
671 { |
|
672 TFixedArray<CnvUtilities::SCharacterSet, 3> characterSets; |
|
673 |
|
674 // step 1) gb18030-2byte |
|
675 characterSets[0].iConversionData = completeGb18030_2byteConversionData; |
|
676 characterSets[0].iConvertFromIntermediateBufferInPlace = Step12DummyConvertFromIntermediateBufferInPlace; |
|
677 characterSets[0].iEscapeSequence = &KNullDesC8; |
|
678 |
|
679 // step 2) gb18030-4byte BMP |
|
680 characterSets[1].iConversionData = &CnvGb18030_4byte::ConversionData(); |
|
681 characterSets[1].iConvertFromIntermediateBufferInPlace = Step12DummyConvertFromIntermediateBufferInPlace; |
|
682 characterSets[1].iEscapeSequence = &KNullDesC8; |
|
683 |
|
684 // step 3) gb18030-4byte non-BMP |
|
685 characterSets[2].iConversionData = &step3ConversionData; |
|
686 characterSets[2].iConvertFromIntermediateBufferInPlace = Step3ConvertFromIntermediateBufferInPlace; |
|
687 characterSets[2].iEscapeSequence = &KNullDesC8; |
|
688 |
|
689 return CnvUtilities::ConvertFromUnicode(aDefaultEndiannessOfForeignCharacters, aReplacementForUnconvertibleUnicodeCharacters, aForeign, aUnicode, aIndicesOfUnconvertibleCharacters, characterSets.Array()); |
|
690 } |
|
691 |
|
692 |
|
693 TInt CGB18030ConverterImpl::ConvertToUnicode( |
|
694 CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, |
|
695 TDes16& aUnicode, |
|
696 const TDesC8& aForeign, |
|
697 TInt& /*aState*/, |
|
698 TInt& aNumberOfUnconvertibleCharacters, |
|
699 TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter) |
|
700 { |
|
701 TFixedArray<CnvUtilities::SMethod, 4> methods; |
|
702 // step 0) gb2312-1byte |
|
703 methods[0].iNumberOfBytesAbleToConvert = Step0NumberOfBytesAbleToConvertToUnicode; |
|
704 methods[0].iConvertToIntermediateBufferInPlace = Step012DummyConvertToIntermediateBufferInPlace; |
|
705 methods[0].iConversionData = &CnvGb2312::ConversionData(); //only use one byte part |
|
706 methods[0].iNumberOfBytesPerCharacter = 1; |
|
707 methods[0].iNumberOfCoreBytesPerCharacter = 1; |
|
708 |
|
709 // step 1) gb18030-2byte |
|
710 methods[1].iNumberOfBytesAbleToConvert = Step1NumberOfBytesAbleToConvertToUnicode; |
|
711 methods[1].iConvertToIntermediateBufferInPlace = Step012DummyConvertToIntermediateBufferInPlace; |
|
712 methods[1].iConversionData = completeGb18030_2byteConversionData; |
|
713 methods[1].iNumberOfBytesPerCharacter = 2; |
|
714 methods[1].iNumberOfCoreBytesPerCharacter = 2; |
|
715 |
|
716 // step 2) gb18030 4-byte BMP |
|
717 methods[2].iNumberOfBytesAbleToConvert = Step2NumberOfBytesAbleToConvertToUnicode; |
|
718 methods[2].iConvertToIntermediateBufferInPlace = Step012DummyConvertToIntermediateBufferInPlace; |
|
719 methods[2].iConversionData = &CnvGb18030_4byte::ConversionData(); |
|
720 methods[2].iNumberOfBytesPerCharacter = 4; |
|
721 methods[2].iNumberOfCoreBytesPerCharacter = 4; |
|
722 |
|
723 // step 3) gb18030 4-byte non-BMP |
|
724 methods[3].iNumberOfBytesAbleToConvert = Step3NumberOfBytesAbleToConvertToUnicode; |
|
725 methods[3].iConvertToIntermediateBufferInPlace = Step3ConvertToIntermediateBufferInPlace; |
|
726 methods[3].iConversionData = &step3ConversionData; |
|
727 methods[3].iNumberOfBytesPerCharacter = 4; |
|
728 methods[3].iNumberOfCoreBytesPerCharacter = 4; |
|
729 |
|
730 return CnvUtilities::ConvertToUnicodeFromHeterogeneousForeign(aDefaultEndiannessOfForeignCharacters, aUnicode, aForeign, aNumberOfUnconvertibleCharacters, aIndexOfFirstByteOfFirstUnconvertibleCharacter, methods.Array()); |
|
731 } |
|
732 |
|
733 TBool CGB18030ConverterImpl::IsInThisCharacterSetL( |
|
734 TBool& aSetToTrue, |
|
735 TInt& aConfidenceLevel, |
|
736 const TDesC8& aSample) |
|
737 { |
|
738 aSetToTrue = ETrue; |
|
739 return CnvGb2312::IsCharGBBased(aConfidenceLevel, aSample); |
|
740 } |
|
741 |
|
742 CGB18030ConverterImpl* CGB18030ConverterImpl::NewL() |
|
743 { |
|
744 CGB18030ConverterImpl* self = new(ELeave) CGB18030ConverterImpl(); |
|
745 CleanupStack::PushL(self); |
|
746 self->ConstructL(); |
|
747 CleanupStack::Pop(); // self |
|
748 return self; |
|
749 } |
|
750 |
|
751 CGB18030ConverterImpl::~CGB18030ConverterImpl() |
|
752 { |
|
753 if (workingMemory) |
|
754 delete[] workingMemory; |
|
755 if (completeGb18030_2byteConversionData) |
|
756 delete completeGb18030_2byteConversionData; |
|
757 } |
|
758 |
|
759 CGB18030ConverterImpl::CGB18030ConverterImpl() |
|
760 { |
|
761 } |
|
762 |
|
763 TInt CGB18030ConverterImpl::ConstructL() |
|
764 { |
|
765 completeGb18030_2byteConversionData = new (ELeave)SCnvConversionData; |
|
766 CleanupStack::PushL(completeGb18030_2byteConversionData); |
|
767 workingMemory = new (ELeave) TUint8[KNumberOfBytesOfWorkingMemory]; //1040 bytes |
|
768 CleanupStack::Pop(); // completeGb18030_2byteConversionData |
|
769 SetUpCompleteGb18030_2byteConversionData(*completeGb18030_2byteConversionData, workingMemory); |
|
770 return 1; |
|
771 } |
|
772 |
|
773 const TImplementationProxy ImplementationTable[] = |
|
774 { |
|
775 IMPLEMENTATION_PROXY_ENTRY(0x10287038,CGB18030ConverterImpl::NewL) |
|
776 }; |
|
777 |
|
778 EXPORT_C const TImplementationProxy* ImplementationGroupProxy(TInt& aTableCount) |
|
779 { |
|
780 aTableCount = sizeof(ImplementationTable) / sizeof(TImplementationProxy); |
|
781 |
|
782 return ImplementationTable; |
|
783 } |
|
784 |