|
1 /* |
|
2 * Copyright (c) 1997-2009 Nokia Corporation and/or its subsidiary(-ies). |
|
3 * All rights reserved. |
|
4 * This component and the accompanying materials are made available |
|
5 * under the terms of "Eclipse Public License v1.0" |
|
6 * which accompanies this distribution, and is available |
|
7 * at the URL "http://www.eclipse.org/legal/epl-v10.html". |
|
8 * |
|
9 * Initial Contributors: |
|
10 * Nokia Corporation - initial contribution. |
|
11 * |
|
12 * Contributors: |
|
13 * |
|
14 * Description: |
|
15 * HZ is defined in RFC 1843 |
|
16 * |
|
17 */ |
|
18 |
|
19 |
|
20 #include <e32std.h> |
|
21 #include <charconv.h> |
|
22 #include "gb2312.h" |
|
23 #include <ecom/implementationproxy.h> |
|
24 #include <charactersetconverter.h> |
|
25 |
|
26 const TInt KIsInGbBlock=CCnvCharacterSetConverter::KStateDefault+1; |
|
27 #if defined(_DEBUG) |
|
28 const TInt KLengthOfIntermediateBuffer=6; |
|
29 #else |
|
30 const TInt KLengthOfIntermediateBuffer=150; |
|
31 #endif |
|
32 |
|
#ifdef _DEBUG

// Panic category raised by the debug-only assertions in this file.
_LIT(KLitPanicText, "HZ");

// Panic reasons; the numeric value is the reason code reported together with the "HZ" category.
enum TPanic
	{
	// IncrementNumberOfUnicodeCharactersNotConverted
	EPanicTooManyMatchingIndicesFound=1,
	// MakeAvailable
	EPanicBadNumberOfBytesRequiredToBeAvailable=2,
	EPanicBadNumberOfBytesAvailable=3,
	EPanicBadNumberOfBytesThatCanBeMadeAvailable=4,
	EPanicBadNumberOfBytesMadeAvailable1=5,
	EPanicBadNumberOfBytesMadeAvailable2=6,
	// descriptor book-keeping checks
	EPanicBadDescriptorSubDivision1=7,
	EPanicBadDescriptorSubDivision2=8,
	EPanicBadDescriptorSubDivision3=9,
	EPanicBadDescriptorSubDivision4=10,
	// pointer ordering checks
	EPanicBadPointers1=11,
	EPanicBadPointers2=12,
	EPanicBadPointers3=13,
	EPanicBadPointers4=14,
	EPanicBadPointers5=15,
	EPanicBadPointers6=16,
	EPanicBadPointers7=17,
	EPanicBadPointers8=18,
	EPanicBadPointers9=19,
	EPanicBadPointers10=20,
	EPanicBadPointers11=21,
	EPanicBadPointers12=22,
	// state and index checks
	EPanicStillInGbBlock=23,
	EPanicBadState=24,
	EPanicSplitBoundaryIsNotAsLateAsPossible1=25,
	EPanicSplitBoundaryIsNotAsLateAsPossible2=26,
	EPanicBadGb2312Index=27,
	EPanicBadHzIndex=28,
	EPanicBadTildeSequence=29,
	EPanicBadReturnValue1=30,
	EPanicBadReturnValue2=31,
	EPanicRemainderOfHzHasGotLonger=32
	};

// Raises a panic in category "HZ" with the given reason.
LOCAL_C void Panic(TPanic aPanic)
	{
	User::Panic(KLitPanicText, aPanic);
	}

#endif // _DEBUG
|
79 |
|
80 class CHZConverterImpl : public CCharacterSetConverterPluginInterface |
|
81 { |
|
82 |
|
83 public: |
|
84 virtual const TDesC8& ReplacementForUnconvertibleUnicodeCharacters(); |
|
85 |
|
86 virtual TInt ConvertFromUnicode( |
|
87 CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, |
|
88 const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, |
|
89 TDes8& aForeign, |
|
90 const TDesC16& aUnicode, |
|
91 CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters); |
|
92 |
|
93 virtual TInt ConvertToUnicode( |
|
94 CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, |
|
95 TDes16& aUnicode, |
|
96 const TDesC8& aForeign, |
|
97 TInt& aState, |
|
98 TInt& aNumberOfUnconvertibleCharacters, |
|
99 TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter); |
|
100 |
|
101 virtual TBool IsInThisCharacterSetL( |
|
102 TBool& aSetToTrue, |
|
103 TInt& aConfidenceLevel, |
|
104 const TDesC8& aSample); |
|
105 |
|
106 static CHZConverterImpl* NewL(); |
|
107 virtual ~CHZConverterImpl(); |
|
108 |
|
109 private: |
|
110 CHZConverterImpl(); |
|
111 |
|
112 }; |
|
113 |
|
114 |
|
115 |
|
116 const TDesC8& CHZConverterImpl::ReplacementForUnconvertibleUnicodeCharacters() |
|
117 { |
|
118 return CnvGb2312::ReplacementForUnconvertibleUnicodeCharacters(); |
|
119 } |
|
120 |
|
121 LOCAL_C void IncrementNumberOfUnicodeCharactersNotConverted(TInt aLengthOfUnicode, TInt& aNumberOfUnicodeCharactersNotConverted, CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters) // these seemingly haphazard order of these paramters is to match the position of the second and third parameters with the caller |
|
122 { |
|
123 ++aNumberOfUnicodeCharactersNotConverted; |
|
124 const TInt indexOfUnicodeCharacterNowNotConverted=aLengthOfUnicode-aNumberOfUnicodeCharactersNotConverted; |
|
125 #if defined(_DEBUG) |
|
126 TInt numberOfMatchingIndicesFound=0; |
|
127 #endif |
|
128 for (TInt i=aIndicesOfUnconvertibleCharacters.NumberOfIndices()-1; i>=0; --i) // must iterate backwards as items from aIndicesOfUnconvertibleCharacters may be deleted |
|
129 { |
|
130 if (aIndicesOfUnconvertibleCharacters[i]==indexOfUnicodeCharacterNowNotConverted) |
|
131 { |
|
132 aIndicesOfUnconvertibleCharacters.Remove(i); |
|
133 #if defined(_DEBUG) |
|
134 ++numberOfMatchingIndicesFound; |
|
135 #endif |
|
136 } |
|
137 } |
|
138 __ASSERT_DEBUG(numberOfMatchingIndicesFound<=1, Panic(EPanicTooManyMatchingIndicesFound)); |
|
139 } |
|
140 |
|
141 LOCAL_C void MakeAvailable(TInt aNumberOfBytesRequiredToBeAvailable, TInt& aNumberOfUnicodeCharactersNotConverted, CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters, TInt aLengthOfUnicode, const TUint8*& aPointerToLastUsedByte, TInt& aNumberOfBytesAvailable, TInt aNumberOfBytesThatCanBeMadeAvailable) // these seemingly haphazard order of these paramters is to match the position of the second to fourth parameters (inclusive) with the caller |
|
142 // makes available as much of aNumberOfBytesRequiredToBeAvailable as it can, even if the final value (i.e. value on returning) of aNumberOfBytesAvailable<aNumberOfBytesRequiredToBeAvailable (i.e. it doesn't initially give up straight away and do nothing if aNumberOfBytesRequiredToBeAvailable>aNumberOfBytesThatCanBeMadeAvailable+aNumberOfBytesAvailable) |
|
143 { |
|
144 __ASSERT_DEBUG(aNumberOfBytesRequiredToBeAvailable>0, Panic(EPanicBadNumberOfBytesRequiredToBeAvailable)); |
|
145 __ASSERT_DEBUG(aNumberOfBytesAvailable>=0, Panic(EPanicBadNumberOfBytesAvailable)); |
|
146 __ASSERT_DEBUG(aNumberOfBytesThatCanBeMadeAvailable>=0, Panic(EPanicBadNumberOfBytesThatCanBeMadeAvailable)); |
|
147 TInt numberOfBytesMadeAvailable=0; |
|
148 FOREVER |
|
149 { |
|
150 if (aNumberOfBytesAvailable>=aNumberOfBytesRequiredToBeAvailable) |
|
151 { |
|
152 break; // no more needs to be done |
|
153 } |
|
154 __ASSERT_DEBUG(numberOfBytesMadeAvailable<=aNumberOfBytesThatCanBeMadeAvailable, Panic(EPanicBadNumberOfBytesMadeAvailable1)); |
|
155 if (numberOfBytesMadeAvailable>=aNumberOfBytesThatCanBeMadeAvailable) |
|
156 { |
|
157 break; // give up - no more can be done |
|
158 } |
|
159 const TInt numberOfBytesInCharacter=(*aPointerToLastUsedByte&0x80)? 2: 1; |
|
160 aPointerToLastUsedByte-=numberOfBytesInCharacter; |
|
161 aNumberOfBytesAvailable+=numberOfBytesInCharacter; |
|
162 numberOfBytesMadeAvailable+=numberOfBytesInCharacter; |
|
163 IncrementNumberOfUnicodeCharactersNotConverted(aLengthOfUnicode, aNumberOfUnicodeCharactersNotConverted, aIndicesOfUnconvertibleCharacters); |
|
164 } |
|
165 __ASSERT_DEBUG(numberOfBytesMadeAvailable<=aNumberOfBytesThatCanBeMadeAvailable, Panic(EPanicBadNumberOfBytesMadeAvailable2)); |
|
166 } |
|
167 |
|
// Rewrites the GB2312 text held in aDescriptor as HZ, in place.
// Runs of two-byte characters (top bit set) are wrapped in "~{" ... "~}" and have their top bits
// cleared; a literal '~' in one-byte text is doubled to "~~".
// Every insertion needs extra room: when aDescriptor has no spare capacity, MakeAvailable drops
// characters from the tail of the text, and each dropped character is added to
// aNumberOfUnicodeCharactersNotConverted (and removed from aIndicesOfUnconvertibleCharacters).
// aLengthOfUnicode is the length of the Unicode text that produced aDescriptor; it is only used to
// work out the index of each dropped character.
// On return aDescriptor's length covers the HZ text that was produced.
LOCAL_C void ConvertFromGb2312ToHzInPlace(TDes8& aDescriptor, TInt& aNumberOfUnicodeCharactersNotConverted, CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters, TInt aLengthOfUnicode)
	{
	// it is legal for aDescriptor to be of length 0
	const TInt originalLengthOfDescriptor=aDescriptor.Length();
	if (originalLengthOfDescriptor>0)
		{
		// free bytes between the end of the text and the descriptor's maximum length
		TInt numberOfBytesAvailable=aDescriptor.MaxLength()-originalLengthOfDescriptor;
		// bytes up to and including pointerToPreviousByte have been processed; it starts one byte before the text
		TUint8* pointerToPreviousByte=CONST_CAST(TUint8*, aDescriptor.Ptr()-1);
		// last byte of the text still in use; moves forward on insertions and back when MakeAvailable drops characters
		const TUint8* pointerToLastUsedByte=pointerToPreviousByte+originalLengthOfDescriptor;
		TBool isInGbBlock=EFalse;
		FOREVER
			{
			__ASSERT_DEBUG((pointerToLastUsedByte-(aDescriptor.Ptr()-1))+numberOfBytesAvailable==aDescriptor.MaxLength(), Panic(EPanicBadDescriptorSubDivision1));
			__ASSERT_DEBUG(pointerToPreviousByte<pointerToLastUsedByte, Panic(EPanicBadPointers1));
			const TUint currentByte=*(pointerToPreviousByte+1);
			if (currentByte&0x80)
				{
				// first byte of a two-byte character
				if (!isInGbBlock)
					{
					MakeAvailable(4, aNumberOfUnicodeCharactersNotConverted, aIndicesOfUnconvertibleCharacters, aLengthOfUnicode, pointerToLastUsedByte, numberOfBytesAvailable, (pointerToLastUsedByte-pointerToPreviousByte)-2); // what's passed into the last parameter is not a typo - we do not want the two-byte character currently pointed to by (pointerToPreviousByte+1) to be made available
					if (numberOfBytesAvailable<4) // 4 bytes are required for the "~{" "~}" escape sequences (thus ensuring that at least a single double-byte character can be put into the GB-block)
						{
						break;
						}
					isInGbBlock=ETrue;
					// shift the unprocessed tail up by two bytes and write "~{" into the gap
					Mem::Copy(pointerToPreviousByte+3, pointerToPreviousByte+1, pointerToLastUsedByte-pointerToPreviousByte);
					++pointerToPreviousByte;
					*pointerToPreviousByte='~';
					++pointerToPreviousByte;
					*pointerToPreviousByte='{';
					numberOfBytesAvailable-=2;
					pointerToLastUsedByte+=2;
					}
				// clear the top bit of both bytes of the character
				++pointerToPreviousByte;
				*pointerToPreviousByte&=~0x80;
				__ASSERT_DEBUG(pointerToPreviousByte<pointerToLastUsedByte, Panic(EPanicBadPointers2));
				++pointerToPreviousByte;
				*pointerToPreviousByte&=~0x80;
				}
			else
				{
				// one-byte character
				if (isInGbBlock)
					{
					// also entered by "goto" from the bottom of the loop when the text ends inside a GB-block
closeGbBlock:
					isInGbBlock=EFalse;
					MakeAvailable(2, aNumberOfUnicodeCharactersNotConverted, aIndicesOfUnconvertibleCharacters, aLengthOfUnicode, pointerToLastUsedByte, numberOfBytesAvailable, pointerToLastUsedByte-pointerToPreviousByte);
					if (numberOfBytesAvailable<2) // 2 bytes are required for the "~}" escape sequence
						{
						// no room to append "~}": overwrite the two bytes just processed with it and count one more character as not converted
						IncrementNumberOfUnicodeCharactersNotConverted(aLengthOfUnicode, aNumberOfUnicodeCharactersNotConverted, aIndicesOfUnconvertibleCharacters);
						*(pointerToPreviousByte-1)='~';
						*pointerToPreviousByte='}';
						break;
						}
					// shift the unprocessed tail up by two bytes and write "~}" into the gap
					Mem::Copy(pointerToPreviousByte+3, pointerToPreviousByte+1, pointerToLastUsedByte-pointerToPreviousByte);
					++pointerToPreviousByte;
					*pointerToPreviousByte='~';
					++pointerToPreviousByte;
					*pointerToPreviousByte='}';
					numberOfBytesAvailable-=2;
					pointerToLastUsedByte+=2;
					__ASSERT_DEBUG(pointerToPreviousByte<=pointerToLastUsedByte, Panic(EPanicBadPointers3));
					if (pointerToPreviousByte>=pointerToLastUsedByte)
						{
						break;
						}
					}
				if (currentByte=='~')
					{
					MakeAvailable(1, aNumberOfUnicodeCharactersNotConverted, aIndicesOfUnconvertibleCharacters, aLengthOfUnicode, pointerToLastUsedByte, numberOfBytesAvailable, (pointerToLastUsedByte-pointerToPreviousByte)-1); // what's passed into the last parameter is not a typo - we do not want the "~" currently pointed to by (pointerToPreviousByte+1) to be made available
					if (numberOfBytesAvailable<1) // 1 byte is required for the extra "~" character
						{
						break;
						}
					// shift the unprocessed tail up by one byte and write the escaping '~' into the gap
					Mem::Copy(pointerToPreviousByte+2, pointerToPreviousByte+1, pointerToLastUsedByte-pointerToPreviousByte);
					++pointerToPreviousByte;
					*pointerToPreviousByte='~';
					numberOfBytesAvailable-=1;
					pointerToLastUsedByte+=1;
					}
				++pointerToPreviousByte;
				}
			__ASSERT_DEBUG(pointerToPreviousByte<=pointerToLastUsedByte, Panic(EPanicBadPointers4));
			if (pointerToPreviousByte>=pointerToLastUsedByte)
				{
				// all of the text has been processed
				if (isInGbBlock)
					{
					goto closeGbBlock; // this is to share the code for closing the GB-block
					}
				break;
				}
			}
		__ASSERT_DEBUG(pointerToPreviousByte<=pointerToLastUsedByte, Panic(EPanicBadPointers5));
		if (pointerToPreviousByte<pointerToLastUsedByte)
			{
			// the loop stopped early for lack of room: discard the unprocessed tail (one or two bytes, per the assertion below) and count it as one character not converted
			__ASSERT_DEBUG((pointerToPreviousByte==pointerToLastUsedByte-1) || (pointerToPreviousByte==pointerToLastUsedByte-2), Panic(EPanicBadPointers6));
			numberOfBytesAvailable+=(pointerToLastUsedByte-pointerToPreviousByte);
			pointerToLastUsedByte=pointerToPreviousByte;
			IncrementNumberOfUnicodeCharactersNotConverted(aLengthOfUnicode, aNumberOfUnicodeCharactersNotConverted, aIndicesOfUnconvertibleCharacters);
			}
		//if it gets out from FOREVER, isInGbBlock could not be ETrue ~~~ so wouldn't need the assert
		//__ASSERT_DEBUG(!isInGbBlock, Panic(EPanicStillInGbBlock));
		aDescriptor.SetLength(aDescriptor.MaxLength()-numberOfBytesAvailable);
		__ASSERT_DEBUG(aDescriptor.Length()==pointerToLastUsedByte-(aDescriptor.Ptr()-1), Panic(EPanicBadDescriptorSubDivision2));
		}
	}
|
273 |
|
274 TInt CHZConverterImpl::ConvertFromUnicode( |
|
275 CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, |
|
276 const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, |
|
277 TDes8& aForeign, |
|
278 const TDesC16& aUnicode, |
|
279 CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters) |
|
280 { |
|
281 TInt returnValue=CCnvCharacterSetConverter::DoConvertFromUnicode(CnvGb2312::ConversionData(), aDefaultEndiannessOfForeignCharacters, aReplacementForUnconvertibleUnicodeCharacters, aForeign, aUnicode, aIndicesOfUnconvertibleCharacters); |
|
282 if (returnValue<0) |
|
283 { |
|
284 return returnValue; // this is an error-code |
|
285 } |
|
286 ConvertFromGb2312ToHzInPlace(aForeign, returnValue, aIndicesOfUnconvertibleCharacters, aUnicode.Length()); |
|
287 return returnValue; |
|
288 } |
|
289 |
|
290 LOCAL_C TInt ConvertFromHzToHomogeneousGb2312(TBuf8<KLengthOfIntermediateBuffer>& aGb2312, TPtrC8& aHzBeingConsumed, TPtrC8& aRemainderOfHz, TInt& aState, TUint& aOutputConversionFlags) |
|
291 { |
|
292 // this function panics if aRemainderOfHz is of length 0 |
|
293 TUint8* pointerToPreviousGb2312Byte=CONST_CAST(TUint8*, aGb2312.Ptr()-1); |
|
294 const TUint8* pointerToCurrentHzByte=aRemainderOfHz.Ptr(); |
|
295 const TUint8* const pointerToLastHzByte=pointerToCurrentHzByte+(aRemainderOfHz.Length()-1); |
|
296 const TUint8* const pointerToLastHzByteToConvertThisTime=Min(pointerToLastHzByte, pointerToCurrentHzByte+(KLengthOfIntermediateBuffer-1)); |
|
297 FOREVER |
|
298 { |
|
299 const TUint currentHzByte=*pointerToCurrentHzByte; |
|
300 if (currentHzByte=='~') |
|
301 { |
|
302 __ASSERT_DEBUG(pointerToCurrentHzByte<=pointerToLastHzByte, Panic(EPanicBadPointers7)); |
|
303 if (pointerToCurrentHzByte>=pointerToLastHzByte) |
|
304 { |
|
305 aOutputConversionFlags|=CCnvCharacterSetConverter::EOutputConversionFlagInputIsTruncated; |
|
306 --pointerToCurrentHzByte; |
|
307 break; |
|
308 } |
|
309 ++pointerToCurrentHzByte; |
|
310 const TUint nextHzByte=*pointerToCurrentHzByte; |
|
311 switch (nextHzByte) |
|
312 { |
|
313 case '{': |
|
314 if (aState==KIsInGbBlock) |
|
315 { |
|
316 return CCnvCharacterSetConverter::EErrorIllFormedInput; |
|
317 } |
|
318 aState=KIsInGbBlock; |
|
319 break; |
|
320 case '}': |
|
321 if (aState==CCnvCharacterSetConverter::KStateDefault) |
|
322 { |
|
323 return CCnvCharacterSetConverter::EErrorIllFormedInput; |
|
324 } |
|
325 aState=CCnvCharacterSetConverter::KStateDefault; |
|
326 break; |
|
327 case '~': |
|
328 ++pointerToPreviousGb2312Byte; |
|
329 *pointerToPreviousGb2312Byte=STATIC_CAST(TUint8, currentHzByte); |
|
330 break; |
|
331 case 0x0a: |
|
332 break; |
|
333 default: |
|
334 return CCnvCharacterSetConverter::EErrorIllFormedInput; |
|
335 } |
|
336 } |
|
337 else |
|
338 { |
|
339 __ASSERT_DEBUG(pointerToCurrentHzByte<=pointerToLastHzByte, Panic(EPanicBadPointers8)); |
|
340 if (pointerToCurrentHzByte>pointerToLastHzByteToConvertThisTime) |
|
341 { |
|
342 --pointerToCurrentHzByte; |
|
343 break; |
|
344 } |
|
345 if (aState==CCnvCharacterSetConverter::KStateDefault) |
|
346 { |
|
347 ++pointerToPreviousGb2312Byte; |
|
348 *pointerToPreviousGb2312Byte=STATIC_CAST(TUint8, currentHzByte); |
|
349 } |
|
350 else |
|
351 { |
|
352 __ASSERT_DEBUG(aState==KIsInGbBlock, Panic(EPanicBadState)); |
|
353 __ASSERT_DEBUG(pointerToCurrentHzByte<=pointerToLastHzByteToConvertThisTime, Panic(EPanicBadPointers9)); |
|
354 if (pointerToCurrentHzByte>=pointerToLastHzByteToConvertThisTime) |
|
355 { |
|
356 aOutputConversionFlags|=CCnvCharacterSetConverter::EOutputConversionFlagInputIsTruncated; |
|
357 --pointerToCurrentHzByte; |
|
358 break; |
|
359 } |
|
360 ++pointerToCurrentHzByte; |
|
361 ++pointerToPreviousGb2312Byte; |
|
362 *pointerToPreviousGb2312Byte=STATIC_CAST(TUint8, currentHzByte|0x80); |
|
363 ++pointerToPreviousGb2312Byte; |
|
364 *pointerToPreviousGb2312Byte=STATIC_CAST(TUint8, *pointerToCurrentHzByte|0x80); |
|
365 } |
|
366 } |
|
367 __ASSERT_DEBUG(pointerToCurrentHzByte<=pointerToLastHzByte, Panic(EPanicBadPointers10)); |
|
368 if (pointerToCurrentHzByte>=pointerToLastHzByte) |
|
369 { |
|
370 break; |
|
371 } |
|
372 ++pointerToCurrentHzByte; |
|
373 } |
|
374 aGb2312.SetLength((pointerToPreviousGb2312Byte+1)-aGb2312.Ptr()); |
|
375 const TInt numberOfHzBytesBeingConsumed=(pointerToCurrentHzByte+1)-aRemainderOfHz.Ptr(); |
|
376 aHzBeingConsumed.Set(aRemainderOfHz.Left(numberOfHzBytesBeingConsumed)); |
|
377 aRemainderOfHz.Set(aRemainderOfHz.Mid(numberOfHzBytesBeingConsumed)); |
|
378 #if defined(_DEBUG) |
|
379 // AAA: check that if the split occurs on a boundary between some one-byte and some two-byte text, then aState corresponds to the state *after* the split (the code marked "BBB" relies on this) |
|
380 if (aRemainderOfHz.Length()>=2) |
|
381 { |
|
382 __ASSERT_DEBUG(aRemainderOfHz.Left(2)!=_L8("~{"), Panic(EPanicSplitBoundaryIsNotAsLateAsPossible1)); |
|
383 __ASSERT_DEBUG(aRemainderOfHz.Left(2)!=_L8("~}"), Panic(EPanicSplitBoundaryIsNotAsLateAsPossible2)); |
|
384 } |
|
385 #endif |
|
386 return 0; |
|
387 } |
|
388 |
|
// Maps a byte index in decoded GB2312 text back to a byte index in the HZ text it was decoded from.
// aHz is scanned from its start while counting the HZ bytes that have no counterpart in the GB2312
// text: one byte for each "~~" (two HZ bytes for one GB2312 byte) and two bytes for every other
// tilde sequence (which produces no GB2312 byte).
// aReturnMaximalHzIndex selects the stopping rule of the scan (see the condition inside the loop):
// with ETrue the scan carries on until the position being examined exceeds the candidate index,
// with EFalse it stops as soon as the last recorded position has reached the candidate index.
// Returns the last HZ index recorded before the scan stopped.
LOCAL_C TInt Gb2312IndexToHzIndex(const TDesC8& aHz, TInt aGb2312Index, TBool aReturnMaximalHzIndex)
	{
	// this function panics if aHz is of length 0
	// aHz may start in either KIsInGbBlock or CCnvCharacterSetConverter::KStateDefault state, but it must *not* have any truncated sequences (i.e. "tilde <something>" sequence that is not complete, or part of a 2-byte character sequence) at either its start or its end
	__ASSERT_DEBUG(aGb2312Index>=0, Panic(EPanicBadGb2312Index));
	TInt hzIndex=0;
	// number of HZ bytes seen so far that do not appear in the GB2312 text
	TInt offsetFromGb2312IndexToHzIndex=0;
	const TUint8* const pointerToFirstHzByte=aHz.Ptr();
	const TUint8* pointerToCurrentHzByte=pointerToFirstHzByte;
	const TUint8* const pointerToLastHzByte=pointerToFirstHzByte+(aHz.Length()-1);
	FOREVER
		{
		const TInt newHzIndex=pointerToCurrentHzByte-pointerToFirstHzByte;
		// where aGb2312Index would fall in the HZ text, given the escape bytes counted so far
		const TInt candidateHzIndex=aGb2312Index+offsetFromGb2312IndexToHzIndex;
		__ASSERT_DEBUG(hzIndex<=candidateHzIndex, Panic(EPanicBadHzIndex));
		if (aReturnMaximalHzIndex? (newHzIndex>candidateHzIndex): (hzIndex>=candidateHzIndex))
			{
			break;
			}
		hzIndex=newHzIndex;
		if (*pointerToCurrentHzByte=='~')
			{
			__ASSERT_DEBUG(pointerToCurrentHzByte<=pointerToLastHzByte, Panic(EPanicBadPointers11));
			if (pointerToCurrentHzByte>=pointerToLastHzByte)
				{
				break;
				}
			++pointerToCurrentHzByte;
			const TUint currentHzByte=*pointerToCurrentHzByte;
			if (currentHzByte=='~')
				{
				// "~~" is two HZ bytes for one GB2312 byte
				++offsetFromGb2312IndexToHzIndex;
				}
			else
				{
				// "~{", "~}" and "~<0x0a>" are two HZ bytes for no GB2312 byte
				__ASSERT_DEBUG((currentHzByte=='{') || (currentHzByte=='}') || (currentHzByte==0x0a), Panic(EPanicBadTildeSequence));
				offsetFromGb2312IndexToHzIndex+=2;
				}
			}
		__ASSERT_DEBUG(pointerToCurrentHzByte<=pointerToLastHzByte, Panic(EPanicBadPointers12));
		if (pointerToCurrentHzByte>=pointerToLastHzByte)
			{
			break;
			}
		++pointerToCurrentHzByte;
		}
	return hzIndex;
	}
|
437 |
|
438 TInt CHZConverterImpl::ConvertToUnicode( |
|
439 CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, |
|
440 TDes16& aUnicode, |
|
441 const TDesC8& aForeign, |
|
442 TInt& aState, |
|
443 TInt& aNumberOfUnconvertibleCharacters, |
|
444 TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter) |
|
445 { |
|
446 aUnicode.SetLength(0); |
|
447 TPtrC8 remainderOfHz(aForeign); |
|
448 TInt numberOfHzBytesConsumed=0; |
|
449 TUint outputConversionFlags=0; |
|
450 TUint inputConversionFlags=CCnvCharacterSetConverter::EInputConversionFlagAppend; |
|
451 const SCnvConversionData& gb2312ConversionData=CnvGb2312::ConversionData(); |
|
452 FOREVER |
|
453 { |
|
454 __ASSERT_DEBUG(numberOfHzBytesConsumed+remainderOfHz.Length()==aForeign.Length(), Panic(EPanicBadDescriptorSubDivision3)); |
|
455 #if defined(_DEBUG) |
|
456 const TInt oldLengthOfRemainderOfHz=remainderOfHz.Length(); |
|
457 #endif |
|
458 TBuf8<KLengthOfIntermediateBuffer> gb2312; |
|
459 TPtrC8 hzBeingConsumed; |
|
460 const TInt returnValue1=ConvertFromHzToHomogeneousGb2312(gb2312, hzBeingConsumed, remainderOfHz, aState, outputConversionFlags); |
|
461 if (returnValue1<0) |
|
462 { |
|
463 return returnValue1; // this is an error-code |
|
464 } |
|
465 __ASSERT_DEBUG(returnValue1==0, Panic(EPanicBadReturnValue1)); |
|
466 __ASSERT_DEBUG(hzBeingConsumed.Length()+remainderOfHz.Length()==oldLengthOfRemainderOfHz, Panic(EPanicRemainderOfHzHasGotLonger)); |
|
467 if (hzBeingConsumed.Length()==0) |
|
468 { |
|
469 break; |
|
470 } |
|
471 TInt numberOfUnconvertibleCharacters; |
|
472 TInt indexOfFirstByteOfFirstUnconvertibleCharacter; |
|
473 const TInt returnValue2=CCnvCharacterSetConverter::DoConvertToUnicode(gb2312ConversionData, aDefaultEndiannessOfForeignCharacters, aUnicode, gb2312, numberOfUnconvertibleCharacters, indexOfFirstByteOfFirstUnconvertibleCharacter, outputConversionFlags, inputConversionFlags); |
|
474 if (returnValue2<0) |
|
475 { |
|
476 return returnValue2; // this is an error-code |
|
477 } |
|
478 if (numberOfUnconvertibleCharacters>0) |
|
479 { |
|
480 if (aNumberOfUnconvertibleCharacters==0) |
|
481 { |
|
482 aIndexOfFirstByteOfFirstUnconvertibleCharacter=numberOfHzBytesConsumed+Gb2312IndexToHzIndex(hzBeingConsumed, indexOfFirstByteOfFirstUnconvertibleCharacter, EFalse); |
|
483 } |
|
484 aNumberOfUnconvertibleCharacters+=numberOfUnconvertibleCharacters; |
|
485 } |
|
486 if (returnValue2>0) |
|
487 { |
|
488 const TInt numberOfGb2312BytesConverted=gb2312.Length()-returnValue2; |
|
489 __ASSERT_DEBUG(numberOfGb2312BytesConverted>=0, Panic(EPanicBadReturnValue2)); |
|
490 // don't call gb2312.SetLength(numberOfGb2312BytesConverted) as we want to access gb2312[numberOfGb2312BytesConverted] - in any case, gb2312's length is never going to be used again |
|
491 // don't bother re-setting remainderOfHz as it won't be used again |
|
492 numberOfHzBytesConsumed+=Gb2312IndexToHzIndex(hzBeingConsumed, numberOfGb2312BytesConverted, ETrue); |
|
493 aState=(gb2312[numberOfGb2312BytesConverted]&0x80)? KIsInGbBlock: CCnvCharacterSetConverter::KStateDefault; // BBB: if the split (between the text that was converted and the text that wasn't converted) occurs on a boundary between some one-byte and some two-byte text, then aState corresponds to the state *after* the split (the code marked "AAA" checks this) - this means that we set aState according to gb2312[numberOfGb2312BytesConverted] rather than gb2312[numberOfGb2312BytesConverted-1] |
|
494 break; |
|
495 } |
|
496 numberOfHzBytesConsumed+=hzBeingConsumed.Length(); |
|
497 remainderOfHz.Set(aForeign.Mid(numberOfHzBytesConsumed)); |
|
498 __ASSERT_DEBUG(numberOfHzBytesConsumed+remainderOfHz.Length()==aForeign.Length(), Panic(EPanicBadDescriptorSubDivision4)); |
|
499 if (remainderOfHz.Length()==0) |
|
500 { |
|
501 break; |
|
502 } |
|
503 if (numberOfHzBytesConsumed>0) |
|
504 { |
|
505 inputConversionFlags|=CCnvCharacterSetConverter::EInputConversionFlagAllowTruncatedInputNotEvenPartlyConsumable; |
|
506 } |
|
507 } |
|
508 // N.B. remainderOfHz is in an undefined state by this point |
|
509 if ((numberOfHzBytesConsumed==0) && (outputConversionFlags&CCnvCharacterSetConverter::EOutputConversionFlagInputIsTruncated)) |
|
510 { |
|
511 return CCnvCharacterSetConverter::EErrorIllFormedInput; |
|
512 } |
|
513 return aForeign.Length()-numberOfHzBytesConsumed; |
|
514 } |
|
515 |
|
516 TBool CHZConverterImpl::IsInThisCharacterSetL( |
|
517 TBool& aSetToTrue, |
|
518 TInt& aConfidenceLevel, |
|
519 const TDesC8& aSample) |
|
520 { |
|
521 aSetToTrue=ETrue; |
|
522 TInt sampleLength = aSample.Length(); |
|
523 TInt pairOfTilde=0; |
|
524 TInt occrenceOfNonHz=0; |
|
525 aConfidenceLevel = 50; |
|
526 // Hz encoding uses escape sequences... |
|
527 for (TInt i = 0; i < sampleLength; ++i) |
|
528 { |
|
529 if (aSample[i]>0x7e) |
|
530 occrenceOfNonHz++; |
|
531 if (aSample[i]==0x7e) |
|
532 { |
|
533 TInt increment1 = i+1; |
|
534 if (increment1 >= sampleLength) |
|
535 break; |
|
536 if ((aSample[increment1] == 0x7b)||(aSample[increment1] == 0x7d)||(aSample[increment1] == 0x7e)) |
|
537 { |
|
538 pairOfTilde++; |
|
539 i++; |
|
540 } |
|
541 } |
|
542 }//for |
|
543 if (sampleLength) |
|
544 { |
|
545 TInt occurrenceOftilde =2*pairOfTilde*100/sampleLength; |
|
546 aConfidenceLevel=aConfidenceLevel-Max(0,(4-occurrenceOftilde)); |
|
547 aConfidenceLevel += occurrenceOftilde; |
|
548 aConfidenceLevel -= ((occrenceOfNonHz*100)/sampleLength); |
|
549 } |
|
550 return ETrue; |
|
551 } |
|
552 |
|
553 CHZConverterImpl* CHZConverterImpl::NewL() |
|
554 { |
|
555 CHZConverterImpl* self = new(ELeave) CHZConverterImpl(); |
|
556 return self; |
|
557 } |
|
558 |
|
559 CHZConverterImpl::~CHZConverterImpl() |
|
560 { |
|
561 } |
|
562 |
|
563 CHZConverterImpl::CHZConverterImpl() |
|
564 { |
|
565 } |
|
566 |
|
567 const TImplementationProxy ImplementationTable[] = |
|
568 { |
|
569 IMPLEMENTATION_PROXY_ENTRY(0x10006065, CHZConverterImpl::NewL) |
|
570 }; |
|
571 |
|
572 EXPORT_C const TImplementationProxy* ImplementationGroupProxy(TInt& aTableCount) |
|
573 { |
|
574 aTableCount = sizeof(ImplementationTable) / sizeof(TImplementationProxy); |
|
575 |
|
576 return ImplementationTable; |
|
577 } |