|
1 /* |
|
2 * Copyright (c) 2003-2009 Nokia Corporation and/or its subsidiary(-ies). |
|
3 * All rights reserved. |
|
4 * This component and the accompanying materials are made available |
|
5 * under the terms of "Eclipse Public License v1.0" |
|
6 * which accompanies this distribution, and is available |
|
7 * at the URL "http://www.eclipse.org/legal/epl-v10.html". |
|
8 * |
|
9 * Initial Contributors: |
|
10 * Nokia Corporation - initial contribution. |
|
11 * |
|
12 * Contributors: |
|
13 * |
|
14 * Description: |
|
15 * |
|
16 */ |
|
17 |
|
18 |
|
19 #include <e32std.h> |
|
20 #include <e32base.h> |
|
21 #include <charconv.h> |
|
22 #include <convutils.h> |
|
23 |
|
24 const TInt KNoPreviousCharacterSet=-1; |
|
25 const TInt KDefaultCharacterSet = 0; |
|
26 const TUint KControlCharacterEscape=0x1b; |
|
27 |
|
28 #if defined(_DEBUG) |
|
29 //It will cause performance problem with small KMaximumLengthOfIntermediateBuffer. |
|
30 //Please use release version to test performance cases. |
|
31 const TInt KMaximumLengthOfIntermediateBuffer=5; |
|
32 #else |
|
33 const TInt KMaximumLengthOfIntermediateBuffer=150; |
|
34 #endif |
|
35 |
|
36 struct SCnvConversionData; |
|
37 |
|
38 _LIT(KLitPanicText, "CONVUTILS"); |
|
39 |
|
// Panic reasons raised by this file. The numeric value of each enumerator is
// the panic number, so the order must not change: numbering starts at 1 and
// new reasons may only be added at the end.
enum TPanic
	{
	EPanicBadInputConversionFlags1 = 1,
	EPanicBadInputConversionFlags2,
	EPanicBadInputConversionFlags3,
	EPanicBadNumberOfUnicodeElementsConsumed,
	EPanicAppendFlagViolated,
	EPanicBadNumberOfUnicodeCharactersConverted,
	EPanicBadNumberOfCharactersThatDroppedOut,
	EPanicLoopCounterOverRun1,
	EPanicLoopCounterOverRun2,
	EPanicDescriptorNotWholeNumberOfCharacters1,
	EPanicDescriptorNotWholeNumberOfCharacters2,
	EPanicDescriptorNotWholeNumberOfCharacters3,
	EPanicDescriptorNotWholeNumberOfCharacters4,
	EPanicBadStartOfNextEscapeSequence,
	EPanicInconsistentNumberOfForeignBytesRemaining,
	EPanicBadLengthOfRunToConvert1,
	EPanicBadLengthOfRunToConvert2,
	EPanicBadMethodPointer,
	EPanicBadMethodData1,
	EPanicBadMethodData2,
	EPanicBadMethodData3,
	EPanicBadMethodData4,
	EPanicBadNumberOfCharacterSets,
	EPanicBadConversionDataPointer1,
	EPanicBadConversionDataPointer2,
	EPanicBadConversionDataPointer3,
	EPanicBadFunctionPointer1,
	EPanicBadFunctionPointer2,
	EPanicBadFunctionPointer3,
	EPanicBadEscapeSequencePointer1,
	EPanicBadEscapeSequencePointer2,
	EPanicBadNumberOfStates,
	EPanicBadEscapeSequenceStart,
	EPanicBadNumberOfMethods,
	EPanicBadSurrogatePair1,
	EPanicBadSurrogatePair2,
	EPanicBadRemainderOfForeign,
	EPanicOutputDescriptorTooShortEvenToHoldEscapeSequenceToDefaultCharacterSet
	};
|
81 |
|
82 LOCAL_C void Panic(TPanic aPanic) |
|
83 { |
|
84 User::Panic(KLitPanicText, aPanic); |
|
85 } |
|
86 |
|
87 /** Converts Unicode text into a complex foreign character set encoding. This |
|
88 is an encoding which cannot be converted simply by calling |
|
89 CCnvCharacterSetConverter::DoConvertFromUnicode(). It may be modal (e.g. JIS) |
|
90 or non-modal (e.g. Shift-JIS). |
|
91 |
|
92 The Unicode text specified in aUnicode is converted using the array of |
|
93 conversion data objects (aArrayOfCharacterSets) provided by the plug-in for |
|
94 the complex character set encoding, and the converted text is returned in |
|
95 aForeign. Any existing contents in aForeign are overwritten. |
|
96 |
|
97 Unlike CCnvCharacterSetConverter::DoConvertFromUnicode(), multiple character |
|
98 sets can be specified. aUnicode is converted using the first character conversion |
|
99 data object in the array. When a character is found which cannot be converted |
|
100 using that data, each character set in the array is tried in turn. If it cannot |
|
101 be converted using any object in the array, the index of the character is |
|
102 appended to aIndicesOfUnconvertibleCharacters and the character is replaced |
|
103 by aReplacementForUnconvertibleUnicodeCharacters. |
|
104 |
|
105 If it can be converted using another object in the array, that object is used |
|
106 to convert all subsequent characters until another unconvertible character |
|
107 is found. |
|
108 |
|
109 @param aDefaultEndiannessOfForeignCharacters The default endian-ness to use |
|
110 when writing the characters in the foreign character set. If an endian-ness |
|
111 for foreign characters is specified in the current conversion data object, |
|
112 then that is used instead and the value of |
|
113 aDefaultEndiannessOfForeignCharacters is ignored. |
|
114 @param aReplacementForUnconvertibleUnicodeCharacters The single character (one |
|
115 or more byte values) which is used to replace unconvertible characters. |
|
116 @param aForeign On return, contains the converted text in the non-Unicode |
|
117 character set. |
|
118 @param aUnicode The source Unicode text to be converted. |
|
119 @param aIndicesOfUnconvertibleCharacters On return, holds an ascending array |
|
120 of the indices of each Unicode character in the source text which could not |
|
121 be converted (because none of the target character sets have an equivalent |
|
122 character). |
|
123 @param aArrayOfCharacterSets Array of character conversion data objects, |
|
124 representing the character sets which comprise a complex character set |
|
125 encoding. These are used in sequence to convert the Unicode text. There must |
|
126 be at least one character set in this array and no character set may have any |
|
127 NULL member data, or a panic occurs. |
|
128 @return The number of unconverted characters left at the end of the input |
|
129 descriptor (e.g. because aForeign was not long enough to hold all the text), |
|
130 or a negative error value, as defined in CCnvCharacterSetConverter::TError. */ |
|
131 EXPORT_C TInt CnvUtilities::ConvertFromUnicode( |
|
132 CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, |
|
133 const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, |
|
134 TDes8& aForeign, |
|
135 const TDesC16& aUnicode, |
|
136 CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters, |
|
137 const TArray<SCharacterSet>& aArrayOfCharacterSets) |
|
138 { |
|
139 TUint notUsed; |
|
140 return ConvertFromUnicode(aDefaultEndiannessOfForeignCharacters, |
|
141 aReplacementForUnconvertibleUnicodeCharacters, |
|
142 aForeign, |
|
143 aUnicode, |
|
144 aIndicesOfUnconvertibleCharacters, |
|
145 aArrayOfCharacterSets, |
|
146 notUsed, |
|
147 0); |
|
148 } |
|
149 |
|
150 /** Converts Unicode text into a complex foreign character set encoding. This is |
|
151 an encoding which cannot be converted simply by a call to |
|
152 CCnvCharacterSetConverter::DoConvertFromUnicode(). It may be modal (e.g. JIS) |
|
153 or non-modal (e.g. Shift-JIS). |
|
154 |
|
155 The Unicode text specified in aUnicode is converted using the array of conversion |
|
156 data objects (aArrayOfCharacterSets) provided by the plug-in for the complex |
|
157 character set encoding and the converted text is returned in aForeign. The |
|
158 function can either append to aForeign or overwrite its contents (if any). |
|
159 |
|
160 Unlike CCnvCharacterSetConverter::DoConvertFromUnicode(), multiple character |
|
161 sets can be specified. aUnicode is converted using the first character conversion |
|
162 data object in the array. When a character is found which cannot be converted |
|
163 using that data, each character set in the array is tried in turn. If it cannot |
|
164 be converted using any object in the array, the index of the character is |
|
165 appended to aIndicesOfUnconvertibleCharacters and the character is replaced |
|
166 by aReplacementForUnconvertibleUnicodeCharacters. |
|
167 |
|
168 If it can be converted using another object in the array, that object is used |
|
169 to convert all subsequent characters until another unconvertible character |
|
170 is found. |
|
171 |
|
172 @param aDefaultEndiannessOfForeignCharacters The default endian-ness to use |
|
173 when writing the characters in the foreign character set. If an endian-ness |
|
174 for foreign characters is specified in the current conversion data object, |
|
175 then that is used instead and the value of |
|
176 aDefaultEndiannessOfForeignCharacters is ignored. |
|
177 @param aReplacementForUnconvertibleUnicodeCharacters The single character (one |
|
178 or more byte values) which is used to replace unconvertible characters. |
|
179 @param aForeign On return, contains the converted text in the non-Unicode |
|
180 character set. This may already contain some text. If it does, and if |
|
181 aInputConversionFlags specifies EInputConversionFlagAppend, then the converted |
|
182 text is appended to this descriptor. |
|
183 @param aUnicode The source Unicode text to be converted. |
|
184 @param aIndicesOfUnconvertibleCharacters On return, holds an ascending array |
|
185 of the indices of each Unicode character in the source text which could not |
|
186 be converted (because none of the target character sets have an equivalent |
|
187 character). |
|
188 @param aArrayOfCharacterSets Array of character set data objects. These are |
|
189 used in sequence to convert the Unicode text. There must be at least one |
|
190 character set in this array and no character set may have any NULL member |
|
191 data, or a panic occurs. |
|
192 @param aOutputConversionFlags If the input descriptor ended in a truncated |
|
193 sequence, e.g. the first half only of a Unicode surrogate pair, this returns |
|
194 with the EOutputConversionFlagInputIsTruncated flag set. |
|
195 @param aInputConversionFlags Specify |
|
196 CCnvCharacterSetConverter::EInputConversionFlagAppend to append the text to |
|
197 aForeign. Specify CCnvCharacterSetConverter::EInputConversionFlagAllowTruncatedInputNotEvenPartlyConsumable |
|
198 to prevent the function from returning the error-code EErrorIllFormedInput |
|
199 when the input descriptor consists of nothing but a truncated sequence. The |
|
200 CCnvCharacterSetConverter::EInputConversionFlagStopAtFirstUnconvertibleCharacter |
|
201 flag must not be set, otherwise a panic occurs. |
|
202 @return The number of unconverted characters left at the end of the input descriptor |
|
203 (e.g. because aForeign was not long enough to hold all the text), or a negative |
|
204 error value, as defined in CCnvCharacterSetConverter::TError. */ |
|
205 EXPORT_C TInt CnvUtilities::ConvertFromUnicode( |
|
206 CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, |
|
207 const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, |
|
208 TDes8& aForeign, |
|
209 const TDesC16& aUnicode, |
|
210 CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters, |
|
211 const TArray<SCharacterSet>& aArrayOfCharacterSets, |
|
212 TUint& aOutputConversionFlags, |
|
213 TUint aInputConversionFlags) |
|
214 { |
|
215 __ASSERT_ALWAYS(~aInputConversionFlags&CCnvCharacterSetConverter::EInputConversionFlagStopAtFirstUnconvertibleCharacter, Panic(EPanicBadInputConversionFlags1)); |
|
216 CheckArrayOfCharacterSets(aArrayOfCharacterSets); |
|
217 aOutputConversionFlags=0; |
|
218 TUint internalInputConversionFlags=aInputConversionFlags; |
|
219 if (~aInputConversionFlags&CCnvCharacterSetConverter::EInputConversionFlagAppend) |
|
220 { |
|
221 aForeign.SetLength(0); |
|
222 internalInputConversionFlags|=CCnvCharacterSetConverter::EInputConversionFlagAppend; |
|
223 } |
|
224 if (aUnicode.Length()==0) |
|
225 { |
|
226 return 0; |
|
227 } |
|
228 if (aForeign.MaxLength()==aForeign.Length()) // relies on the fact that aForeign's length has been set to zero if aInputConversionFlags does not have CCnvCharacterSetConverter::EInputConversionFlagAppend set |
|
229 { |
|
230 return aUnicode.Length(); |
|
231 } |
|
232 TDes8* foreign=&aForeign; |
|
233 TPtr8 dummyForeign(NULL, 0, 0); |
|
234 if (aInputConversionFlags&CCnvCharacterSetConverter::EInputConversionFlagMustEndInDefaultCharacterSet) |
|
235 { |
|
236 TInt dummyMaximumLength = |
|
237 aForeign.MaxLength() - aArrayOfCharacterSets[KDefaultCharacterSet].iEscapeSequence->Length(); |
|
238 __ASSERT_ALWAYS(dummyMaximumLength >= 0, |
|
239 Panic(EPanicOutputDescriptorTooShortEvenToHoldEscapeSequenceToDefaultCharacterSet)); |
|
240 dummyForeign.Set(const_cast <TUint8*> (aForeign.Ptr()), |
|
241 aForeign.Length(), |
|
242 dummyMaximumLength); |
|
243 foreign=&dummyForeign; |
|
244 } |
|
245 const TInt numberOfCharacterSets=aArrayOfCharacterSets.Count(); |
|
246 TInt numberOfUnicodeElementsConsumed=0; |
|
247 internalInputConversionFlags|=CCnvCharacterSetConverter::EInputConversionFlagStopAtFirstUnconvertibleCharacter; // this is not just an optimization - it ensures that "foreign" doesn't get filled up too much each time CCnvCharacterSetConverter::DoConvertFromUnicode is called |
|
248 TInt previousCharacterSet = aInputConversionFlags&CCnvCharacterSetConverter::EInputConversionFlagAssumeStartInDefaultCharacterSet? |
|
249 KDefaultCharacterSet : KNoPreviousCharacterSet; |
|
250 FOREVER |
|
251 { |
|
252 for (TInt presentCharacterSet=KDefaultCharacterSet;;) |
|
253 { |
|
254 __ASSERT_DEBUG(numberOfUnicodeElementsConsumed<=aUnicode.Length(), Panic(EPanicBadNumberOfUnicodeElementsConsumed)); |
|
255 if (numberOfUnicodeElementsConsumed>=aUnicode.Length()) |
|
256 { |
|
257 goto end; |
|
258 } |
|
259 const SCharacterSet& characterSet=aArrayOfCharacterSets[presentCharacterSet]; |
|
260 const TInt oldNumberOfBytesInForeign=foreign->Length(); |
|
261 if (numberOfUnicodeElementsConsumed>0) |
|
262 { |
|
263 internalInputConversionFlags|=CCnvCharacterSetConverter::EInputConversionFlagAllowTruncatedInputNotEvenPartlyConsumable; |
|
264 } |
|
265 CCnvCharacterSetConverter::TArrayOfAscendingIndices indicesOfUnconvertibleCharacters; |
|
266 const TInt returnValue=CCnvCharacterSetConverter::DoConvertFromUnicode(*characterSet.iConversionData, aDefaultEndiannessOfForeignCharacters, KNullDesC8, *foreign, aUnicode.Mid(numberOfUnicodeElementsConsumed), indicesOfUnconvertibleCharacters, aOutputConversionFlags, internalInputConversionFlags); |
|
267 if (returnValue<0) |
|
268 { |
|
269 return returnValue; // this is an error-code |
|
270 } |
|
271 __ASSERT_DEBUG(foreign->Length()>=oldNumberOfBytesInForeign, Panic(EPanicAppendFlagViolated)); |
|
272 TInt indexOfFirstUnconvertibleCharacter; |
|
273 if (indicesOfUnconvertibleCharacters.NumberOfIndices()==0) |
|
274 { |
|
275 indexOfFirstUnconvertibleCharacter=-1; |
|
276 numberOfUnicodeElementsConsumed=aUnicode.Length()-returnValue; |
|
277 } |
|
278 else |
|
279 { |
|
280 indexOfFirstUnconvertibleCharacter=indicesOfUnconvertibleCharacters[0]; |
|
281 numberOfUnicodeElementsConsumed+=indexOfFirstUnconvertibleCharacter; |
|
282 __ASSERT_DEBUG(numberOfUnicodeElementsConsumed+LengthOfUnicodeCharacter(aUnicode, numberOfUnicodeElementsConsumed)==aUnicode.Length()-returnValue, Panic(EPanicBadNumberOfUnicodeCharactersConverted)); |
|
283 } |
|
284 if (indexOfFirstUnconvertibleCharacter!=0) // if at least one Unicode character at the start of CCnvCharacterSetConverter::DoConvertFromUnicode's input descriptor was convertible... |
|
285 { |
|
286 TBool gotoEnd = EFalse; |
|
287 if (foreign->Length()>oldNumberOfBytesInForeign) |
|
288 { |
|
289 TInt numberOfCharactersThatDroppedOut=0; |
|
290 // Insert an escape sequence if this character set is different from the last one. |
|
291 if (presentCharacterSet != previousCharacterSet) |
|
292 { |
|
293 // Insert escape sequence (if requred) in front of the last encoded run of text. |
|
294 // Note that this may cause some characters to drop out at the end. |
|
295 (*characterSet.iConvertFromIntermediateBufferInPlace)(oldNumberOfBytesInForeign, *foreign, numberOfCharactersThatDroppedOut); |
|
296 if (oldNumberOfBytesInForeign < foreign->Length()) |
|
297 previousCharacterSet = presentCharacterSet; |
|
298 } |
|
299 numberOfUnicodeElementsConsumed-=numberOfCharactersThatDroppedOut; |
|
300 if (numberOfCharactersThatDroppedOut>0 )// if "foreign" has been filled to as much as it will hold... |
|
301 { |
|
302 gotoEnd = ETrue; |
|
303 } |
|
304 } |
|
305 if (indexOfFirstUnconvertibleCharacter<0) // if we've successfully converted up to the end of aUnicode (using *characterSet.iConversionData)... |
|
306 { |
|
307 gotoEnd = ETrue; |
|
308 } |
|
309 if (gotoEnd) |
|
310 { |
|
311 if ( aInputConversionFlags&CCnvCharacterSetConverter::EInputConversionFlagMustEndInDefaultCharacterSet |
|
312 && previousCharacterSet != KDefaultCharacterSet |
|
313 && previousCharacterSet != KNoPreviousCharacterSet) |
|
314 { |
|
315 aForeign.SetLength(foreign->Length()); |
|
316 aForeign.Append(*aArrayOfCharacterSets[KDefaultCharacterSet].iEscapeSequence); |
|
317 foreign=NULL; |
|
318 } |
|
319 goto end; |
|
320 } |
|
321 break; |
|
322 } |
|
323 __ASSERT_DEBUG(presentCharacterSet<numberOfCharacterSets, Panic(EPanicLoopCounterOverRun1)); |
|
324 ++presentCharacterSet; |
|
325 if (presentCharacterSet>=numberOfCharacterSets) |
|
326 { |
|
327 if ((foreign->MaxLength()-foreign->Length()<aReplacementForUnconvertibleUnicodeCharacters.Length()) || |
|
328 (aIndicesOfUnconvertibleCharacters.AppendIndex(numberOfUnicodeElementsConsumed)!=CCnvCharacterSetConverter::TArrayOfAscendingIndices::EAppendSuccessful)) // the tests must be done in this order as AppendIndex must only be called if there is room for aReplacementForUnconvertibleUnicodeCharacters |
|
329 { |
|
330 goto end; |
|
331 } |
|
332 numberOfUnicodeElementsConsumed+=LengthOfUnicodeCharacter(aUnicode, numberOfUnicodeElementsConsumed); |
|
333 foreign->Append(aReplacementForUnconvertibleUnicodeCharacters); |
|
334 break; |
|
335 } |
|
336 } |
|
337 } |
|
338 end: |
|
339 if (foreign!=NULL) |
|
340 { |
|
341 aForeign.SetLength(foreign->Length()); |
|
342 foreign=NULL; |
|
343 } |
|
344 if ((numberOfUnicodeElementsConsumed==0) && (aOutputConversionFlags&CCnvCharacterSetConverter::EOutputConversionFlagInputIsTruncated) && (~aInputConversionFlags&CCnvCharacterSetConverter::EInputConversionFlagAllowTruncatedInputNotEvenPartlyConsumable)) |
|
345 { |
|
346 return CCnvCharacterSetConverter::EErrorIllFormedInput; |
|
347 } |
|
348 return aUnicode.Length()-numberOfUnicodeElementsConsumed; |
|
349 } |
|
350 |
|
351 |
|
352 /** Inserts an escape sequence into the descriptor. |
|
353 |
|
354 This function is provided to help in the implementation of |
|
355 ConvertFromUnicode() for modal character set encodings. |
|
356 Each SCharacterSet object in the array passed to |
|
357 ConvertFromUnicode() must have its |
|
358 iConvertFromIntermediateBufferInPlace member assigned. To |
|
359 do this for a modal character set encoding, implement a function whose |
|
360 signature matches that of FConvertFromIntermediateBufferInPlace |
|
361 and which calls this function, passing all arguments unchanged, and |
|
362 specifying the character set's escape sequence and the number of bytes per |
|
363 character. |
|
364 |
|
365 @param aStartPositionInDescriptor The byte position in aDescriptor at which |
|
366 the escape sequence is inserted. If the character set uses more than one byte |
|
367 per character, this position must be the start of a character, otherwise a |
|
368 panic occurs. |
|
369 @param aDescriptor The descriptor into which the escape sequence is inserted. |
|
370 @param aNumberOfCharactersThatDroppedOut The escape sequence is inserted into |
|
371 the start of aDescriptor and any characters that need to drop out to make |
|
372 room for the escape sequence (because the descriptor's maximum length was |
|
373 not long enough) drop out from the end of the buffer. This parameter indicates |
|
374 the number of characters that needed to drop out. |
|
375 @param aEscapeSequence The escape sequence for the character set. |
|
376 @param aNumberOfBytesPerCharacter The number of bytes per character. */ |
|
377 EXPORT_C void CnvUtilities::ConvertFromIntermediateBufferInPlace( |
|
378 TInt aStartPositionInDescriptor, |
|
379 TDes8& aDescriptor, |
|
380 TInt& aNumberOfCharactersThatDroppedOut, |
|
381 const TDesC8& aEscapeSequence, |
|
382 TInt aNumberOfBytesPerCharacter) |
|
383 { |
|
384 const TInt lengthOfDescriptor=aDescriptor.Length(); |
|
385 __ASSERT_ALWAYS((lengthOfDescriptor-aStartPositionInDescriptor)%aNumberOfBytesPerCharacter==0, Panic(EPanicDescriptorNotWholeNumberOfCharacters1)); |
|
386 aNumberOfCharactersThatDroppedOut=(Max(0, aEscapeSequence.Length()-(aDescriptor.MaxLength()-lengthOfDescriptor))+(aNumberOfBytesPerCharacter-1))/aNumberOfBytesPerCharacter; |
|
387 const TInt lengthOfRunInCharacters=(lengthOfDescriptor-aStartPositionInDescriptor)/aNumberOfBytesPerCharacter; |
|
388 if (aNumberOfCharactersThatDroppedOut>=lengthOfRunInCharacters) // ">=" is correct (rather than ">") as if there's only room for the escape sequence we don't want to have it in the descriptor |
|
389 { |
|
390 aNumberOfCharactersThatDroppedOut=lengthOfRunInCharacters; |
|
391 aDescriptor.SetLength(aStartPositionInDescriptor); |
|
392 } |
|
393 else |
|
394 { |
|
395 aDescriptor.SetLength(lengthOfDescriptor-(aNumberOfCharactersThatDroppedOut*aNumberOfBytesPerCharacter)); |
|
396 aDescriptor.Insert(aStartPositionInDescriptor, aEscapeSequence); |
|
397 } |
|
398 } |
|
399 |
|
400 |
|
401 /** Converts text from a modal foreign character set encoding into Unicode. |
|
402 |
|
403 The non-Unicode text specified in aForeign is converted using |
|
404 the array of character set conversion objects (aArrayOfStates) |
|
405 provided by the plug-in, and the converted text is returned in |
|
406 aUnicode. The function can either append to aUnicode |
|
407 or overwrite its contents (if any), depending on the input conversion flags |
|
408 specified. The first element in aArrayOfStates is taken to be |
|
409 the default mode (i.e. the mode to assume by default if there is no preceding |
|
410 escape sequence). |
|
411 |
|
412 @param aDefaultEndiannessOfForeignCharacters The default endian-ness of the |
|
413 foreign characters. If an endian-ness for foreign characters is specified |
|
414 in the conversion data, then that is used instead and the value of |
|
415 aDefaultEndiannessOfForeignCharacters is ignored. |
|
416 @param aUnicode On return, contains the text converted into Unicode. |
|
417 @param aForeign The non-Unicode source text to be converted. |
|
418 @param aState Used to store a modal character set encoding's current mode across |
|
419 multiple calls to ConvertToUnicode() on the same input descriptor. This argument |
|
420 should be passed the same object as passed to the plug-in's ConvertToUnicode() |
|
421 exported function. |
|
422 @param aNumberOfUnconvertibleCharacters On return, contains the number of |
|
423 characters in aForeign which were not converted. Characters which cannot be |
|
424 converted are output as Unicode replacement characters (0xfffd). |
|
425 @param aIndexOfFirstByteOfFirstUnconvertibleCharacter On return, the index |
|
426 of the first byte of the first unconvertible character. For instance if the |
|
427 first character in the input descriptor (aForeign) could not be converted, |
|
428 then this parameter is set to the first byte of that character, i.e. zero. |
|
429 A negative value is returned if all the characters were converted. |
|
430 @param aArrayOfStates Array of character set conversion data objects, and their |
|
431 escape sequences ("modes"). There must be one or more modes in this array, |
|
432 none of the modes can have any NULL member data, and each mode's escape sequence |
|
433 must begin with KControlCharacterEscape (0x1b) or a panic occurs. |
|
434 @return The number of unconverted bytes left at the end of the input descriptor, |
|
435 or a negative error value, as defined in TError. */ |
|
436 EXPORT_C TInt CnvUtilities::ConvertToUnicodeFromModalForeign( |
|
437 CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, |
|
438 TDes16& aUnicode, |
|
439 const TDesC8& aForeign, |
|
440 TInt& aState, |
|
441 TInt& aNumberOfUnconvertibleCharacters, |
|
442 TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter, |
|
443 const TArray<SState>& aArrayOfStates) |
|
444 { |
|
445 TUint notUsed; |
|
446 return ConvertToUnicodeFromModalForeign(aDefaultEndiannessOfForeignCharacters, |
|
447 aUnicode, |
|
448 aForeign, |
|
449 aState, |
|
450 aNumberOfUnconvertibleCharacters, |
|
451 aIndexOfFirstByteOfFirstUnconvertibleCharacter, |
|
452 aArrayOfStates, |
|
453 notUsed, |
|
454 0); |
|
455 } |
|
456 |
|
457 /** @param aDefaultEndiannessOfForeignCharacters The default endian-ness for |
|
458 the foreign characters. If an endian-ness for foreign characters is specified |
|
459 in the conversion data, then that is used instead and the value of |
|
460 aDefaultEndiannessOfForeignCharacters is ignored. |
|
461 @param aUnicode On return, contains the text converted into Unicode. |
|
462 @param aForeign The non-Unicode source text to be converted. |
|
463 @param aState Used to store a modal character set encoding's current mode |
|
464 across multiple calls to ConvertToUnicode() on the same input descriptor. This |
|
465 argument should be passed the same object as passed to the plug-in's |
|
466 ConvertToUnicode() exported function. |
|
467 @param aNumberOfUnconvertibleCharacters On return, contains the number of |
|
468 characters in aForeign which were not converted. Characters which cannot be |
|
469 converted are output as Unicode replacement characters (0xfffd). |
|
470 @param aIndexOfFirstByteOfFirstUnconvertibleCharacter On return, the index |
|
471 of the first byte of the first unconvertible character. For instance if the |
|
472 first character in the input descriptor (aForeign) could not be converted, |
|
473 then this parameter is set to the first byte of that character, i.e. zero. |
|
474 A negative value is returned if all the characters were converted. |
|
475 @param aArrayOfStates Array of character set conversion data objects, and their |
|
476 escape sequences. There must be one or more modes in this array, none of the |
|
477 modes can have any NULL member data, and each mode's escape sequence must |
|
478 begin with KControlCharacterEscape (0x1b) or a panic occurs. |
|
479 @param aOutputConversionFlags If the input descriptor ended in a truncated |
|
480 sequence, e.g. a part of a multi-byte character, aOutputConversionFlags |
|
481 returns with the EOutputConversionFlagInputIsTruncated flag set. |
|
482 @param aInputConversionFlags Specify |
|
483 CCnvCharacterSetConverter::EInputConversionFlagAppend to append the text to |
|
484 aUnicode. Specify EInputConversionFlagAllowTruncatedInputNotEvenPartlyConsumable |
|
485 to prevent the function from returning the error-code EErrorIllFormedInput |
|
486 when the input descriptor consists of nothing but a truncated sequence. The |
|
487 CCnvCharacterSetConverter::EInputConversionFlagStopAtFirstUnconvertibleCharacter |
|
488 flag must not be set, otherwise a panic occurs. |
|
489 @return The number of unconverted bytes left at the end of the input descriptor, |
|
490 or a negative error value, as defined in TError. */ |
|
491 EXPORT_C TInt CnvUtilities::ConvertToUnicodeFromModalForeign( |
|
492 CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, |
|
493 TDes16& aUnicode, |
|
494 const TDesC8& aForeign, |
|
495 TInt& aState, |
|
496 TInt& aNumberOfUnconvertibleCharacters, |
|
497 TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter, |
|
498 const TArray<SState>& aArrayOfStates, |
|
499 TUint& aOutputConversionFlags, |
|
500 TUint aInputConversionFlags) |
|
501 { |
|
502 __ASSERT_ALWAYS(~aInputConversionFlags&CCnvCharacterSetConverter::EInputConversionFlagStopAtFirstUnconvertibleCharacter, Panic(EPanicBadInputConversionFlags2)); |
|
503 CheckArrayOfStates(aArrayOfStates); |
|
504 aNumberOfUnconvertibleCharacters=0; |
|
505 aIndexOfFirstByteOfFirstUnconvertibleCharacter=-1; |
|
506 aOutputConversionFlags=0; |
|
507 TUint internalInputConversionFlags=aInputConversionFlags; |
|
508 if (~aInputConversionFlags&CCnvCharacterSetConverter::EInputConversionFlagAppend) |
|
509 { |
|
510 aUnicode.SetLength(0); |
|
511 internalInputConversionFlags|=CCnvCharacterSetConverter::EInputConversionFlagAppend; |
|
512 } |
|
513 if (aForeign.Length()==0) |
|
514 { |
|
515 return 0; |
|
516 } |
|
517 if (aUnicode.MaxLength()==aUnicode.Length()) // relies on the fact that aUnicode's length has been set to zero if aInputConversionFlags does not have CCnvCharacterSetConverter::EInputConversionFlagAppend set |
|
518 { |
|
519 return aForeign.Length(); |
|
520 } |
|
521 TPtrC8 remainderOfForeign(aForeign); |
|
522 TPtrC8 homogeneousRun; |
|
523 TInt numberOfForeignBytesConsumed=0; |
|
524 const SCnvConversionData* conversionData = NULL; |
|
525 const TInt startOfNextEscapeSequence=aForeign.Locate(KControlCharacterEscape); |
|
526 if (startOfNextEscapeSequence!=0) // if aForeign doesn't start with an escape sequence... |
|
527 { |
|
528 conversionData=(aState!=CCnvCharacterSetConverter::KStateDefault)? REINTERPRET_CAST(const SCnvConversionData*, aState): aArrayOfStates[0].iConversionData; |
|
529 if (startOfNextEscapeSequence==KErrNotFound) |
|
530 { |
|
531 homogeneousRun.Set(remainderOfForeign); |
|
532 remainderOfForeign.Set(NULL, 0); |
|
533 } |
|
534 else |
|
535 { |
|
536 __ASSERT_DEBUG(startOfNextEscapeSequence>0, Panic(EPanicBadStartOfNextEscapeSequence)); |
|
537 homogeneousRun.Set(remainderOfForeign.Left(startOfNextEscapeSequence)); |
|
538 remainderOfForeign.Set(remainderOfForeign.Mid(startOfNextEscapeSequence)); |
|
539 } |
|
540 goto handleHomogeneousRun; |
|
541 } |
|
542 FOREVER |
|
543 { |
|
544 if (!NextHomogeneousForeignRun(conversionData, numberOfForeignBytesConsumed, homogeneousRun, remainderOfForeign, aArrayOfStates, aOutputConversionFlags)) |
|
545 { |
|
546 goto end; |
|
547 } |
|
548 handleHomogeneousRun: |
|
549 if (conversionData==NULL) |
|
550 { |
|
551 return CCnvCharacterSetConverter::EErrorIllFormedInput; |
|
552 } |
|
553 TInt numberOfUnconvertibleCharacters; |
|
554 TInt indexOfFirstByteOfFirstUnconvertibleCharacter; |
|
555 const TInt returnValue=CCnvCharacterSetConverter::DoConvertToUnicode(*conversionData, aDefaultEndiannessOfForeignCharacters, aUnicode, homogeneousRun, numberOfUnconvertibleCharacters, indexOfFirstByteOfFirstUnconvertibleCharacter, aOutputConversionFlags, internalInputConversionFlags); |
|
556 if (returnValue<0) |
|
557 { |
|
558 return returnValue; // this is an error-code |
|
559 } |
|
560 if (numberOfUnconvertibleCharacters>0) |
|
561 { |
|
562 if (aNumberOfUnconvertibleCharacters==0) |
|
563 { |
|
564 aIndexOfFirstByteOfFirstUnconvertibleCharacter=numberOfForeignBytesConsumed+indexOfFirstByteOfFirstUnconvertibleCharacter; |
|
565 } |
|
566 aNumberOfUnconvertibleCharacters+=numberOfUnconvertibleCharacters; |
|
567 } |
|
568 numberOfForeignBytesConsumed+=homogeneousRun.Length(); |
|
569 if (returnValue>0) |
|
570 { |
|
571 numberOfForeignBytesConsumed-=returnValue; |
|
572 goto end; |
|
573 } |
|
574 if (numberOfForeignBytesConsumed>0) |
|
575 { |
|
576 internalInputConversionFlags|=CCnvCharacterSetConverter::EInputConversionFlagAllowTruncatedInputNotEvenPartlyConsumable; |
|
577 } |
|
578 __ASSERT_DEBUG(remainderOfForeign==aForeign.Mid(numberOfForeignBytesConsumed), Panic(EPanicInconsistentNumberOfForeignBytesRemaining)); |
|
579 } |
|
580 end: |
|
581 if ((numberOfForeignBytesConsumed==0) && (aOutputConversionFlags&CCnvCharacterSetConverter::EOutputConversionFlagInputIsTruncated) && (~aInputConversionFlags&CCnvCharacterSetConverter::EInputConversionFlagAllowTruncatedInputNotEvenPartlyConsumable)) |
|
582 { |
|
583 return CCnvCharacterSetConverter::EErrorIllFormedInput; |
|
584 } |
|
585 aState=REINTERPRET_CAST(TInt, conversionData); |
|
586 return aForeign.Length()-numberOfForeignBytesConsumed; |
|
587 } |
|
588 |
|
589 |
|
590 /** Converts text from a non-modal complex character set encoding (e.g. |
|
591 Shift-JIS or EUC-JP) into Unicode.The non-Unicode text specified in |
|
592 aForeign is converted using the array of character set |
|
593 conversion methods (aArrayOfMethods) provided by the |
|
594 plug-in, and the converted text is returned in aUnicode. |
|
595 Overwrites the contents, if any, of aUnicode. |
|
596 |
|
597 @param aDefaultEndiannessOfForeignCharacters The default endian-ness of the |
|
598 foreign characters. If an endian-ness for foreign characters is specified |
|
599 in the conversion data, then that is used instead and the value of |
|
600 aDefaultEndiannessOfForeignCharacters is ignored. |
|
601 @param aUnicode On return, contains the text converted into Unicode. |
|
602 @param aForeign The non-Unicode source text to be converted. |
|
603 @param aNumberOfUnconvertibleCharacters On return, contains the number of |
|
604 characters in aForeign which were not converted. Characters which cannot be |
|
605 converted are output as Unicode replacement characters (0xfffd). |
|
606 @param aIndexOfFirstByteOfFirstUnconvertibleCharacter On return, the index |
|
607 of the first byte of the first unconvertible character. For instance if the |
|
608 first character in the input descriptor (aForeign) could not be converted, |
|
609 then this parameter is set to the first byte of that character, i.e. zero. |
|
610 A negative value is returned if all the characters were converted. |
|
611 @param aArrayOfMethods Array of conversion methods. There must be one or more |
|
612 methods in this array and none of the methods in the array can have any NULL |
|
613 member data or a panic occurs. |
|
614 @return The number of unconverted bytes left at the end of the input descriptor, |
|
615 or a negative error value, as defined in TError. */ |
|
616 EXPORT_C TInt CnvUtilities::ConvertToUnicodeFromHeterogeneousForeign( |
|
617 CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, |
|
618 TDes16& aUnicode, |
|
619 const TDesC8& aForeign, |
|
620 TInt& aNumberOfUnconvertibleCharacters, |
|
621 TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter, |
|
622 const TArray<SMethod>& aArrayOfMethods) |
|
623 { |
|
624 TUint notUsed; |
|
625 return ConvertToUnicodeFromHeterogeneousForeign( |
|
626 aDefaultEndiannessOfForeignCharacters, |
|
627 aUnicode, |
|
628 aForeign, |
|
629 aNumberOfUnconvertibleCharacters, |
|
630 aIndexOfFirstByteOfFirstUnconvertibleCharacter, |
|
631 aArrayOfMethods, |
|
632 notUsed, |
|
633 0); |
|
634 } |
|
635 |
|
636 /** @param aDefaultEndiannessOfForeignCharacters The default endian-ness for the |
|
637 foreign characters. If an endian-ness for foreign characters is specified |
|
638 in the conversion data, then that is used instead and the value of |
|
639 aDefaultEndiannessOfForeignCharacters is ignored. |
|
640 @param aUnicode On return, contains the text converted into Unicode. |
|
641 @param aForeign The non-Unicode source text to be converted. |
|
642 @param aNumberOfUnconvertibleCharacters On return, contains the number of |
|
643 characters in aForeign which were not converted. Characters which cannot be |
|
644 converted are output as Unicode replacement characters (0xfffd). |
|
645 @param aIndexOfFirstByteOfFirstUnconvertibleCharacter On return, the index |
|
646 of the first byte of the first unconvertible character. For instance if the |
|
647 first character in the input descriptor (aForeign) could not be converted, |
|
648 then this parameter is set to the first byte of that character, i.e. zero. |
|
649 A negative value is returned if all the characters were converted. |
|
650 @param aArrayOfMethods Array of conversion methods. There must be one or more |
|
651 methods in this array and none of the methods in the array can have any NULL |
|
652 member data or a panic occurs. |
|
653 @param aOutputConversionFlags If the input descriptor ended in a truncated |
|
654 sequence, e.g. a part of a multi-byte character, aOutputConversionFlags |
|
655 returns with the EOutputConversionFlagInputIsTruncated flag set. |
|
656 @param aInputConversionFlags Specify |
|
657 CCnvCharacterSetConverter::EInputConversionFlagAppend to append the text to |
|
658 aUnicode. Specify EInputConversionFlagAllowTruncatedInputNotEvenPartlyConsumable |
|
659 to prevent the function from returning the error-code EErrorIllFormedInput |
|
660 when the input descriptor consists of nothing but a truncated sequence. The |
|
661 CCnvCharacterSetConverter::EInputConversionFlagStopAtFirstUnconvertibleCharacter |
|
662 flag must not be set, otherwise a panic occurs. |
|
663 @return The number of unconverted bytes left at the end of the input descriptor, |
|
664 or a negative error value, as defined in TError. */ |
|
665 EXPORT_C TInt CnvUtilities::ConvertToUnicodeFromHeterogeneousForeign( |
|
666 CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, |
|
667 TDes16& aUnicode, |
|
668 const TDesC8& aForeign, |
|
669 TInt& aNumberOfUnconvertibleCharacters, |
|
670 TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter, |
|
671 const TArray<SMethod>& aArrayOfMethods, |
|
672 TUint& aOutputConversionFlags, |
|
673 TUint aInputConversionFlags) |
|
674 { |
|
675 __ASSERT_ALWAYS(~aInputConversionFlags&CCnvCharacterSetConverter::EInputConversionFlagStopAtFirstUnconvertibleCharacter, Panic(EPanicBadInputConversionFlags3)); |
|
676 CheckArrayOfMethods(aArrayOfMethods); |
|
677 aNumberOfUnconvertibleCharacters=0; |
|
678 aIndexOfFirstByteOfFirstUnconvertibleCharacter=-1; |
|
679 aOutputConversionFlags=0; |
|
680 TUint internalInputConversionFlags=aInputConversionFlags; |
|
681 if (~aInputConversionFlags&CCnvCharacterSetConverter::EInputConversionFlagAppend) |
|
682 { |
|
683 aUnicode.SetLength(0); |
|
684 internalInputConversionFlags|=CCnvCharacterSetConverter::EInputConversionFlagAppend; |
|
685 } |
|
686 if (aForeign.Length()==0) |
|
687 { |
|
688 return 0; |
|
689 } |
|
690 if (aUnicode.MaxLength()==aUnicode.Length()) // relies on the fact that aUnicode's length has been set to zero if aInputConversionFlags does not have CCnvCharacterSetConverter::EInputConversionFlagAppend set |
|
691 { |
|
692 return aForeign.Length(); |
|
693 } |
|
694 const TInt numberOfMethods=aArrayOfMethods.Count(); |
|
695 TPtrC8 remainderOfForeign(aForeign); |
|
696 TInt numberOfForeignBytesConsumed=0; |
|
697 FOREVER |
|
698 { |
|
699 TInt lengthOfRunToConvert=0; |
|
700 const SMethod* method=NULL; |
|
701 for (TInt i=0;;) |
|
702 { |
|
703 method=&aArrayOfMethods[i]; |
|
704 __ASSERT_DEBUG(method!=NULL, Panic(EPanicBadMethodPointer)); |
|
705 lengthOfRunToConvert=(*method->iNumberOfBytesAbleToConvert)(remainderOfForeign); |
|
706 if (lengthOfRunToConvert<0) |
|
707 { |
|
708 return lengthOfRunToConvert; // this is an error-code |
|
709 } |
|
710 if (lengthOfRunToConvert>0) |
|
711 { |
|
712 break; |
|
713 } |
|
714 __ASSERT_DEBUG(i<numberOfMethods, Panic(EPanicLoopCounterOverRun2)); |
|
715 ++i; |
|
716 if (i>=numberOfMethods) |
|
717 { |
|
718 aOutputConversionFlags|=CCnvCharacterSetConverter::EOutputConversionFlagInputIsTruncated; |
|
719 goto end; |
|
720 } |
|
721 } |
|
722 TBuf8<KMaximumLengthOfIntermediateBuffer> intermediateBuffer; |
|
723 const TInt maximumUsableLengthOfIntermediateBuffer=ReduceToNearestMultipleOf(KMaximumLengthOfIntermediateBuffer, method->iNumberOfBytesPerCharacter); |
|
724 FOREVER |
|
725 { |
|
726 const TInt numberOfForeignBytesConsumedThisTime=Min(lengthOfRunToConvert, maximumUsableLengthOfIntermediateBuffer); |
|
727 intermediateBuffer=remainderOfForeign.Left(numberOfForeignBytesConsumedThisTime); |
|
728 __ASSERT_DEBUG((numberOfForeignBytesConsumedThisTime%method->iNumberOfBytesPerCharacter)==0, Panic(EPanicDescriptorNotWholeNumberOfCharacters2)); |
|
729 (*method->iConvertToIntermediateBufferInPlace)(intermediateBuffer); |
|
730 __ASSERT_DEBUG((intermediateBuffer.Length()%method->iNumberOfCoreBytesPerCharacter)==0, Panic(EPanicDescriptorNotWholeNumberOfCharacters3)); |
|
731 __ASSERT_DEBUG((intermediateBuffer.Length()/method->iNumberOfCoreBytesPerCharacter)*method->iNumberOfBytesPerCharacter==numberOfForeignBytesConsumedThisTime, Panic(EPanicBadMethodData1)); |
|
732 TInt numberOfUnconvertibleCharacters; |
|
733 TInt indexOfFirstByteOfFirstUnconvertibleCharacter; |
|
734 const TInt returnValue=CCnvCharacterSetConverter::DoConvertToUnicode(*method->iConversionData, aDefaultEndiannessOfForeignCharacters, aUnicode, intermediateBuffer, numberOfUnconvertibleCharacters, indexOfFirstByteOfFirstUnconvertibleCharacter, aOutputConversionFlags, internalInputConversionFlags); |
|
735 if (returnValue<0) |
|
736 { |
|
737 return returnValue; // this is an error-code |
|
738 } |
|
739 if (numberOfUnconvertibleCharacters>0) |
|
740 { |
|
741 if (aNumberOfUnconvertibleCharacters==0) |
|
742 { |
|
743 aIndexOfFirstByteOfFirstUnconvertibleCharacter=numberOfForeignBytesConsumed+indexOfFirstByteOfFirstUnconvertibleCharacter; |
|
744 } |
|
745 aNumberOfUnconvertibleCharacters+=numberOfUnconvertibleCharacters; |
|
746 } |
|
747 numberOfForeignBytesConsumed+=numberOfForeignBytesConsumedThisTime; |
|
748 if (returnValue>0) |
|
749 { |
|
750 __ASSERT_DEBUG((returnValue%method->iNumberOfCoreBytesPerCharacter)==0, Panic(EPanicDescriptorNotWholeNumberOfCharacters4)); |
|
751 numberOfForeignBytesConsumed-=(returnValue/method->iNumberOfCoreBytesPerCharacter)*method->iNumberOfBytesPerCharacter; |
|
752 goto end; |
|
753 } |
|
754 if (numberOfForeignBytesConsumed>0) |
|
755 { |
|
756 internalInputConversionFlags|=CCnvCharacterSetConverter::EInputConversionFlagAllowTruncatedInputNotEvenPartlyConsumable; |
|
757 } |
|
758 remainderOfForeign.Set(aForeign.Mid(numberOfForeignBytesConsumed)); |
|
759 lengthOfRunToConvert-=numberOfForeignBytesConsumedThisTime; |
|
760 __ASSERT_DEBUG(lengthOfRunToConvert>=0, Panic(EPanicBadLengthOfRunToConvert2)); |
|
761 if (lengthOfRunToConvert<=0) |
|
762 { |
|
763 break; |
|
764 } |
|
765 } |
|
766 } |
|
767 end: |
|
768 if ((numberOfForeignBytesConsumed==0) && (aOutputConversionFlags&CCnvCharacterSetConverter::EOutputConversionFlagInputIsTruncated) && (~aInputConversionFlags&CCnvCharacterSetConverter::EInputConversionFlagAllowTruncatedInputNotEvenPartlyConsumable)) |
|
769 { |
|
770 return CCnvCharacterSetConverter::EErrorIllFormedInput; |
|
771 } |
|
772 return aForeign.Length()-numberOfForeignBytesConsumed; |
|
773 } |
|
774 |
|
775 void CnvUtilities::CheckArrayOfCharacterSets(const TArray<SCharacterSet>& aArrayOfCharacterSets) |
|
776 { |
|
777 const TInt numberOfCharacterSets=aArrayOfCharacterSets.Count(); |
|
778 __ASSERT_ALWAYS(numberOfCharacterSets>0, Panic(EPanicBadNumberOfCharacterSets)); |
|
779 for (TInt i=0; i<numberOfCharacterSets; ++i) |
|
780 { |
|
781 const SCharacterSet& characterSet=aArrayOfCharacterSets[i]; |
|
782 __ASSERT_ALWAYS(characterSet.iConversionData!=NULL, Panic(EPanicBadConversionDataPointer1)); |
|
783 __ASSERT_ALWAYS(characterSet.iConvertFromIntermediateBufferInPlace!=NULL, Panic(EPanicBadFunctionPointer1)); |
|
784 __ASSERT_ALWAYS(characterSet.iEscapeSequence!=NULL, Panic(EPanicBadEscapeSequencePointer1)); |
|
785 } |
|
786 } |
|
787 |
|
788 void CnvUtilities::CheckArrayOfStates(const TArray<SState>& aArrayOfStates) |
|
789 { |
|
790 const TInt numberOfStates=aArrayOfStates.Count(); |
|
791 __ASSERT_ALWAYS(numberOfStates>0, Panic(EPanicBadNumberOfStates)); |
|
792 for (TInt i=0; i<numberOfStates; ++i) |
|
793 { |
|
794 const SState& state=aArrayOfStates[i]; |
|
795 __ASSERT_ALWAYS(state.iEscapeSequence!=NULL, Panic(EPanicBadEscapeSequencePointer2)); |
|
796 __ASSERT_ALWAYS((*state.iEscapeSequence)[0]==KControlCharacterEscape, Panic(EPanicBadEscapeSequenceStart)); |
|
797 __ASSERT_ALWAYS(state.iConversionData!=NULL, Panic(EPanicBadConversionDataPointer2)); |
|
798 } |
|
799 } |
|
800 |
|
801 void CnvUtilities::CheckArrayOfMethods(const TArray<SMethod>& aArrayOfMethods) |
|
802 { |
|
803 const TInt numberOfMethods=aArrayOfMethods.Count(); |
|
804 __ASSERT_ALWAYS(numberOfMethods>0, Panic(EPanicBadNumberOfMethods)); |
|
805 for (TInt i=0; i<numberOfMethods; ++i) |
|
806 { |
|
807 const SMethod& method=aArrayOfMethods[i]; |
|
808 __ASSERT_ALWAYS(method.iNumberOfBytesAbleToConvert!=NULL, Panic(EPanicBadFunctionPointer2)); |
|
809 __ASSERT_ALWAYS(method.iConvertToIntermediateBufferInPlace!=NULL, Panic(EPanicBadFunctionPointer3)); |
|
810 __ASSERT_ALWAYS(method.iConversionData!=NULL, Panic(EPanicBadConversionDataPointer3)); |
|
811 __ASSERT_ALWAYS(method.iNumberOfBytesPerCharacter>0, Panic(EPanicBadMethodData2)); |
|
812 __ASSERT_ALWAYS(method.iNumberOfCoreBytesPerCharacter>0, Panic(EPanicBadMethodData3)); |
|
813 __ASSERT_ALWAYS(method.iNumberOfCoreBytesPerCharacter<=method.iNumberOfBytesPerCharacter, Panic(EPanicBadMethodData4)); |
|
814 } |
|
815 } |
|
816 |
|
817 TInt CnvUtilities::LengthOfUnicodeCharacter(const TDesC16& aUnicode, TInt aIndex) |
|
818 { |
|
819 const TUint unicodeCharacter=aUnicode[aIndex]; |
|
820 if ((unicodeCharacter>=0xd800) && (unicodeCharacter<=0xdbff)) // if the unicode character is the first half of a surrogate-pair... |
|
821 { |
|
822 __ASSERT_DEBUG(aIndex+1<aUnicode.Length(), Panic(EPanicBadSurrogatePair1)); |
|
823 #if defined(_DEBUG) |
|
824 const TUint secondHalfOfSurrogatePair=aUnicode[aIndex+1]; |
|
825 #endif |
|
826 __ASSERT_DEBUG((secondHalfOfSurrogatePair>=0xdc00) && (secondHalfOfSurrogatePair<=0xdfff), Panic(EPanicBadSurrogatePair2)); // this can be asserted as CCnvCharacterSetConverter::DoConvertFromUnicode should have returned an error value if this was a bad surrogate pair |
|
827 return 2; |
|
828 } |
|
829 return 1; |
|
830 } |
|
831 |
|
832 TBool CnvUtilities::NextHomogeneousForeignRun(const SCnvConversionData*& aConversionData, TInt& aNumberOfForeignBytesConsumed, TPtrC8& aHomogeneousRun, TPtrC8& aRemainderOfForeign, const TArray<SState>& aArrayOfStates, TUint& aOutputConversionFlags) |
|
833 { |
|
834 __ASSERT_DEBUG((aRemainderOfForeign.Length()==0) || (aRemainderOfForeign[0]==KControlCharacterEscape), Panic(EPanicBadRemainderOfForeign)); |
|
835 FOREVER |
|
836 { |
|
837 if (aRemainderOfForeign.Length()==0) |
|
838 { |
|
839 return EFalse; |
|
840 } |
|
841 const TInt numberOfStates=aArrayOfStates.Count(); |
|
842 TInt i; |
|
843 for (i=0; i<numberOfStates; ++i) |
|
844 { |
|
845 const SState& state=aArrayOfStates[i]; |
|
846 if (MatchesEscapeSequence(aNumberOfForeignBytesConsumed, aHomogeneousRun, aRemainderOfForeign, *state.iEscapeSequence)) |
|
847 { |
|
848 aConversionData=state.iConversionData; |
|
849 goto foundState; |
|
850 } |
|
851 } |
|
852 for (i=0; i<numberOfStates; ++i) |
|
853 { |
|
854 if (IsStartOf(aRemainderOfForeign, *aArrayOfStates[i].iEscapeSequence)) |
|
855 { |
|
856 // aRemainderOfForeign ends with a truncated escape sequence, so ConvertToUnicode cannot convert any more |
|
857 aOutputConversionFlags|=CCnvCharacterSetConverter::EOutputConversionFlagInputIsTruncated; |
|
858 return EFalse; |
|
859 } |
|
860 } |
|
861 // force ConvertToUnicode to return CCnvCharacterSetConverter::EErrorIllFormedInput |
|
862 aConversionData=NULL; |
|
863 return ETrue; |
|
864 foundState: |
|
865 if (aHomogeneousRun.Length()>0) |
|
866 { |
|
867 return ETrue; |
|
868 } |
|
869 } |
|
870 } |
|
871 |
|
872 TBool CnvUtilities::MatchesEscapeSequence(TInt& aNumberOfForeignBytesConsumed, TPtrC8& aHomogeneousRun, TPtrC8& aRemainderOfForeign, const TDesC8& aEscapeSequence) |
|
873 { |
|
874 const TInt lengthOfEscapeSequence=aEscapeSequence.Length(); |
|
875 if (IsStartOf(aEscapeSequence, aRemainderOfForeign)) |
|
876 { |
|
877 aRemainderOfForeign.Set(aRemainderOfForeign.Mid(lengthOfEscapeSequence)); |
|
878 const TInt startOfNextEscapeSequence=aRemainderOfForeign.Locate(KControlCharacterEscape); |
|
879 if (startOfNextEscapeSequence==KErrNotFound) |
|
880 { |
|
881 aHomogeneousRun.Set(aRemainderOfForeign); |
|
882 aRemainderOfForeign.Set(NULL, 0); |
|
883 } |
|
884 else |
|
885 { |
|
886 aHomogeneousRun.Set(aRemainderOfForeign.Left(startOfNextEscapeSequence)); |
|
887 aRemainderOfForeign.Set(aRemainderOfForeign.Mid(startOfNextEscapeSequence)); |
|
888 } |
|
889 aNumberOfForeignBytesConsumed+=lengthOfEscapeSequence; |
|
890 return ETrue; |
|
891 } |
|
892 return EFalse; |
|
893 } |
|
894 |
|
895 TBool CnvUtilities::IsStartOf(const TDesC8& aStart, const TDesC8& aPotentiallyLongerDescriptor) |
|
896 { |
|
897 const TInt lengthOfStart=aStart.Length(); |
|
898 return (aPotentiallyLongerDescriptor.Length()>=lengthOfStart) && (aPotentiallyLongerDescriptor.Left(lengthOfStart)==aStart); |
|
899 } |
|
900 |