|
1 /* |
|
2 * Copyright (c) 2003-2005 Nokia Corporation and/or its subsidiary(-ies). |
|
3 * All rights reserved. |
|
4 * This component and the accompanying materials are made available |
|
5 * under the terms of "Eclipse Public License v1.0" |
|
6 * which accompanies this distribution, and is available |
|
7 * at the URL "http://www.eclipse.org/legal/epl-v10.html". |
|
8 * |
|
9 * Initial Contributors: |
|
10 * Nokia Corporation - initial contribution. |
|
11 * |
|
12 * Contributors: |
|
13 * |
|
14 * Description: |
|
15 * |
|
16 */ |
|
17 |
|
18 |
|
19 |
|
20 |
|
21 |
|
22 |
|
23 |
|
24 |
|
25 #include <e32std.h> |
|
26 #include <e32base.h> |
|
27 #include <charconv.h> |
|
28 #include <convutils.h> |
|
29 |
|
30 const TInt KNoPreviousCharacterSet=-1; |
|
31 const TInt KDefaultCharacterSet = 0; |
|
32 const TUint KControlCharacterEscape=0x1b; |
|
33 |
|
34 #if defined(_DEBUG) |
|
35 const TInt KMaximumLengthOfIntermediateBuffer=5; |
|
36 #else |
|
37 const TInt KMaximumLengthOfIntermediateBuffer=150; |
|
38 #endif |
|
39 |
|
40 struct SCnvConversionData; |
|
41 |
|
42 _LIT(KLitPanicText, "CONVUTILS"); |
|
43 |
|
// Reason numbers for panics raised by this file through Panic().
// The values are written out explicitly so that a reason number can be looked
// up directly and so that inserting a new enumerator cannot silently renumber
// the existing ones.
enum TPanic
    {
    EPanicBadInputConversionFlags1 = 1,
    EPanicBadInputConversionFlags2 = 2,
    EPanicBadInputConversionFlags3 = 3,
    EPanicBadNumberOfUnicodeElementsConsumed = 4,
    EPanicAppendFlagViolated = 5,
    EPanicBadNumberOfUnicodeCharactersConverted = 6,
    EPanicBadNumberOfCharactersThatDroppedOut = 7,
    EPanicLoopCounterOverRun1 = 8,
    EPanicLoopCounterOverRun2 = 9,
    EPanicDescriptorNotWholeNumberOfCharacters1 = 10,
    EPanicDescriptorNotWholeNumberOfCharacters2 = 11,
    EPanicDescriptorNotWholeNumberOfCharacters3 = 12,
    EPanicDescriptorNotWholeNumberOfCharacters4 = 13,
    EPanicBadStartOfNextEscapeSequence = 14,
    EPanicInconsistentNumberOfForeignBytesRemaining = 15,
    EPanicBadLengthOfRunToConvert1 = 16,
    EPanicBadLengthOfRunToConvert2 = 17,
    EPanicBadMethodPointer = 18,
    EPanicBadMethodData1 = 19,
    EPanicBadMethodData2 = 20,
    EPanicBadMethodData3 = 21,
    EPanicBadMethodData4 = 22,
    EPanicBadNumberOfCharacterSets = 23,
    EPanicBadConversionDataPointer1 = 24,
    EPanicBadConversionDataPointer2 = 25,
    EPanicBadConversionDataPointer3 = 26,
    EPanicBadFunctionPointer1 = 27,
    EPanicBadFunctionPointer2 = 28,
    EPanicBadFunctionPointer3 = 29,
    EPanicBadEscapeSequencePointer1 = 30,
    EPanicBadEscapeSequencePointer2 = 31,
    EPanicBadNumberOfStates = 32,
    EPanicBadEscapeSequenceStart = 33,
    EPanicBadNumberOfMethods = 34,
    EPanicBadSurrogatePair1 = 35,
    EPanicBadSurrogatePair2 = 36,
    EPanicBadRemainderOfForeign = 37,
    EPanicOutputDescriptorTooShortEvenToHoldEscapeSequenceToDefaultCharacterSet = 38
    };
|
85 |
|
86 LOCAL_C void Panic(TPanic aPanic) |
|
87 { |
|
88 User::Panic(KLitPanicText, aPanic); |
|
89 } |
|
90 |
|
91 /** Converts Unicode text into a complex foreign character set encoding. This |
|
92 is an encoding which cannot be converted simply by calling |
|
93 CCnvCharacterSetConverter::DoConvertFromUnicode(). It may be modal (e.g. JIS) |
|
94 or non-modal (e.g. Shift-JIS). |
|
95 |
|
96 The Unicode text specified in aUnicode is converted using the array of |
|
97 conversion data objects (aArrayOfCharacterSets) provided by the plug-in for |
|
98 the complex character set encoding, and the converted text is returned in |
|
99 aForeign. Any existing contents in aForeign are overwritten. |
|
100 |
|
101 Unlike CCnvCharacterSetConverter::DoConvertFromUnicode(), multiple character |
|
102 sets can be specified. aUnicode is converted using the first character conversion |
|
103 data object in the array. When a character is found which cannot be converted |
|
104 using that data, each character set in the array is tried in turn. If it cannot |
|
105 be converted using any object in the array, the index of the character is |
|
106 appended to aIndicesOfUnconvertibleCharacters and the character is replaced |
|
107 by aReplacementForUnconvertibleUnicodeCharacters. |
|
108 |
|
109 If it can be converted using another object in the array, that object is used |
|
110 to convert all subsequent characters until another unconvertible character |
|
111 is found. |
|
112 |
|
113 @param aDefaultEndiannessOfForeignCharacters The default endian-ness to use |
|
114 when writing the characters in the foreign character set. If an endian-ness |
|
115 for foreign characters is specified in the current conversion data object, |
|
116 then that is used instead and the value of |
|
117 aDefaultEndiannessOfForeignCharacters is ignored. |
|
118 @param aReplacementForUnconvertibleUnicodeCharacters The single character (one |
|
119 or more byte values) which is used to replace unconvertible characters. |
|
120 @param aForeign On return, contains the converted text in the non-Unicode |
|
121 character set. |
|
122 @param aUnicode The source Unicode text to be converted. |
|
123 @param aIndicesOfUnconvertibleCharacters On return, holds an ascending array |
|
124 of the indices of each Unicode character in the source text which could not |
|
125 be converted (because none of the target character sets have an equivalent |
|
126 character). |
|
127 @param aArrayOfCharacterSets Array of character conversion data objects, |
|
128 representing the character sets which comprise a complex character set |
|
129 encoding. These are used in sequence to convert the Unicode text. There must |
|
130 be at least one character set in this array and no character set may have any |
|
131 NULL member data, or a panic occurs. |
|
132 @return The number of unconverted characters left at the end of the input |
|
133 descriptor (e.g. because aForeign was not long enough to hold all the text), |
|
134 or a negative error value, as defined in CCnvCharacterSetConverter::TError. */ |
|
135 EXPORT_C TInt CnvUtilities::ConvertFromUnicode( |
|
136 CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, |
|
137 const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, |
|
138 TDes8& aForeign, |
|
139 const TDesC16& aUnicode, |
|
140 CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters, |
|
141 const TArray<SCharacterSet>& aArrayOfCharacterSets) |
|
142 { |
|
143 TUint notUsed; |
|
144 return ConvertFromUnicode(aDefaultEndiannessOfForeignCharacters, |
|
145 aReplacementForUnconvertibleUnicodeCharacters, |
|
146 aForeign, |
|
147 aUnicode, |
|
148 aIndicesOfUnconvertibleCharacters, |
|
149 aArrayOfCharacterSets, |
|
150 notUsed, |
|
151 0); |
|
152 } |
|
153 |
|
154 /** Converts Unicode text into a complex foreign character set encoding. This is |
|
155 an encoding which cannot be converted simply by a call to |
|
156 CCnvCharacterSetConverter::DoConvertFromUnicode(). It may be modal (e.g. JIS) |
|
157 or non-modal (e.g. Shift-JIS). |
|
158 |
|
159 The Unicode text specified in aUnicode is converted using the array of conversion |
|
160 data objects (aArrayOfCharacterSets) provided by the plug-in for the complex |
|
161 character set encoding and the converted text is returned in aForeign. The |
|
162 function can either append to aForeign or overwrite its contents (if any). |
|
163 |
|
164 Unlike CCnvCharacterSetConverter::DoConvertFromUnicode(), multiple character |
|
165 sets can be specified. aUnicode is converted using the first character conversion |
|
166 data object in the array. When a character is found which cannot be converted |
|
167 using that data, each character set in the array is tried in turn. If it cannot |
|
168 be converted using any object in the array, the index of the character is |
|
169 appended to aIndicesOfUnconvertibleCharacters and the character is replaced |
|
170 by aReplacementForUnconvertibleUnicodeCharacters. |
|
171 |
|
172 If it can be converted using another object in the array, that object is used |
|
173 to convert all subsequent characters until another unconvertible character |
|
174 is found. |
|
175 |
|
176 @param aDefaultEndiannessOfForeignCharacters The default endian-ness to use |
|
177 when writing the characters in the foreign character set. If an endian-ness |
|
178 for foreign characters is specified in the current conversion data object, |
|
179 then that is used instead and the value of |
|
180 aDefaultEndiannessOfForeignCharacters is ignored. |
|
181 @param aReplacementForUnconvertibleUnicodeCharacters The single character (one |
|
182 or more byte values) which is used to replace unconvertible characters. |
|
183 @param aForeign On return, contains the converted text in the non-Unicode |
|
184 character set. This may already contain some text. If it does, and if |
|
185 aInputConversionFlags specifies EInputConversionFlagAppend, then the converted |
|
186 text is appended to this descriptor. |
|
187 @param aUnicode The source Unicode text to be converted. |
|
188 @param aIndicesOfUnconvertibleCharacters On return, holds an ascending array |
|
189 of the indices of each Unicode character in the source text which could not |
|
190 be converted (because none of the target character sets have an equivalent |
|
191 character). |
|
192 @param aArrayOfCharacterSets Array of character set data objects. These are |
|
193 used in sequence to convert the Unicode text. There must be at least one |
|
194 character set in this array and no character set may have any NULL member |
|
195 data, or a panic occurs. |
|
196 @param aOutputConversionFlags If the input descriptor ended in a truncated |
|
197 sequence, e.g. the first half only of a Unicode surrogate pair, this returns |
|
198 with the EOutputConversionFlagInputIsTruncated flag set. |
|
199 @param aInputConversionFlags Specify |
|
200 CCnvCharacterSetConverter::EInputConversionFlagAppend to append the text to |
|
201 aForeign. Specify CCnvCharacterSetConverter::EInputConversionFlagAllowTruncatedInputNotEvenPartlyConsumable |
|
202 to prevent the function from returning the error-code EErrorIllFormedInput |
|
203 when the input descriptor consists of nothing but a truncated sequence. The |
|
204 CCnvCharacterSetConverter::EInputConversionFlagStopAtFirstUnconvertibleCharacter |
|
205 flag must not be set, otherwise a panic occurs. |
|
206 @return The number of unconverted characters left at the end of the input descriptor |
|
207 (e.g. because aForeign was not long enough to hold all the text), or a negative |
|
208 error value, as defined in CCnvCharacterSetConverter::TError. */ |
|
209 EXPORT_C TInt CnvUtilities::ConvertFromUnicode( |
|
210 CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, |
|
211 const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, |
|
212 TDes8& aForeign, |
|
213 const TDesC16& aUnicode, |
|
214 CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters, |
|
215 const TArray<SCharacterSet>& aArrayOfCharacterSets, |
|
216 TUint& aOutputConversionFlags, |
|
217 TUint aInputConversionFlags) |
|
218 { |
|
219 __ASSERT_ALWAYS(~aInputConversionFlags&CCnvCharacterSetConverter::EInputConversionFlagStopAtFirstUnconvertibleCharacter, Panic(EPanicBadInputConversionFlags1)); |
|
220 CheckArrayOfCharacterSets(aArrayOfCharacterSets); |
|
221 aOutputConversionFlags=0; |
|
222 TUint internalInputConversionFlags=aInputConversionFlags; |
|
223 if (~aInputConversionFlags&CCnvCharacterSetConverter::EInputConversionFlagAppend) |
|
224 { |
|
225 aForeign.SetLength(0); |
|
226 internalInputConversionFlags|=CCnvCharacterSetConverter::EInputConversionFlagAppend; |
|
227 } |
|
228 if (aUnicode.Length()==0) |
|
229 { |
|
230 return 0; |
|
231 } |
|
232 if (aForeign.MaxLength()==aForeign.Length()) // relies on the fact that aForeign's length has been set to zero if aInputConversionFlags does not have CCnvCharacterSetConverter::EInputConversionFlagAppend set |
|
233 { |
|
234 return aUnicode.Length(); |
|
235 } |
|
236 TDes8* foreign=&aForeign; |
|
237 TPtr8 dummyForeign(NULL, 0, 0); |
|
238 if (aInputConversionFlags&CCnvCharacterSetConverter::EInputConversionFlagMustEndInDefaultCharacterSet) |
|
239 { |
|
240 TInt dummyMaximumLength = |
|
241 aForeign.MaxLength() - aArrayOfCharacterSets[KDefaultCharacterSet].iEscapeSequence->Length(); |
|
242 __ASSERT_ALWAYS(dummyMaximumLength >= 0, |
|
243 Panic(EPanicOutputDescriptorTooShortEvenToHoldEscapeSequenceToDefaultCharacterSet)); |
|
244 dummyForeign.Set(const_cast <TUint8*> (aForeign.Ptr()), |
|
245 aForeign.Length(), |
|
246 dummyMaximumLength); |
|
247 foreign=&dummyForeign; |
|
248 } |
|
249 const TInt numberOfCharacterSets=aArrayOfCharacterSets.Count(); |
|
250 TInt numberOfUnicodeElementsConsumed=0; |
|
251 internalInputConversionFlags|=CCnvCharacterSetConverter::EInputConversionFlagStopAtFirstUnconvertibleCharacter; // this is not just an optimization - it ensures that "foreign" doesn't get filled up too much each time CCnvCharacterSetConverter::DoConvertFromUnicode is called |
|
252 TInt previousCharacterSet = aInputConversionFlags&CCnvCharacterSetConverter::EInputConversionFlagAssumeStartInDefaultCharacterSet? |
|
253 KDefaultCharacterSet : KNoPreviousCharacterSet; |
|
254 FOREVER |
|
255 { |
|
256 for (TInt presentCharacterSet=KDefaultCharacterSet;;) |
|
257 { |
|
258 __ASSERT_DEBUG(numberOfUnicodeElementsConsumed<=aUnicode.Length(), Panic(EPanicBadNumberOfUnicodeElementsConsumed)); |
|
259 if (numberOfUnicodeElementsConsumed>=aUnicode.Length()) |
|
260 { |
|
261 goto end; |
|
262 } |
|
263 const SCharacterSet& characterSet=aArrayOfCharacterSets[presentCharacterSet]; |
|
264 const TInt oldNumberOfBytesInForeign=foreign->Length(); |
|
265 if (numberOfUnicodeElementsConsumed>0) |
|
266 { |
|
267 internalInputConversionFlags|=CCnvCharacterSetConverter::EInputConversionFlagAllowTruncatedInputNotEvenPartlyConsumable; |
|
268 } |
|
269 CCnvCharacterSetConverter::TArrayOfAscendingIndices indicesOfUnconvertibleCharacters; |
|
270 const TInt returnValue=CCnvCharacterSetConverter::DoConvertFromUnicode(*characterSet.iConversionData, aDefaultEndiannessOfForeignCharacters, KNullDesC8, *foreign, aUnicode.Mid(numberOfUnicodeElementsConsumed), indicesOfUnconvertibleCharacters, aOutputConversionFlags, internalInputConversionFlags); |
|
271 if (returnValue<0) |
|
272 { |
|
273 return returnValue; // this is an error-code |
|
274 } |
|
275 __ASSERT_DEBUG(foreign->Length()>=oldNumberOfBytesInForeign, Panic(EPanicAppendFlagViolated)); |
|
276 TInt indexOfFirstUnconvertibleCharacter; |
|
277 if (indicesOfUnconvertibleCharacters.NumberOfIndices()==0) |
|
278 { |
|
279 indexOfFirstUnconvertibleCharacter=-1; |
|
280 numberOfUnicodeElementsConsumed=aUnicode.Length()-returnValue; |
|
281 } |
|
282 else |
|
283 { |
|
284 indexOfFirstUnconvertibleCharacter=indicesOfUnconvertibleCharacters[0]; |
|
285 numberOfUnicodeElementsConsumed+=indexOfFirstUnconvertibleCharacter; |
|
286 __ASSERT_DEBUG(numberOfUnicodeElementsConsumed+LengthOfUnicodeCharacter(aUnicode, numberOfUnicodeElementsConsumed)==aUnicode.Length()-returnValue, Panic(EPanicBadNumberOfUnicodeCharactersConverted)); |
|
287 } |
|
288 if (indexOfFirstUnconvertibleCharacter!=0) // if at least one Unicode character at the start of CCnvCharacterSetConverter::DoConvertFromUnicode's input descriptor was convertible... |
|
289 { |
|
290 TBool gotoEnd = EFalse; |
|
291 if (foreign->Length()>oldNumberOfBytesInForeign) |
|
292 { |
|
293 TInt numberOfCharactersThatDroppedOut=0; |
|
294 // Insert an escape sequence if this character set is different from the last one. |
|
295 if (presentCharacterSet != previousCharacterSet) |
|
296 { |
|
297 // Insert escape sequence (if requred) in front of the last encoded run of text. |
|
298 // Note that this may cause some characters to drop out at the end. |
|
299 (*characterSet.iConvertFromIntermediateBufferInPlace)(oldNumberOfBytesInForeign, *foreign, numberOfCharactersThatDroppedOut); |
|
300 if (oldNumberOfBytesInForeign < foreign->Length()) |
|
301 previousCharacterSet = presentCharacterSet; |
|
302 } |
|
303 numberOfUnicodeElementsConsumed-=numberOfCharactersThatDroppedOut; |
|
304 if (numberOfCharactersThatDroppedOut>0 )// if "foreign" has been filled to as much as it will hold... |
|
305 { |
|
306 gotoEnd = ETrue; |
|
307 } |
|
308 } |
|
309 if (indexOfFirstUnconvertibleCharacter<0) // if we've successfully converted up to the end of aUnicode (using *characterSet.iConversionData)... |
|
310 { |
|
311 gotoEnd = ETrue; |
|
312 } |
|
313 if (gotoEnd) |
|
314 { |
|
315 if ( aInputConversionFlags&CCnvCharacterSetConverter::EInputConversionFlagMustEndInDefaultCharacterSet |
|
316 && previousCharacterSet != KDefaultCharacterSet |
|
317 && previousCharacterSet != KNoPreviousCharacterSet) |
|
318 { |
|
319 aForeign.SetLength(foreign->Length()); |
|
320 aForeign.Append(*aArrayOfCharacterSets[KDefaultCharacterSet].iEscapeSequence); |
|
321 foreign=NULL; |
|
322 } |
|
323 goto end; |
|
324 } |
|
325 break; |
|
326 } |
|
327 __ASSERT_DEBUG(presentCharacterSet<numberOfCharacterSets, Panic(EPanicLoopCounterOverRun1)); |
|
328 ++presentCharacterSet; |
|
329 if (presentCharacterSet>=numberOfCharacterSets) |
|
330 { |
|
331 if ((foreign->MaxLength()-foreign->Length()<aReplacementForUnconvertibleUnicodeCharacters.Length()) || |
|
332 (aIndicesOfUnconvertibleCharacters.AppendIndex(numberOfUnicodeElementsConsumed)!=CCnvCharacterSetConverter::TArrayOfAscendingIndices::EAppendSuccessful)) // the tests must be done in this order as AppendIndex must only be called if there is room for aReplacementForUnconvertibleUnicodeCharacters |
|
333 { |
|
334 goto end; |
|
335 } |
|
336 numberOfUnicodeElementsConsumed+=LengthOfUnicodeCharacter(aUnicode, numberOfUnicodeElementsConsumed); |
|
337 foreign->Append(aReplacementForUnconvertibleUnicodeCharacters); |
|
338 break; |
|
339 } |
|
340 } |
|
341 } |
|
342 end: |
|
343 if (foreign!=NULL) |
|
344 { |
|
345 aForeign.SetLength(foreign->Length()); |
|
346 foreign=NULL; |
|
347 } |
|
348 if ((numberOfUnicodeElementsConsumed==0) && (aOutputConversionFlags&CCnvCharacterSetConverter::EOutputConversionFlagInputIsTruncated) && (~aInputConversionFlags&CCnvCharacterSetConverter::EInputConversionFlagAllowTruncatedInputNotEvenPartlyConsumable)) |
|
349 { |
|
350 return CCnvCharacterSetConverter::EErrorIllFormedInput; |
|
351 } |
|
352 return aUnicode.Length()-numberOfUnicodeElementsConsumed; |
|
353 } |
|
354 |
|
355 |
|
356 /** Inserts an escape sequence into the descriptor. |
|
357 |
|
358 This function is provided to help in the implementation of |
|
359 ConvertFromUnicode() for modal character set encodings. |
|
360 Each SCharacterSet object in the array passed to |
|
361 ConvertFromUnicode() must have its |
|
362 iConvertFromIntermediateBufferInPlace member assigned. To |
|
363 do this for a modal character set encoding, implement a function whose |
|
364 signature matches that of FConvertFromIntermediateBufferInPlace |
|
365 and which calls this function, passing all arguments unchanged, and |
|
366 specifying the character set's escape sequence and the number of bytes per |
|
367 character. |
|
368 |
|
369 @param aStartPositionInDescriptor The byte position in aDescriptor at which |
|
370 the escape sequence is inserted. If the character set uses more than one byte |
|
371 per character, this position must be the start of a character, otherwise a |
|
372 panic occurs. |
|
373 @param aDescriptor The descriptor into which the escape sequence is inserted. |
|
374 @param aNumberOfCharactersThatDroppedOut The escape sequence is inserted into |
|
375 the start of aDescriptor and any characters that need to drop out to make |
|
376 room for the escape sequence (because the descriptor's maximum length was |
|
377 not long enough) drop out from the end of the buffer. This parameter indicates |
|
378 the number of characters that needed to drop out. |
|
379 @param aEscapeSequence The escape sequence for the character set. |
|
380 @param aNumberOfBytesPerCharacter The number of bytes per character. */ |
|
381 EXPORT_C void CnvUtilities::ConvertFromIntermediateBufferInPlace( |
|
382 TInt aStartPositionInDescriptor, |
|
383 TDes8& aDescriptor, |
|
384 TInt& aNumberOfCharactersThatDroppedOut, |
|
385 const TDesC8& aEscapeSequence, |
|
386 TInt aNumberOfBytesPerCharacter) |
|
387 { |
|
388 const TInt lengthOfDescriptor=aDescriptor.Length(); |
|
389 __ASSERT_ALWAYS((lengthOfDescriptor-aStartPositionInDescriptor)%aNumberOfBytesPerCharacter==0, Panic(EPanicDescriptorNotWholeNumberOfCharacters1)); |
|
390 aNumberOfCharactersThatDroppedOut=(Max(0, aEscapeSequence.Length()-(aDescriptor.MaxLength()-lengthOfDescriptor))+(aNumberOfBytesPerCharacter-1))/aNumberOfBytesPerCharacter; |
|
391 const TInt lengthOfRunInCharacters=(lengthOfDescriptor-aStartPositionInDescriptor)/aNumberOfBytesPerCharacter; |
|
392 if (aNumberOfCharactersThatDroppedOut>=lengthOfRunInCharacters) // ">=" is correct (rather than ">") as if there's only room for the escape sequence we don't want to have it in the descriptor |
|
393 { |
|
394 aNumberOfCharactersThatDroppedOut=lengthOfRunInCharacters; |
|
395 aDescriptor.SetLength(aStartPositionInDescriptor); |
|
396 } |
|
397 else |
|
398 { |
|
399 aDescriptor.SetLength(lengthOfDescriptor-(aNumberOfCharactersThatDroppedOut*aNumberOfBytesPerCharacter)); |
|
400 aDescriptor.Insert(aStartPositionInDescriptor, aEscapeSequence); |
|
401 } |
|
402 } |
|
403 |
|
404 |
|
405 /** Converts text from a modal foreign character set encoding into Unicode. |
|
406 |
|
407 The non-Unicode text specified in aForeign is converted using |
|
408 the array of character set conversion objects (aArrayOfStates) |
|
409 provided by the plug-in, and the converted text is returned in |
|
410 aUnicode. The function can either append to aUnicode |
|
411 or overwrite its contents (if any), depending on the input conversion flags |
|
412 specified. The first element in aArrayOfStates is taken to be |
|
413 the default mode (i.e. the mode to assume by default if there is no preceding |
|
414 escape sequence). |
|
415 |
|
416 @param aDefaultEndiannessOfForeignCharacters The default endian-ness of the |
|
417 foreign characters. If an endian-ness for foreign characters is specified |
|
418 in the conversion data, then that is used instead and the value of |
|
419 aDefaultEndiannessOfForeignCharacters is ignored. |
|
420 @param aUnicode On return, contains the text converted into Unicode. |
|
421 @param aForeign The non-Unicode source text to be converted. |
|
422 @param aState Used to store a modal character set encoding's current mode across |
|
423 multiple calls to ConvertToUnicode() on the same input descriptor. This argument |
|
424 should be passed the same object as passed to the plug-in's ConvertToUnicode() |
|
425 exported function. |
|
426 @param aNumberOfUnconvertibleCharacters On return, contains the number of |
|
427 characters in aForeign which were not converted. Characters which cannot be |
|
428 converted are output as Unicode replacement characters (0xfffd). |
|
429 @param aIndexOfFirstByteOfFirstUnconvertibleCharacter On return, the index |
|
430 of the first byte of the first unconvertible character. For instance if the |
|
431 first character in the input descriptor (aForeign) could not be converted, |
|
432 then this parameter is set to the first byte of that character, i.e. zero. |
|
433 A negative value is returned if all the characters were converted. |
|
434 @param aArrayOfStates Array of character set conversion data objects, and their |
|
435 escape sequences ("modes"). There must be one or more modes in this array, |
|
436 none of the modes can have any NULL member data, and each mode's escape sequence |
|
437 must begin with KControlCharacterEscape (0x1b) or a panic occurs. |
|
438 @return The number of unconverted bytes left at the end of the input descriptor, |
|
439 or a negative error value, as defined in TError. */ |
|
440 EXPORT_C TInt CnvUtilities::ConvertToUnicodeFromModalForeign( |
|
441 CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, |
|
442 TDes16& aUnicode, |
|
443 const TDesC8& aForeign, |
|
444 TInt& aState, |
|
445 TInt& aNumberOfUnconvertibleCharacters, |
|
446 TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter, |
|
447 const TArray<SState>& aArrayOfStates) |
|
448 { |
|
449 TUint notUsed; |
|
450 return ConvertToUnicodeFromModalForeign(aDefaultEndiannessOfForeignCharacters, |
|
451 aUnicode, |
|
452 aForeign, |
|
453 aState, |
|
454 aNumberOfUnconvertibleCharacters, |
|
455 aIndexOfFirstByteOfFirstUnconvertibleCharacter, |
|
456 aArrayOfStates, |
|
457 notUsed, |
|
458 0); |
|
459 } |
|
460 |
|
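461 /** Converts text from a modal foreign character set encoding into Unicode. This overload additionally takes input conversion flags and returns output conversion flags. @param aDefaultEndiannessOfForeignCharacters The default endian-ness for |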
|
462 the foreign characters. If an endian-ness for foreign characters is specified |
|
463 in the conversion data, then that is used instead and the value of |
|
464 aDefaultEndiannessOfForeignCharacters is ignored. |
|
465 @param aUnicode On return, contains the text converted into Unicode. |
|
466 @param aForeign The non-Unicode source text to be converted. |
|
467 @param aState Used to store a modal character set encoding's current mode |
|
468 across multiple calls to ConvertToUnicode() on the same input descriptor. This |
|
469 argument should be passed the same object as passed to the plug-in's |
|
470 ConvertToUnicode() exported function. |
|
471 @param aNumberOfUnconvertibleCharacters On return, contains the number of |
|
472 characters in aForeign which were not converted. Characters which cannot be |
|
473 converted are output as Unicode replacement characters (0xfffd). |
|
474 @param aIndexOfFirstByteOfFirstUnconvertibleCharacter On return, the index |
|
475 of the first byte of the first unconvertible character. For instance if the |
|
476 first character in the input descriptor (aForeign) could not be converted, |
|
477 then this parameter is set to the first byte of that character, i.e. zero. |
|
478 A negative value is returned if all the characters were converted. |
|
479 @param aArrayOfStates Array of character set conversion data objects, and their |
|
480 escape sequences. There must be one or more modes in this array, none of the |
|
481 modes can have any NULL member data, and each mode's escape sequence must |
|
482 begin with KControlCharacterEscape (0x1b) or a panic occurs. |
|
483 @param aOutputConversionFlags If the input descriptor ended in a truncated |
|
484 sequence, e.g. an incomplete multi-byte character, aOutputConversionFlags |
|
485 returns with the EOutputConversionFlagInputIsTruncated flag set. |
|
486 @param aInputConversionFlags Specify |
|
487 CCnvCharacterSetConverter::EInputConversionFlagAppend to append the text to |
|
488 aUnicode. Specify EInputConversionFlagAllowTruncatedInputNotEvenPartlyConsumable |
|
489 to prevent the function from returning the error-code EErrorIllFormedInput |
|
490 when the input descriptor consists of nothing but a truncated sequence. The |
|
491 CCnvCharacterSetConverter::EInputConversionFlagStopAtFirstUnconvertibleCharacter |
|
492 flag must not be set, otherwise a panic occurs. |
|
493 @return The number of unconverted bytes left at the end of the input descriptor, |
|
494 or a negative error value, as defined in TError. */ |
|
495 EXPORT_C TInt CnvUtilities::ConvertToUnicodeFromModalForeign( |
|
496 CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, |
|
497 TDes16& aUnicode, |
|
498 const TDesC8& aForeign, |
|
499 TInt& aState, |
|
500 TInt& aNumberOfUnconvertibleCharacters, |
|
501 TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter, |
|
502 const TArray<SState>& aArrayOfStates, |
|
503 TUint& aOutputConversionFlags, |
|
504 TUint aInputConversionFlags) |
|
505 { |
|
506 __ASSERT_ALWAYS(~aInputConversionFlags&CCnvCharacterSetConverter::EInputConversionFlagStopAtFirstUnconvertibleCharacter, Panic(EPanicBadInputConversionFlags2)); |
|
507 CheckArrayOfStates(aArrayOfStates); |
|
508 aNumberOfUnconvertibleCharacters=0; |
|
509 aIndexOfFirstByteOfFirstUnconvertibleCharacter=-1; |
|
510 aOutputConversionFlags=0; |
|
511 TUint internalInputConversionFlags=aInputConversionFlags; |
|
512 if (~aInputConversionFlags&CCnvCharacterSetConverter::EInputConversionFlagAppend) |
|
513 { |
|
514 aUnicode.SetLength(0); |
|
515 internalInputConversionFlags|=CCnvCharacterSetConverter::EInputConversionFlagAppend; |
|
516 } |
|
517 if (aForeign.Length()==0) |
|
518 { |
|
519 return 0; |
|
520 } |
|
521 if (aUnicode.MaxLength()==aUnicode.Length()) // relies on the fact that aUnicode's length has been set to zero if aInputConversionFlags does not have CCnvCharacterSetConverter::EInputConversionFlagAppend set |
|
522 { |
|
523 return aForeign.Length(); |
|
524 } |
|
525 TPtrC8 remainderOfForeign(aForeign); |
|
526 TPtrC8 homogeneousRun; |
|
527 TInt numberOfForeignBytesConsumed=0; |
|
528 const SCnvConversionData* conversionData = NULL; |
|
529 const TInt startOfNextEscapeSequence=aForeign.Locate(KControlCharacterEscape); |
|
530 if (startOfNextEscapeSequence!=0) // if aForeign doesn't start with an escape sequence... |
|
531 { |
|
532 conversionData=(aState!=CCnvCharacterSetConverter::KStateDefault)? REINTERPRET_CAST(const SCnvConversionData*, aState): aArrayOfStates[0].iConversionData; |
|
533 if (startOfNextEscapeSequence==KErrNotFound) |
|
534 { |
|
535 homogeneousRun.Set(remainderOfForeign); |
|
536 remainderOfForeign.Set(NULL, 0); |
|
537 } |
|
538 else |
|
539 { |
|
540 __ASSERT_DEBUG(startOfNextEscapeSequence>0, Panic(EPanicBadStartOfNextEscapeSequence)); |
|
541 homogeneousRun.Set(remainderOfForeign.Left(startOfNextEscapeSequence)); |
|
542 remainderOfForeign.Set(remainderOfForeign.Mid(startOfNextEscapeSequence)); |
|
543 } |
|
544 goto handleHomogeneousRun; |
|
545 } |
|
546 FOREVER |
|
547 { |
|
548 if (!NextHomogeneousForeignRun(conversionData, numberOfForeignBytesConsumed, homogeneousRun, remainderOfForeign, aArrayOfStates, aOutputConversionFlags)) |
|
549 { |
|
550 goto end; |
|
551 } |
|
552 handleHomogeneousRun: |
|
553 if (conversionData==NULL) |
|
554 { |
|
555 return CCnvCharacterSetConverter::EErrorIllFormedInput; |
|
556 } |
|
557 TInt numberOfUnconvertibleCharacters; |
|
558 TInt indexOfFirstByteOfFirstUnconvertibleCharacter; |
|
559 const TInt returnValue=CCnvCharacterSetConverter::DoConvertToUnicode(*conversionData, aDefaultEndiannessOfForeignCharacters, aUnicode, homogeneousRun, numberOfUnconvertibleCharacters, indexOfFirstByteOfFirstUnconvertibleCharacter, aOutputConversionFlags, internalInputConversionFlags); |
|
560 if (returnValue<0) |
|
561 { |
|
562 return returnValue; // this is an error-code |
|
563 } |
|
564 if (numberOfUnconvertibleCharacters>0) |
|
565 { |
|
566 if (aNumberOfUnconvertibleCharacters==0) |
|
567 { |
|
568 aIndexOfFirstByteOfFirstUnconvertibleCharacter=numberOfForeignBytesConsumed+indexOfFirstByteOfFirstUnconvertibleCharacter; |
|
569 } |
|
570 aNumberOfUnconvertibleCharacters+=numberOfUnconvertibleCharacters; |
|
571 } |
|
572 numberOfForeignBytesConsumed+=homogeneousRun.Length(); |
|
573 if (returnValue>0) |
|
574 { |
|
575 numberOfForeignBytesConsumed-=returnValue; |
|
576 goto end; |
|
577 } |
|
578 if (numberOfForeignBytesConsumed>0) |
|
579 { |
|
580 internalInputConversionFlags|=CCnvCharacterSetConverter::EInputConversionFlagAllowTruncatedInputNotEvenPartlyConsumable; |
|
581 } |
|
582 __ASSERT_DEBUG(remainderOfForeign==aForeign.Mid(numberOfForeignBytesConsumed), Panic(EPanicInconsistentNumberOfForeignBytesRemaining)); |
|
583 } |
|
584 end: |
|
585 if ((numberOfForeignBytesConsumed==0) && (aOutputConversionFlags&CCnvCharacterSetConverter::EOutputConversionFlagInputIsTruncated) && (~aInputConversionFlags&CCnvCharacterSetConverter::EInputConversionFlagAllowTruncatedInputNotEvenPartlyConsumable)) |
|
586 { |
|
587 return CCnvCharacterSetConverter::EErrorIllFormedInput; |
|
588 } |
|
589 aState=REINTERPRET_CAST(TInt, conversionData); |
|
590 return aForeign.Length()-numberOfForeignBytesConsumed; |
|
591 } |
|
592 |
|
593 |
|
594 /** Converts text from a non-modal complex character set encoding (e.g. |
|
595 Shift-JIS or EUC-JP) into Unicode. The non-Unicode text specified in |
|
596 aForeign is converted using the array of character set |
|
597 conversion methods (aArrayOfMethods) provided by the |
|
598 plug-in, and the converted text is returned in aUnicode. |
|
599 Overwrites the contents, if any, of aUnicode. |
|
600 |
|
601 @param aDefaultEndiannessOfForeignCharacters The default endian-ness of the |
|
602 foreign characters. If an endian-ness for foreign characters is specified |
|
603 in the conversion data, then that is used instead and the value of |
|
604 aDefaultEndiannessOfForeignCharacters is ignored. |
|
605 @param aUnicode On return, contains the text converted into Unicode. |
|
606 @param aForeign The non-Unicode source text to be converted. |
|
607 @param aNumberOfUnconvertibleCharacters On return, contains the number of |
|
608 characters in aForeign which were not converted. Characters which cannot be |
|
609 converted are output as Unicode replacement characters (0xfffd). |
|
610 @param aIndexOfFirstByteOfFirstUnconvertibleCharacter On return, the index |
|
611 of the first byte of the first unconvertible character. For instance if the |
|
612 first character in the input descriptor (aForeign) could not be converted, |
|
613 then this parameter is set to the first byte of that character, i.e. zero. |
|
614 A negative value is returned if all the characters were converted. |
|
615 @param aArrayOfMethods Array of conversion methods. There must be one or more |
|
616 methods in this array and none of the methods in the array can have any NULL |
|
617 member data or a panic occurs. |
|
618 @return The number of unconverted bytes left at the end of the input descriptor, |
|
619 or a negative error value, as defined in TError. */ |
|
620 EXPORT_C TInt CnvUtilities::ConvertToUnicodeFromHeterogeneousForeign( |
|
621 CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, |
|
622 TDes16& aUnicode, |
|
623 const TDesC8& aForeign, |
|
624 TInt& aNumberOfUnconvertibleCharacters, |
|
625 TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter, |
|
626 const TArray<SMethod>& aArrayOfMethods) |
|
627 { |
|
628 TUint notUsed; |
|
629 return ConvertToUnicodeFromHeterogeneousForeign( |
|
630 aDefaultEndiannessOfForeignCharacters, |
|
631 aUnicode, |
|
632 aForeign, |
|
633 aNumberOfUnconvertibleCharacters, |
|
634 aIndexOfFirstByteOfFirstUnconvertibleCharacter, |
|
635 aArrayOfMethods, |
|
636 notUsed, |
|
637 0); |
|
638 } |
|
639 |
|
640 /** @param aDefaultEndiannessOfForeignCharacters The default endian-ness for the |
|
641 foreign characters. If an endian-ness for foreign characters is specified |
|
642 in the conversion data, then that is used instead and the value of |
|
643 aDefaultEndiannessOfForeignCharacters is ignored. |
|
644 @param aUnicode On return, contains the text converted into Unicode. |
|
645 @param aForeign The non-Unicode source text to be converted. |
|
646 @param aNumberOfUnconvertibleCharacters On return, contains the number of |
|
647 characters in aForeign which were not converted. Characters which cannot be |
|
648 converted are output as Unicode replacement characters (0xfffd). |
|
649 @param aIndexOfFirstByteOfFirstUnconvertibleCharacter On return, the index |
|
650 of the first byte of the first unconvertible character. For instance if the |
|
651 first character in the input descriptor (aForeign) could not be converted, |
|
652 then this parameter is set to the first byte of that character, i.e. zero. |
|
653 A negative value is returned if all the characters were converted. |
|
654 @param aArrayOfMethods Array of conversion methods. There must be one or more |
|
655 methods in this array and none of the methods in the array can have any NULL |
|
656 member data or a panic occurs. |
|
657 @param aOutputConversionFlags If the input descriptor ended in a truncated |
|
658 sequence, e.g. an incomplete multi-byte character, aOutputConversionFlags |
|
659 returns with the EOutputConversionFlagInputIsTruncated flag set. |
|
660 @param aInputConversionFlags Specify |
|
661 CCnvCharacterSetConverter::EInputConversionFlagAppend to append the text to |
|
662 aUnicode. Specify EInputConversionFlagAllowTruncatedInputNotEvenPartlyConsumable |
|
663 to prevent the function from returning the error-code EErrorIllFormedInput |
|
664 when the input descriptor consists of nothing but a truncated sequence. The |
|
665 CCnvCharacterSetConverter::EInputConversionFlagStopAtFirstUnconvertibleCharacter |
|
666 flag must not be set, otherwise a panic occurs. |
|
667 @return The number of unconverted bytes left at the end of the input descriptor, |
|
668 or a negative error value, as defined in TError. */ |
|
669 EXPORT_C TInt CnvUtilities::ConvertToUnicodeFromHeterogeneousForeign( |
|
670 CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, |
|
671 TDes16& aUnicode, |
|
672 const TDesC8& aForeign, |
|
673 TInt& aNumberOfUnconvertibleCharacters, |
|
674 TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter, |
|
675 const TArray<SMethod>& aArrayOfMethods, |
|
676 TUint& aOutputConversionFlags, |
|
677 TUint aInputConversionFlags) |
|
678 { |
|
679 __ASSERT_ALWAYS(~aInputConversionFlags&CCnvCharacterSetConverter::EInputConversionFlagStopAtFirstUnconvertibleCharacter, Panic(EPanicBadInputConversionFlags3)); |
|
680 CheckArrayOfMethods(aArrayOfMethods); |
|
681 aNumberOfUnconvertibleCharacters=0; |
|
682 aIndexOfFirstByteOfFirstUnconvertibleCharacter=-1; |
|
683 aOutputConversionFlags=0; |
|
684 TUint internalInputConversionFlags=aInputConversionFlags; |
|
685 if (~aInputConversionFlags&CCnvCharacterSetConverter::EInputConversionFlagAppend) |
|
686 { |
|
687 aUnicode.SetLength(0); |
|
688 internalInputConversionFlags|=CCnvCharacterSetConverter::EInputConversionFlagAppend; |
|
689 } |
|
690 if (aForeign.Length()==0) |
|
691 { |
|
692 return 0; |
|
693 } |
|
694 if (aUnicode.MaxLength()==aUnicode.Length()) // relies on the fact that aUnicode's length has been set to zero if aInputConversionFlags does not have CCnvCharacterSetConverter::EInputConversionFlagAppend set |
|
695 { |
|
696 return aForeign.Length(); |
|
697 } |
|
698 const TInt numberOfMethods=aArrayOfMethods.Count(); |
|
699 TPtrC8 remainderOfForeign(aForeign); |
|
700 TInt numberOfForeignBytesConsumed=0; |
|
701 FOREVER |
|
702 { |
|
703 TInt lengthOfRunToConvert=0; |
|
704 const SMethod* method=NULL; |
|
705 for (TInt i=0;;) |
|
706 { |
|
707 method=&aArrayOfMethods[i]; |
|
708 lengthOfRunToConvert=(*method->iNumberOfBytesAbleToConvert)(remainderOfForeign); |
|
709 if (lengthOfRunToConvert<0) |
|
710 { |
|
711 return lengthOfRunToConvert; // this is an error-code |
|
712 } |
|
713 if (lengthOfRunToConvert>0) |
|
714 { |
|
715 break; |
|
716 } |
|
717 __ASSERT_DEBUG(i<numberOfMethods, Panic(EPanicLoopCounterOverRun2)); |
|
718 ++i; |
|
719 if (i>=numberOfMethods) |
|
720 { |
|
721 aOutputConversionFlags|=CCnvCharacterSetConverter::EOutputConversionFlagInputIsTruncated; |
|
722 goto end; |
|
723 } |
|
724 } |
|
725 __ASSERT_DEBUG(lengthOfRunToConvert>0, Panic(EPanicBadLengthOfRunToConvert1)); |
|
726 __ASSERT_DEBUG(method!=NULL, Panic(EPanicBadMethodPointer)); |
|
727 TBuf8<KMaximumLengthOfIntermediateBuffer> intermediateBuffer; |
|
728 const TInt maximumUsableLengthOfIntermediateBuffer=ReduceToNearestMultipleOf(KMaximumLengthOfIntermediateBuffer, method->iNumberOfBytesPerCharacter); |
|
729 FOREVER |
|
730 { |
|
731 const TInt numberOfForeignBytesConsumedThisTime=Min(lengthOfRunToConvert, maximumUsableLengthOfIntermediateBuffer); |
|
732 intermediateBuffer=remainderOfForeign.Left(numberOfForeignBytesConsumedThisTime); |
|
733 __ASSERT_DEBUG((numberOfForeignBytesConsumedThisTime%method->iNumberOfBytesPerCharacter)==0, Panic(EPanicDescriptorNotWholeNumberOfCharacters2)); |
|
734 (*method->iConvertToIntermediateBufferInPlace)(intermediateBuffer); |
|
735 __ASSERT_DEBUG((intermediateBuffer.Length()%method->iNumberOfCoreBytesPerCharacter)==0, Panic(EPanicDescriptorNotWholeNumberOfCharacters3)); |
|
736 __ASSERT_DEBUG((intermediateBuffer.Length()/method->iNumberOfCoreBytesPerCharacter)*method->iNumberOfBytesPerCharacter==numberOfForeignBytesConsumedThisTime, Panic(EPanicBadMethodData1)); |
|
737 TInt numberOfUnconvertibleCharacters; |
|
738 TInt indexOfFirstByteOfFirstUnconvertibleCharacter; |
|
739 const TInt returnValue=CCnvCharacterSetConverter::DoConvertToUnicode(*method->iConversionData, aDefaultEndiannessOfForeignCharacters, aUnicode, intermediateBuffer, numberOfUnconvertibleCharacters, indexOfFirstByteOfFirstUnconvertibleCharacter, aOutputConversionFlags, internalInputConversionFlags); |
|
740 if (returnValue<0) |
|
741 { |
|
742 return returnValue; // this is an error-code |
|
743 } |
|
744 if (numberOfUnconvertibleCharacters>0) |
|
745 { |
|
746 if (aNumberOfUnconvertibleCharacters==0) |
|
747 { |
|
748 aIndexOfFirstByteOfFirstUnconvertibleCharacter=numberOfForeignBytesConsumed+indexOfFirstByteOfFirstUnconvertibleCharacter; |
|
749 } |
|
750 aNumberOfUnconvertibleCharacters+=numberOfUnconvertibleCharacters; |
|
751 } |
|
752 numberOfForeignBytesConsumed+=numberOfForeignBytesConsumedThisTime; |
|
753 if (returnValue>0) |
|
754 { |
|
755 __ASSERT_DEBUG((returnValue%method->iNumberOfCoreBytesPerCharacter)==0, Panic(EPanicDescriptorNotWholeNumberOfCharacters4)); |
|
756 numberOfForeignBytesConsumed-=(returnValue/method->iNumberOfCoreBytesPerCharacter)*method->iNumberOfBytesPerCharacter; |
|
757 goto end; |
|
758 } |
|
759 if (numberOfForeignBytesConsumed>0) |
|
760 { |
|
761 internalInputConversionFlags|=CCnvCharacterSetConverter::EInputConversionFlagAllowTruncatedInputNotEvenPartlyConsumable; |
|
762 } |
|
763 remainderOfForeign.Set(aForeign.Mid(numberOfForeignBytesConsumed)); |
|
764 lengthOfRunToConvert-=numberOfForeignBytesConsumedThisTime; |
|
765 __ASSERT_DEBUG(lengthOfRunToConvert>=0, Panic(EPanicBadLengthOfRunToConvert2)); |
|
766 if (lengthOfRunToConvert<=0) |
|
767 { |
|
768 break; |
|
769 } |
|
770 } |
|
771 } |
|
772 end: |
|
773 if ((numberOfForeignBytesConsumed==0) && (aOutputConversionFlags&CCnvCharacterSetConverter::EOutputConversionFlagInputIsTruncated) && (~aInputConversionFlags&CCnvCharacterSetConverter::EInputConversionFlagAllowTruncatedInputNotEvenPartlyConsumable)) |
|
774 { |
|
775 return CCnvCharacterSetConverter::EErrorIllFormedInput; |
|
776 } |
|
777 return aForeign.Length()-numberOfForeignBytesConsumed; |
|
778 } |
|
779 |
|
780 void CnvUtilities::CheckArrayOfCharacterSets(const TArray<SCharacterSet>& aArrayOfCharacterSets) |
|
781 { |
|
782 const TInt numberOfCharacterSets=aArrayOfCharacterSets.Count(); |
|
783 __ASSERT_ALWAYS(numberOfCharacterSets>0, Panic(EPanicBadNumberOfCharacterSets)); |
|
784 for (TInt i=0; i<numberOfCharacterSets; ++i) |
|
785 { |
|
786 const SCharacterSet& characterSet=aArrayOfCharacterSets[i]; |
|
787 __ASSERT_ALWAYS(characterSet.iConversionData!=NULL, Panic(EPanicBadConversionDataPointer1)); |
|
788 __ASSERT_ALWAYS(characterSet.iConvertFromIntermediateBufferInPlace!=NULL, Panic(EPanicBadFunctionPointer1)); |
|
789 __ASSERT_ALWAYS(characterSet.iEscapeSequence!=NULL, Panic(EPanicBadEscapeSequencePointer1)); |
|
790 } |
|
791 } |
|
792 |
|
793 void CnvUtilities::CheckArrayOfStates(const TArray<SState>& aArrayOfStates) |
|
794 { |
|
795 const TInt numberOfStates=aArrayOfStates.Count(); |
|
796 __ASSERT_ALWAYS(numberOfStates>0, Panic(EPanicBadNumberOfStates)); |
|
797 for (TInt i=0; i<numberOfStates; ++i) |
|
798 { |
|
799 const SState& state=aArrayOfStates[i]; |
|
800 __ASSERT_ALWAYS(state.iEscapeSequence!=NULL, Panic(EPanicBadEscapeSequencePointer2)); |
|
801 __ASSERT_ALWAYS((*state.iEscapeSequence)[0]==KControlCharacterEscape, Panic(EPanicBadEscapeSequenceStart)); |
|
802 __ASSERT_ALWAYS(state.iConversionData!=NULL, Panic(EPanicBadConversionDataPointer2)); |
|
803 } |
|
804 } |
|
805 |
|
806 void CnvUtilities::CheckArrayOfMethods(const TArray<SMethod>& aArrayOfMethods) |
|
807 { |
|
808 const TInt numberOfMethods=aArrayOfMethods.Count(); |
|
809 __ASSERT_ALWAYS(numberOfMethods>0, Panic(EPanicBadNumberOfMethods)); |
|
810 for (TInt i=0; i<numberOfMethods; ++i) |
|
811 { |
|
812 const SMethod& method=aArrayOfMethods[i]; |
|
813 __ASSERT_ALWAYS(method.iNumberOfBytesAbleToConvert!=NULL, Panic(EPanicBadFunctionPointer2)); |
|
814 __ASSERT_ALWAYS(method.iConvertToIntermediateBufferInPlace!=NULL, Panic(EPanicBadFunctionPointer3)); |
|
815 __ASSERT_ALWAYS(method.iConversionData!=NULL, Panic(EPanicBadConversionDataPointer3)); |
|
816 __ASSERT_ALWAYS(method.iNumberOfBytesPerCharacter>0, Panic(EPanicBadMethodData2)); |
|
817 __ASSERT_ALWAYS(method.iNumberOfCoreBytesPerCharacter>0, Panic(EPanicBadMethodData3)); |
|
818 __ASSERT_ALWAYS(method.iNumberOfCoreBytesPerCharacter<=method.iNumberOfBytesPerCharacter, Panic(EPanicBadMethodData4)); |
|
819 } |
|
820 } |
|
821 |
|
822 TInt CnvUtilities::LengthOfUnicodeCharacter(const TDesC16& aUnicode, TInt aIndex) |
|
823 { |
|
824 const TUint unicodeCharacter=aUnicode[aIndex]; |
|
825 if ((unicodeCharacter>=0xd800) && (unicodeCharacter<=0xdbff)) // if the unicode character is the first half of a surrogate-pair... |
|
826 { |
|
827 __ASSERT_DEBUG(aIndex+1<aUnicode.Length(), Panic(EPanicBadSurrogatePair1)); |
|
828 #if defined(_DEBUG) |
|
829 const TUint secondHalfOfSurrogatePair=aUnicode[aIndex+1]; |
|
830 #endif |
|
831 __ASSERT_DEBUG((secondHalfOfSurrogatePair>=0xdc00) && (secondHalfOfSurrogatePair<=0xdfff), Panic(EPanicBadSurrogatePair2)); // this can be asserted as CCnvCharacterSetConverter::DoConvertFromUnicode should have returned an error value if this was a bad surrogate pair |
|
832 return 2; |
|
833 } |
|
834 return 1; |
|
835 } |
|
836 |
|
837 TBool CnvUtilities::NextHomogeneousForeignRun(const SCnvConversionData*& aConversionData, TInt& aNumberOfForeignBytesConsumed, TPtrC8& aHomogeneousRun, TPtrC8& aRemainderOfForeign, const TArray<SState>& aArrayOfStates, TUint& aOutputConversionFlags) |
|
838 { |
|
839 __ASSERT_DEBUG((aRemainderOfForeign.Length()==0) || (aRemainderOfForeign[0]==KControlCharacterEscape), Panic(EPanicBadRemainderOfForeign)); |
|
840 FOREVER |
|
841 { |
|
842 if (aRemainderOfForeign.Length()==0) |
|
843 { |
|
844 return EFalse; |
|
845 } |
|
846 const TInt numberOfStates=aArrayOfStates.Count(); |
|
847 TInt i; |
|
848 for (i=0; i<numberOfStates; ++i) |
|
849 { |
|
850 const SState& state=aArrayOfStates[i]; |
|
851 if (MatchesEscapeSequence(aNumberOfForeignBytesConsumed, aHomogeneousRun, aRemainderOfForeign, *state.iEscapeSequence)) |
|
852 { |
|
853 aConversionData=state.iConversionData; |
|
854 goto foundState; |
|
855 } |
|
856 } |
|
857 for (i=0; i<numberOfStates; ++i) |
|
858 { |
|
859 if (IsStartOf(aRemainderOfForeign, *aArrayOfStates[i].iEscapeSequence)) |
|
860 { |
|
861 // aRemainderOfForeign ends with a truncated escape sequence, so ConvertToUnicode cannot convert any more |
|
862 aOutputConversionFlags|=CCnvCharacterSetConverter::EOutputConversionFlagInputIsTruncated; |
|
863 return EFalse; |
|
864 } |
|
865 } |
|
866 // force ConvertToUnicode to return CCnvCharacterSetConverter::EErrorIllFormedInput |
|
867 aConversionData=NULL; |
|
868 return ETrue; |
|
869 foundState: |
|
870 if (aHomogeneousRun.Length()>0) |
|
871 { |
|
872 return ETrue; |
|
873 } |
|
874 } |
|
875 } |
|
876 |
|
877 TBool CnvUtilities::MatchesEscapeSequence(TInt& aNumberOfForeignBytesConsumed, TPtrC8& aHomogeneousRun, TPtrC8& aRemainderOfForeign, const TDesC8& aEscapeSequence) |
|
878 { |
|
879 const TInt lengthOfEscapeSequence=aEscapeSequence.Length(); |
|
880 if (IsStartOf(aEscapeSequence, aRemainderOfForeign)) |
|
881 { |
|
882 aRemainderOfForeign.Set(aRemainderOfForeign.Mid(lengthOfEscapeSequence)); |
|
883 const TInt startOfNextEscapeSequence=aRemainderOfForeign.Locate(KControlCharacterEscape); |
|
884 if (startOfNextEscapeSequence==KErrNotFound) |
|
885 { |
|
886 aHomogeneousRun.Set(aRemainderOfForeign); |
|
887 aRemainderOfForeign.Set(NULL, 0); |
|
888 } |
|
889 else |
|
890 { |
|
891 aHomogeneousRun.Set(aRemainderOfForeign.Left(startOfNextEscapeSequence)); |
|
892 aRemainderOfForeign.Set(aRemainderOfForeign.Mid(startOfNextEscapeSequence)); |
|
893 } |
|
894 aNumberOfForeignBytesConsumed+=lengthOfEscapeSequence; |
|
895 return ETrue; |
|
896 } |
|
897 return EFalse; |
|
898 } |
|
899 |
|
900 TBool CnvUtilities::IsStartOf(const TDesC8& aStart, const TDesC8& aPotentiallyLongerDescriptor) |
|
901 { |
|
902 const TInt lengthOfStart=aStart.Length(); |
|
903 return (aPotentiallyLongerDescriptor.Length()>=lengthOfStart) && (aPotentiallyLongerDescriptor.Left(lengthOfStart)==aStart); |
|
904 } |
|
905 |