charconvfw/charconv_fw/src/convutils/convutils.cpp
changeset 0 1fb32624e06b
equal deleted inserted replaced
-1:000000000000 0:1fb32624e06b
       
     1 /*
       
     2 * Copyright (c) 2003-2009 Nokia Corporation and/or its subsidiary(-ies).
       
     3 * All rights reserved.
       
     4 * This component and the accompanying materials are made available
       
     5 * under the terms of "Eclipse Public License v1.0"
       
     6 * which accompanies this distribution, and is available
       
     7 * at the URL "http://www.eclipse.org/legal/epl-v10.html".
       
     8 *
       
     9 * Initial Contributors:
       
    10 * Nokia Corporation - initial contribution.
       
    11 *
       
    12 * Contributors:
       
    13 *
       
    14 * Description: 
       
    15 *
       
    16 */
       
    17 
       
    18 
       
    19 #include <e32std.h>
       
    20 #include <e32base.h>
       
    21 #include <charconv.h>
       
    22 #include <convutils.h>
       
    23                    
       
    24 const TInt KNoPreviousCharacterSet=-1;
       
    25 const TInt KDefaultCharacterSet = 0;
       
    26 const TUint KControlCharacterEscape=0x1b;
       
    27 
       
    28 #if defined(_DEBUG)
       
    29 //It will cause performance problem with small KMaximumLengthOfIntermediateBuffer.
       
    30 //Please use release version to test performance cases.
       
    31 const TInt KMaximumLengthOfIntermediateBuffer=5;
       
    32 #else
       
    33 const TInt KMaximumLengthOfIntermediateBuffer=150;
       
    34 #endif
       
    35 
       
    36 struct SCnvConversionData;
       
    37 
       
    38 _LIT(KLitPanicText, "CONVUTILS");
       
    39 
       
    40 enum TPanic
       
    41 	{
       
    42 	EPanicBadInputConversionFlags1=1,
       
    43 	EPanicBadInputConversionFlags2,
       
    44 	EPanicBadInputConversionFlags3,
       
    45 	EPanicBadNumberOfUnicodeElementsConsumed,
       
    46 	EPanicAppendFlagViolated,
       
    47 	EPanicBadNumberOfUnicodeCharactersConverted,
       
    48 	EPanicBadNumberOfCharactersThatDroppedOut,
       
    49 	EPanicLoopCounterOverRun1,
       
    50 	EPanicLoopCounterOverRun2,
       
    51 	EPanicDescriptorNotWholeNumberOfCharacters1,
       
    52 	EPanicDescriptorNotWholeNumberOfCharacters2,
       
    53 	EPanicDescriptorNotWholeNumberOfCharacters3,
       
    54 	EPanicDescriptorNotWholeNumberOfCharacters4,
       
    55 	EPanicBadStartOfNextEscapeSequence,
       
    56 	EPanicInconsistentNumberOfForeignBytesRemaining,
       
    57 	EPanicBadLengthOfRunToConvert1,
       
    58 	EPanicBadLengthOfRunToConvert2,
       
    59 	EPanicBadMethodPointer,
       
    60 	EPanicBadMethodData1,
       
    61 	EPanicBadMethodData2,
       
    62 	EPanicBadMethodData3,
       
    63 	EPanicBadMethodData4,
       
    64 	EPanicBadNumberOfCharacterSets,
       
    65 	EPanicBadConversionDataPointer1,
       
    66 	EPanicBadConversionDataPointer2,
       
    67 	EPanicBadConversionDataPointer3,
       
    68 	EPanicBadFunctionPointer1,
       
    69 	EPanicBadFunctionPointer2,
       
    70 	EPanicBadFunctionPointer3,
       
    71 	EPanicBadEscapeSequencePointer1,
       
    72 	EPanicBadEscapeSequencePointer2,
       
    73 	EPanicBadNumberOfStates,
       
    74 	EPanicBadEscapeSequenceStart,
       
    75 	EPanicBadNumberOfMethods,
       
    76 	EPanicBadSurrogatePair1,
       
    77 	EPanicBadSurrogatePair2,
       
    78 	EPanicBadRemainderOfForeign,
       
    79 	EPanicOutputDescriptorTooShortEvenToHoldEscapeSequenceToDefaultCharacterSet
       
    80 	};
       
    81 
       
    82 LOCAL_C void Panic(TPanic aPanic)
       
    83 	{
       
    84 	User::Panic(KLitPanicText, aPanic);
       
    85 	}
       
    86 
       
    87 /** Converts Unicode text into a complex foreign character set encoding. This 
       
    88 is an encoding which cannot be converted simply by calling 
       
    89 CCnvCharacterSetConverter::DoConvertFromUnicode(). It may be modal (e.g. JIS) 
       
    90 or non-modal (e.g. Shift-JIS).
       
    91 
       
    92 The Unicode text specified in aUnicode is converted using the array of 
       
    93 conversion data objects (aArrayOfCharacterSets) provided by the plug-in for 
       
    94 the complex character set encoding, and the converted text is returned in 
       
    95 aForeign. Any existing contents in aForeign are overwritten.
       
    96 
       
    97 Unlike CCnvCharacterSetConverter::DoConvertFromUnicode(), multiple character 
       
    98 sets can be specified. aUnicode is converted using the first character conversion 
       
    99 data object in the array. When a character is found which cannot be converted 
       
   100 using that data, each character set in the array is tried in turn. If it cannot 
       
   101 be converted using any object in the array, the index of the character is 
       
   102 appended to aIndicesOfUnconvertibleCharacters and the character is replaced 
       
   103 by aReplacementForUnconvertibleUnicodeCharacters.
       
   104 
       
   105 If it can be converted using another object in the array, that object is used 
       
   106 to convert all subsequent characters until another unconvertible character 
       
   107 is found.
       
   108 
       
   109 @param aDefaultEndiannessOfForeignCharacters The default endian-ness to use 
       
   110 when writing the characters in the foreign character set. If an endian-ness 
       
   111 for foreign characters is specified in the current conversion data object, 
       
   112 then that is used instead and the value of 
       
   113 aDefaultEndiannessOfForeignCharacters is ignored. 
       
   114 @param aReplacementForUnconvertibleUnicodeCharacters The single character (one 
       
   115 or more byte values) which is used to replace unconvertible characters. 
       
   116 @param aForeign On return, contains the converted text in the non-Unicode 
       
   117 character set. 
       
   118 @param aUnicode The source Unicode text to be converted. 
       
   119 @param aIndicesOfUnconvertibleCharacters On return, holds an ascending array 
       
   120 of the indices of each Unicode character in the source text which could not 
       
   121 be converted (because none of the target character sets have an equivalent 
       
   122 character). 
       
   123 @param aArrayOfCharacterSets Array of character conversion data objects, 
       
   124 representing the character sets which comprise a complex character set 
       
   125 encoding. These are used in sequence to convert the Unicode text. There must 
       
   126 be at least one character set in this array and no character set may have any 
       
   127 NULL member data, or a panic occurs. 
       
   128 @return The number of unconverted characters left at the end of the input 
       
   129 descriptor (e.g. because aForeign was not long enough to hold all the text), 
       
   130 or a negative error value, as defined in CCnvCharacterSetConverter::TError. */
       
   131 EXPORT_C TInt CnvUtilities::ConvertFromUnicode(
       
   132 		CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
       
   133 		const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, 
       
   134 		TDes8& aForeign, 
       
   135 		const TDesC16& aUnicode, 
       
   136 		CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters, 
       
   137 		const TArray<SCharacterSet>& aArrayOfCharacterSets)
       
   138 	{
       
   139 	TUint notUsed;
       
   140 	return ConvertFromUnicode(aDefaultEndiannessOfForeignCharacters, 
       
   141 								aReplacementForUnconvertibleUnicodeCharacters, 
       
   142 								aForeign, 
       
   143 								aUnicode, 
       
   144 								aIndicesOfUnconvertibleCharacters, 
       
   145 								aArrayOfCharacterSets, 
       
   146 								notUsed, 
       
   147 								0);
       
   148 	}
       
   149 
       
   150 /** Converts Unicode text into a complex foreign character set encoding. This is 
       
   151 an encoding which cannot be converted simply by a call to 
       
   152 CCnvCharacterSetConverter::DoConvertFromUnicode(). It may be modal (e.g. JIS) 
       
   153 or non-modal (e.g. Shift-JIS).
       
   154 
       
   155 The Unicode text specified in aUnicode is converted using the array of conversion 
       
   156 data objects (aArrayOfCharacterSets) provided by the plug-in for the complex 
       
   157 character set encoding and the converted text is returned in aForeign. The 
       
   158 function can either append to aForeign or overwrite its contents (if any).
       
   159 
       
   160 Unlike CCnvCharacterSetConverter::DoConvertFromUnicode(), multiple character 
       
   161 sets can be specified. aUnicode is converted using the first character conversion 
       
   162 data object in the array. When a character is found which cannot be converted 
       
   163 using that data, each character set in the array is tried in turn. If it cannot 
       
   164 be converted using any object in the array, the index of the character is 
       
   165 appended to aIndicesOfUnconvertibleCharacters and the character is replaced 
       
   166 by aReplacementForUnconvertibleUnicodeCharacters.
       
   167 
       
   168 If it can be converted using another object in the array, that object is used 
       
   169 to convert all subsequent characters until another unconvertible character 
       
   170 is found.
       
   171 
       
   172 @param aDefaultEndiannessOfForeignCharacters The default endian-ness to use 
       
   173 when writing the characters in the foreign character set. If an endian-ness 
       
   174 for foreign characters is specified in the current conversion data object, 
       
   175 then that is used instead and the value of 
       
   176 aDefaultEndiannessOfForeignCharacters is ignored. 
       
   177 @param aReplacementForUnconvertibleUnicodeCharacters The single character (one 
       
   178 or more byte values) which is used to replace unconvertible characters. 
       
   179 @param aForeign On return, contains the converted text in the non-Unicode 
       
   180 character set. This may already contain some text. If it does, and if 
       
   181 aInputConversionFlags specifies EInputConversionFlagAppend, then the converted 
       
   182 text is appended to this descriptor. 
       
   183 @param aUnicode The source Unicode text to be converted. 
       
   184 @param aIndicesOfUnconvertibleCharacters On return, holds an ascending array 
       
   185 of the indices of each Unicode character in the source text which could not 
       
   186 be converted (because none of the target character sets have an equivalent 
       
   187 character). 
       
   188 @param aArrayOfCharacterSets Array of character set data objects. These are 
       
   189 used in sequence to convert the Unicode text. There must be at least one 
       
   190 character set in this array and no character set may have any NULL member 
       
   191 data, or a panic occurs. 
       
   192 @param aOutputConversionFlags If the input descriptor ended in a truncated 
       
   193 sequence, e.g. the first half only of a Unicode surrogate pair, this returns 
       
   194 with the EOutputConversionFlagInputIsTruncated flag set. 
       
   195 @param aInputConversionFlags Specify 
       
   196 CCnvCharacterSetConverter::EInputConversionFlagAppend to append the text to 
       
   197 aForeign. Specify CCnvCharacterSetConverter::EInputConversionFlagAllowTruncatedInputNotEvenPartlyConsumable 
       
   198 to prevent the function from returning the error-code EErrorIllFormedInput 
       
   199 when the input descriptor consists of nothing but a truncated sequence. The 
       
   200 CCnvCharacterSetConverter::EInputConversionFlagStopAtFirstUnconvertibleCharacter 
       
   201 flag must not be set, otherwise a panic occurs. 
       
   202 @return The number of unconverted characters left at the end of the input descriptor 
       
   203 (e.g. because aForeign was not long enough to hold all the text), or a negative 
       
   204 error value, as defined in CCnvCharacterSetConverter::TError. */
       
   205 EXPORT_C TInt CnvUtilities::ConvertFromUnicode(
       
   206 				CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
       
   207 				const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, 
       
   208 				TDes8& aForeign, 
       
   209 				const TDesC16& aUnicode, 
       
   210 				CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters, 
       
   211 				const TArray<SCharacterSet>& aArrayOfCharacterSets, 
       
   212 				TUint& aOutputConversionFlags, 
       
   213 				TUint aInputConversionFlags)
       
   214 	{
       
   215 	__ASSERT_ALWAYS(~aInputConversionFlags&CCnvCharacterSetConverter::EInputConversionFlagStopAtFirstUnconvertibleCharacter, Panic(EPanicBadInputConversionFlags1));
       
   216 	CheckArrayOfCharacterSets(aArrayOfCharacterSets);
       
   217 	aOutputConversionFlags=0;
       
   218 	TUint internalInputConversionFlags=aInputConversionFlags;
       
   219 	if (~aInputConversionFlags&CCnvCharacterSetConverter::EInputConversionFlagAppend)
       
   220 		{
       
   221 		aForeign.SetLength(0);
       
   222 		internalInputConversionFlags|=CCnvCharacterSetConverter::EInputConversionFlagAppend;
       
   223 		}
       
   224 	if (aUnicode.Length()==0)
       
   225 		{
       
   226 		return 0;
       
   227 		}
       
   228 	if (aForeign.MaxLength()==aForeign.Length()) // relies on the fact that aForeign's length has been set to zero if aInputConversionFlags does not have CCnvCharacterSetConverter::EInputConversionFlagAppend set
       
   229 		{
       
   230 		return aUnicode.Length();
       
   231 		}
       
   232 	TDes8* foreign=&aForeign;
       
   233 	TPtr8 dummyForeign(NULL, 0, 0);
       
   234 	if (aInputConversionFlags&CCnvCharacterSetConverter::EInputConversionFlagMustEndInDefaultCharacterSet)
       
   235 		{
       
   236 		TInt dummyMaximumLength =
       
   237 			aForeign.MaxLength() - aArrayOfCharacterSets[KDefaultCharacterSet].iEscapeSequence->Length();
       
   238 		__ASSERT_ALWAYS(dummyMaximumLength >= 0, 
       
   239 			Panic(EPanicOutputDescriptorTooShortEvenToHoldEscapeSequenceToDefaultCharacterSet));
       
   240 		dummyForeign.Set(const_cast <TUint8*> (aForeign.Ptr()),
       
   241 						 aForeign.Length(), 
       
   242 						 dummyMaximumLength);
       
   243 		foreign=&dummyForeign;
       
   244 		}
       
   245 	const TInt numberOfCharacterSets=aArrayOfCharacterSets.Count();
       
   246 	TInt numberOfUnicodeElementsConsumed=0;
       
   247 	internalInputConversionFlags|=CCnvCharacterSetConverter::EInputConversionFlagStopAtFirstUnconvertibleCharacter; // this is not just an optimization - it ensures that "foreign" doesn't get filled up too much each time CCnvCharacterSetConverter::DoConvertFromUnicode is called
       
   248     TInt previousCharacterSet = aInputConversionFlags&CCnvCharacterSetConverter::EInputConversionFlagAssumeStartInDefaultCharacterSet?
       
   249         KDefaultCharacterSet : KNoPreviousCharacterSet;
       
   250 	FOREVER
       
   251 		{
       
   252 		for (TInt presentCharacterSet=KDefaultCharacterSet;;)
       
   253 			{
       
   254 			__ASSERT_DEBUG(numberOfUnicodeElementsConsumed<=aUnicode.Length(), Panic(EPanicBadNumberOfUnicodeElementsConsumed));
       
   255 			if (numberOfUnicodeElementsConsumed>=aUnicode.Length())
       
   256 				{
       
   257 				goto end;
       
   258 				}
       
   259 			const SCharacterSet& characterSet=aArrayOfCharacterSets[presentCharacterSet];
       
   260 			const TInt oldNumberOfBytesInForeign=foreign->Length();
       
   261 			if (numberOfUnicodeElementsConsumed>0)
       
   262 				{
       
   263 				internalInputConversionFlags|=CCnvCharacterSetConverter::EInputConversionFlagAllowTruncatedInputNotEvenPartlyConsumable;
       
   264 				}
       
   265 			CCnvCharacterSetConverter::TArrayOfAscendingIndices indicesOfUnconvertibleCharacters;
       
   266 			const TInt returnValue=CCnvCharacterSetConverter::DoConvertFromUnicode(*characterSet.iConversionData, aDefaultEndiannessOfForeignCharacters, KNullDesC8, *foreign, aUnicode.Mid(numberOfUnicodeElementsConsumed), indicesOfUnconvertibleCharacters, aOutputConversionFlags, internalInputConversionFlags);
       
   267 			if (returnValue<0)
       
   268 				{
       
   269 				return returnValue; // this is an error-code
       
   270 				}
       
   271 			__ASSERT_DEBUG(foreign->Length()>=oldNumberOfBytesInForeign, Panic(EPanicAppendFlagViolated));
       
   272 			TInt indexOfFirstUnconvertibleCharacter;
       
   273 			if (indicesOfUnconvertibleCharacters.NumberOfIndices()==0)
       
   274 				{
       
   275 				indexOfFirstUnconvertibleCharacter=-1;
       
   276 				numberOfUnicodeElementsConsumed=aUnicode.Length()-returnValue;
       
   277 				}
       
   278 			else
       
   279 				{
       
   280 				indexOfFirstUnconvertibleCharacter=indicesOfUnconvertibleCharacters[0];
       
   281 				numberOfUnicodeElementsConsumed+=indexOfFirstUnconvertibleCharacter;
       
   282 				__ASSERT_DEBUG(numberOfUnicodeElementsConsumed+LengthOfUnicodeCharacter(aUnicode, numberOfUnicodeElementsConsumed)==aUnicode.Length()-returnValue, Panic(EPanicBadNumberOfUnicodeCharactersConverted));
       
   283 				}
       
   284 			if (indexOfFirstUnconvertibleCharacter!=0) // if at least one Unicode character at the start of CCnvCharacterSetConverter::DoConvertFromUnicode's input descriptor was convertible...
       
   285 				{
       
   286 				TBool gotoEnd = EFalse;
       
   287 				if (foreign->Length()>oldNumberOfBytesInForeign)
       
   288 					{
       
   289 					TInt numberOfCharactersThatDroppedOut=0;
       
   290                      // Insert an escape sequence if this character set is different from the last one.
       
   291                      if (presentCharacterSet  != previousCharacterSet)
       
   292                         {
       
   293                         // Insert escape sequence (if requred) in front of the last encoded run of text.
       
   294                         // Note that this may cause some characters to drop out at the end.
       
   295                         (*characterSet.iConvertFromIntermediateBufferInPlace)(oldNumberOfBytesInForeign, *foreign, numberOfCharactersThatDroppedOut);
       
   296                         if (oldNumberOfBytesInForeign < foreign->Length())
       
   297                 			previousCharacterSet = presentCharacterSet;
       
   298                         }
       
   299 					numberOfUnicodeElementsConsumed-=numberOfCharactersThatDroppedOut;
       
   300 					if (numberOfCharactersThatDroppedOut>0 )// if "foreign" has been filled to as much as it will hold...
       
   301 						{
       
   302 						gotoEnd = ETrue;
       
   303 						}
       
   304 					}
       
   305 				if (indexOfFirstUnconvertibleCharacter<0) // if we've successfully converted up to the end of aUnicode (using *characterSet.iConversionData)...
       
   306 					{
       
   307 					gotoEnd = ETrue;
       
   308 					}
       
   309 				if (gotoEnd)
       
   310 					{
       
   311 					if ( aInputConversionFlags&CCnvCharacterSetConverter::EInputConversionFlagMustEndInDefaultCharacterSet
       
   312 					    && previousCharacterSet != KDefaultCharacterSet
       
   313 					    && previousCharacterSet != KNoPreviousCharacterSet)
       
   314 					    {
       
   315 					    aForeign.SetLength(foreign->Length());
       
   316     				    aForeign.Append(*aArrayOfCharacterSets[KDefaultCharacterSet].iEscapeSequence);
       
   317     				    foreign=NULL;
       
   318 					    }
       
   319 					goto end;
       
   320 					}
       
   321 				break;
       
   322 				}
       
   323 			__ASSERT_DEBUG(presentCharacterSet<numberOfCharacterSets, Panic(EPanicLoopCounterOverRun1));
       
   324 			++presentCharacterSet;
       
   325 			if (presentCharacterSet>=numberOfCharacterSets)
       
   326 				{
       
   327 				if ((foreign->MaxLength()-foreign->Length()<aReplacementForUnconvertibleUnicodeCharacters.Length()) ||
       
   328 					(aIndicesOfUnconvertibleCharacters.AppendIndex(numberOfUnicodeElementsConsumed)!=CCnvCharacterSetConverter::TArrayOfAscendingIndices::EAppendSuccessful)) // the tests must be done in this order as AppendIndex must only be called if there is room for aReplacementForUnconvertibleUnicodeCharacters
       
   329 					{
       
   330 					goto end;
       
   331 					}
       
   332 				numberOfUnicodeElementsConsumed+=LengthOfUnicodeCharacter(aUnicode, numberOfUnicodeElementsConsumed);
       
   333 				foreign->Append(aReplacementForUnconvertibleUnicodeCharacters);
       
   334 				break;
       
   335 				}
       
   336 			}
       
   337 		}
       
   338 end:
       
   339 	if (foreign!=NULL)
       
   340 		{
       
   341 		aForeign.SetLength(foreign->Length());
       
   342 		foreign=NULL;
       
   343 		}
       
   344 	if ((numberOfUnicodeElementsConsumed==0) && (aOutputConversionFlags&CCnvCharacterSetConverter::EOutputConversionFlagInputIsTruncated) && (~aInputConversionFlags&CCnvCharacterSetConverter::EInputConversionFlagAllowTruncatedInputNotEvenPartlyConsumable))
       
   345 		{
       
   346 		return CCnvCharacterSetConverter::EErrorIllFormedInput;
       
   347 		}
       
   348 	return aUnicode.Length()-numberOfUnicodeElementsConsumed;
       
   349 	}
       
   350 
       
   351 
       
   352 /** Inserts an escape sequence into the descriptor.
       
   353 
       
   354 This function is provided to help in the implementation of
       
   355 ConvertFromUnicode() for modal character set encodings.
       
   356 Each SCharacterSet object in the array passed to
       
   357 ConvertFromUnicode() must have its
       
   358 iConvertFromIntermediateBufferInPlace member assigned. To
       
   359 do this for a modal character set encoding, implement a function whose
       
   360 signature matches that of FConvertFromIntermediateBufferInPlace 
       
   361 and which calls this function, passing all arguments unchanged, and 
       
   362 specifying the character set's escape sequence and the number of bytes per 
       
   363 character.
       
   364 
       
   365 @param aStartPositionInDescriptor The byte position in aDescriptor at which 
       
   366 the escape sequence is inserted. If the character set uses more than one byte 
       
   367 per character, this position must be the start of a character, otherwise a 
       
   368 panic occurs. 
       
   369 @param aDescriptor The descriptor into which the escape sequence is inserted. 
       
   370 @param aNumberOfCharactersThatDroppedOut The escape sequence is inserted into 
       
   371 the start of aDescriptor and any characters that need to drop out to make 
       
   372 room for the escape sequence (because the descriptor's maximum length was 
       
   373 not long enough) drop out from the end of the buffer. This parameter indicates 
       
   374 the number of characters that needed to drop out.
       
   375 @param aEscapeSequence The escape sequence for the character set. 
       
   376 @param aNumberOfBytesPerCharacter The number of bytes per character. */
       
   377 EXPORT_C void CnvUtilities::ConvertFromIntermediateBufferInPlace(
       
   378 					TInt aStartPositionInDescriptor, 
       
   379 					TDes8& aDescriptor, 
       
   380 					TInt& aNumberOfCharactersThatDroppedOut, 
       
   381 					const TDesC8& aEscapeSequence, 
       
   382 					TInt aNumberOfBytesPerCharacter)
       
   383 	{
       
   384 	const TInt lengthOfDescriptor=aDescriptor.Length();
       
   385 	__ASSERT_ALWAYS((lengthOfDescriptor-aStartPositionInDescriptor)%aNumberOfBytesPerCharacter==0, Panic(EPanicDescriptorNotWholeNumberOfCharacters1));
       
   386 	aNumberOfCharactersThatDroppedOut=(Max(0, aEscapeSequence.Length()-(aDescriptor.MaxLength()-lengthOfDescriptor))+(aNumberOfBytesPerCharacter-1))/aNumberOfBytesPerCharacter;
       
   387 	const TInt lengthOfRunInCharacters=(lengthOfDescriptor-aStartPositionInDescriptor)/aNumberOfBytesPerCharacter;
       
   388 	if (aNumberOfCharactersThatDroppedOut>=lengthOfRunInCharacters) // ">=" is correct (rather than ">") as if there's only room for the escape sequence we don't want to have it in the descriptor
       
   389 		{
       
   390 		aNumberOfCharactersThatDroppedOut=lengthOfRunInCharacters;
       
   391 		aDescriptor.SetLength(aStartPositionInDescriptor);
       
   392 		}
       
   393 	else
       
   394 		{
       
   395 		aDescriptor.SetLength(lengthOfDescriptor-(aNumberOfCharactersThatDroppedOut*aNumberOfBytesPerCharacter));
       
   396 		aDescriptor.Insert(aStartPositionInDescriptor, aEscapeSequence);
       
   397 		}
       
   398 	}
       
   399 
       
   400 
       
   401 /**  Converts text from a modal foreign character set encoding into Unicode.
       
   402 
       
   403 The non-Unicode text specified in aForeign is converted using 
       
   404 the array of character set conversion objects (aArrayOfStates) 
       
   405 provided by the plug-in, and the converted text is returned in 
       
   406 aUnicode. The function can either append to aUnicode 
       
   407 or overwrite its contents (if any), depending on the input conversion flags 
       
   408 specified. The first element in aArrayOfStates is taken to be 
       
   409 the default mode (i.e. the mode to assume by default if there is no preceding 
       
   410 escape sequence).
       
   411  
       
   412 @param aDefaultEndiannessOfForeignCharacters The default endian-ness of the 
       
   413 foreign characters. If an endian-ness for foreign characters is specified 
       
   414 in the conversion data, then that is used instead and the value of 
       
   415 aDefaultEndiannessOfForeignCharacters is ignored. 
       
   416 @param aUnicode On return, contains the text converted into Unicode. 
       
   417 @param aForeign The non-Unicode source text to be converted. 
       
   418 @param aState Used to store a modal character set encoding's current mode across 
       
   419 multiple calls to ConvertToUnicode() on the same input descriptor. This argument 
       
   420 should be passed the same object as passed to the plug-in's ConvertToUnicode() 
       
   421 exported function.
       
   422 @param aNumberOfUnconvertibleCharacters On return, contains the number of 
       
   423 characters in aForeign which were not converted. Characters which cannot be 
       
   424 converted are output as Unicode replacement characters (0xfffd). 
       
   425 @param aIndexOfFirstByteOfFirstUnconvertibleCharacter On return, the index 
       
   426 of the first byte of the first unconvertible character. For instance if the 
       
   427 first character in the input descriptor (aForeign) could not be converted, 
       
   428 then this parameter is set to the first byte of that character, i.e. zero. 
       
   429 A negative value is returned if all the characters were converted. 
       
   430 @param aArrayOfStates Array of character set conversion data objects, and their 
       
   431 escape sequences ("modes"). There must be one or more modes in this array, 
       
   432 none of the modes can have any NULL member data, and each mode's escape sequence 
       
   433 must begin with KControlCharacterEscape (0x1b) or a panic occurs. 
       
   434 @return The number of unconverted bytes left at the end of the input descriptor, 
       
   435 or a negative error value, as defined in TError. */
       
   436 EXPORT_C TInt CnvUtilities::ConvertToUnicodeFromModalForeign(
       
   437 					CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
       
   438 					TDes16& aUnicode, 
       
   439 					const TDesC8& aForeign, 
       
   440 					TInt& aState, 
       
   441 					TInt& aNumberOfUnconvertibleCharacters, 
       
   442 					TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter, 
       
   443 					const TArray<SState>& aArrayOfStates)
       
   444 	{
       
   445 	TUint notUsed;
       
   446 	return ConvertToUnicodeFromModalForeign(aDefaultEndiannessOfForeignCharacters, 
       
   447 											aUnicode, 
       
   448 											aForeign, 
       
   449 											aState, 
       
   450 											aNumberOfUnconvertibleCharacters, 
       
   451 											aIndexOfFirstByteOfFirstUnconvertibleCharacter, 
       
   452 											aArrayOfStates, 
       
   453 											notUsed, 
       
   454 											0);
       
   455 	}
       
   456 
       
   457 /** @param aDefaultEndiannessOfForeignCharacters The default endian-ness for 
       
   458 the foreign characters. If an endian-ness for foreign characters is specified 
       
   459 in the conversion data, then that is used instead and the value of 
       
   460 aDefaultEndiannessOfForeignCharacters is ignored. 
       
   461 @param aUnicode On return, contains the text converted into Unicode. 
       
   462 @param aForeign The non-Unicode source text to be converted. 
       
   463 @param aState Used to store a modal character set encoding's current mode 
       
   464 across multiple calls to ConvertToUnicode() on the same input descriptor. This 
       
   465 argument should be passed the same object as passed to the plug-in's 
       
   466 ConvertToUnicode() exported function. 
       
   467 @param aNumberOfUnconvertibleCharacters On return, contains the number of 
       
   468 characters in aForeign which were not converted. Characters which cannot be 
       
   469 converted are output as Unicode replacement characters (0xfffd). 
       
   470 @param aIndexOfFirstByteOfFirstUnconvertibleCharacter On return, the index 
       
   471 of the first byte of the first unconvertible character. For instance if the 
       
   472 first character in the input descriptor (aForeign) could not be converted, 
       
   473 then this parameter is set to the first byte of that character, i.e. zero. 
       
   474 A negative value is returned if all the characters were converted. 
       
   475 @param aArrayOfStates Array of character set conversion data objects, and their 
       
   476 escape sequences. There must be one or more modes in this array, none of the 
       
   477 modes can have any NULL member data, and each mode's escape sequence must 
       
   478 begin with KControlCharacterEscape (0x1b) or a panic occurs. 
       
   479 @param aOutputConversionFlags If the input descriptor ended in a truncated 
       
   480 sequence, e.g. a part of a multi-byte character, aOutputConversionFlags 
       
   481 returns with the EOutputConversionFlagInputIsTruncated flag set. 
       
   482 @param aInputConversionFlags Specify 
       
   483 CCnvCharacterSetConverter::EInputConversionFlagAppend to append the text to 
       
   484 aUnicode. Specify EInputConversionFlagAllowTruncatedInputNotEvenPartlyConsumable 
       
   485 to prevent the function from returning the error-code EErrorIllFormedInput 
       
   486 when the input descriptor consists of nothing but a truncated sequence. The 
       
   487 CCnvCharacterSetConverter::EInputConversionFlagStopAtFirstUnconvertibleCharacter 
       
   488 flag must not be set, otherwise a panic occurs. 
       
   489 @return The number of unconverted bytes left at the end of the input descriptor, 
       
   490 or a negative error value, as defined in TError. */
       
   491 EXPORT_C TInt CnvUtilities::ConvertToUnicodeFromModalForeign(
       
   492 								CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
       
   493 								TDes16& aUnicode, 
       
   494 								const TDesC8& aForeign, 
       
   495 								TInt& aState, 
       
   496 								TInt& aNumberOfUnconvertibleCharacters, 
       
   497 								TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter, 
       
   498 								const TArray<SState>& aArrayOfStates, 
       
   499 								TUint& aOutputConversionFlags, 
       
   500 								TUint aInputConversionFlags)
       
   501 	{
       
   502 	__ASSERT_ALWAYS(~aInputConversionFlags&CCnvCharacterSetConverter::EInputConversionFlagStopAtFirstUnconvertibleCharacter, Panic(EPanicBadInputConversionFlags2));
       
   503 	CheckArrayOfStates(aArrayOfStates);
       
   504 	aNumberOfUnconvertibleCharacters=0;
       
   505 	aIndexOfFirstByteOfFirstUnconvertibleCharacter=-1;
       
   506 	aOutputConversionFlags=0;
       
   507 	TUint internalInputConversionFlags=aInputConversionFlags;
       
   508 	if (~aInputConversionFlags&CCnvCharacterSetConverter::EInputConversionFlagAppend)
       
   509 		{
       
   510 		aUnicode.SetLength(0);
       
   511 		internalInputConversionFlags|=CCnvCharacterSetConverter::EInputConversionFlagAppend;
       
   512 		}
       
   513 	if (aForeign.Length()==0)
       
   514 		{
       
   515 		return 0;
       
   516 		}
       
   517 	if (aUnicode.MaxLength()==aUnicode.Length()) // relies on the fact that aUnicode's length has been set to zero if aInputConversionFlags does not have CCnvCharacterSetConverter::EInputConversionFlagAppend set
       
   518 		{
       
   519 		return aForeign.Length();
       
   520 		}
       
   521 	TPtrC8 remainderOfForeign(aForeign);
       
   522 	TPtrC8 homogeneousRun;
       
   523 	TInt numberOfForeignBytesConsumed=0;
       
   524 	const SCnvConversionData* conversionData = NULL;
       
   525 	const TInt startOfNextEscapeSequence=aForeign.Locate(KControlCharacterEscape);
       
   526 	if (startOfNextEscapeSequence!=0) // if aForeign doesn't start with an escape sequence...
       
   527 		{
       
   528 		conversionData=(aState!=CCnvCharacterSetConverter::KStateDefault)? REINTERPRET_CAST(const SCnvConversionData*, aState): aArrayOfStates[0].iConversionData;
       
   529 		if (startOfNextEscapeSequence==KErrNotFound)
       
   530 			{
       
   531 			homogeneousRun.Set(remainderOfForeign);
       
   532 			remainderOfForeign.Set(NULL, 0);
       
   533 			}
       
   534 		else
       
   535 			{
       
   536 			__ASSERT_DEBUG(startOfNextEscapeSequence>0, Panic(EPanicBadStartOfNextEscapeSequence));
       
   537 			homogeneousRun.Set(remainderOfForeign.Left(startOfNextEscapeSequence));
       
   538 			remainderOfForeign.Set(remainderOfForeign.Mid(startOfNextEscapeSequence));
       
   539 			}
       
   540 		goto handleHomogeneousRun;
       
   541 		}
       
   542 	FOREVER
       
   543 		{
       
   544 		if (!NextHomogeneousForeignRun(conversionData, numberOfForeignBytesConsumed, homogeneousRun, remainderOfForeign, aArrayOfStates, aOutputConversionFlags))
       
   545 			{
       
   546 			goto end;
       
   547 			}
       
   548 handleHomogeneousRun:
       
   549 		if (conversionData==NULL)
       
   550 			{
       
   551 			return CCnvCharacterSetConverter::EErrorIllFormedInput;
       
   552 			}
       
   553 		TInt numberOfUnconvertibleCharacters;
       
   554 		TInt indexOfFirstByteOfFirstUnconvertibleCharacter;
       
   555 		const TInt returnValue=CCnvCharacterSetConverter::DoConvertToUnicode(*conversionData, aDefaultEndiannessOfForeignCharacters, aUnicode, homogeneousRun, numberOfUnconvertibleCharacters, indexOfFirstByteOfFirstUnconvertibleCharacter, aOutputConversionFlags, internalInputConversionFlags);
       
   556 		if (returnValue<0)
       
   557 			{
       
   558 			return returnValue; // this is an error-code
       
   559 			}
       
   560 		if (numberOfUnconvertibleCharacters>0)
       
   561 			{
       
   562 			if (aNumberOfUnconvertibleCharacters==0)
       
   563 				{
       
   564 				aIndexOfFirstByteOfFirstUnconvertibleCharacter=numberOfForeignBytesConsumed+indexOfFirstByteOfFirstUnconvertibleCharacter;
       
   565 				}
       
   566 			aNumberOfUnconvertibleCharacters+=numberOfUnconvertibleCharacters;
       
   567 			}
       
   568 		numberOfForeignBytesConsumed+=homogeneousRun.Length();
       
   569 		if (returnValue>0)
       
   570 			{
       
   571 			numberOfForeignBytesConsumed-=returnValue;
       
   572 			goto end;
       
   573 			}
       
   574 		if (numberOfForeignBytesConsumed>0)
       
   575 			{
       
   576 			internalInputConversionFlags|=CCnvCharacterSetConverter::EInputConversionFlagAllowTruncatedInputNotEvenPartlyConsumable;
       
   577 			}
       
   578 		__ASSERT_DEBUG(remainderOfForeign==aForeign.Mid(numberOfForeignBytesConsumed), Panic(EPanicInconsistentNumberOfForeignBytesRemaining));
       
   579 		}
       
   580 end:
       
   581 	if ((numberOfForeignBytesConsumed==0) && (aOutputConversionFlags&CCnvCharacterSetConverter::EOutputConversionFlagInputIsTruncated) && (~aInputConversionFlags&CCnvCharacterSetConverter::EInputConversionFlagAllowTruncatedInputNotEvenPartlyConsumable))
       
   582 		{
       
   583 		return CCnvCharacterSetConverter::EErrorIllFormedInput;
       
   584 		}
       
   585 	aState=REINTERPRET_CAST(TInt, conversionData);
       
   586 	return aForeign.Length()-numberOfForeignBytesConsumed;
       
   587 	}
       
   588 
       
   589 
       
   590 /**  Converts text from a non-modal complex character set encoding (e.g. 
       
   591 Shift-JIS or EUC-JP) into Unicode.The non-Unicode text specified in
       
   592 aForeign is converted using the array of character set
       
   593 conversion methods (aArrayOfMethods) provided by the
       
   594 plug-in, and the converted text is returned in aUnicode.
       
   595 Overwrites the contents, if any, of aUnicode.
       
   596 
       
   597 @param aDefaultEndiannessOfForeignCharacters The default endian-ness of the 
       
   598 foreign characters. If an endian-ness for foreign characters is specified 
       
   599 in the conversion data, then that is used instead and the value of 
       
   600 aDefaultEndiannessOfForeignCharacters is ignored. 
       
   601 @param aUnicode On return, contains the text converted into Unicode. 
       
   602 @param aForeign The non-Unicode source text to be converted. 
       
   603 @param aNumberOfUnconvertibleCharacters On return, contains the number of 
       
   604 characters in aForeign which were not converted. Characters which cannot be 
       
   605 converted are output as Unicode replacement characters (0xfffd). 
       
   606 @param aIndexOfFirstByteOfFirstUnconvertibleCharacter On return, the index 
       
   607 of the first byte of the first unconvertible character. For instance if the 
       
   608 first character in the input descriptor (aForeign) could not be converted, 
       
   609 then this parameter is set to the first byte of that character, i.e. zero. 
       
   610 A negative value is returned if all the characters were converted. 
       
   611 @param aArrayOfMethods Array of conversion methods. There must be one or more 
       
   612 methods in this array and none of the methods in the array can have any NULL 
       
   613 member data or a panic occurs. 
       
   614 @return The number of unconverted bytes left at the end of the input descriptor, 
       
   615 or a negative error value, as defined in TError. */
       
   616 EXPORT_C TInt CnvUtilities::ConvertToUnicodeFromHeterogeneousForeign(
       
   617 					CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
       
   618 					TDes16& aUnicode, 
       
   619 					const TDesC8& aForeign, 
       
   620 					TInt& aNumberOfUnconvertibleCharacters, 
       
   621 					TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter, 
       
   622 					const TArray<SMethod>& aArrayOfMethods)
       
   623 	{
       
   624 	TUint notUsed;
       
   625 	return ConvertToUnicodeFromHeterogeneousForeign(
       
   626 				aDefaultEndiannessOfForeignCharacters, 
       
   627 				aUnicode, 
       
   628 				aForeign, 
       
   629 				aNumberOfUnconvertibleCharacters, 
       
   630 				aIndexOfFirstByteOfFirstUnconvertibleCharacter, 
       
   631 				aArrayOfMethods, 
       
   632 				notUsed, 
       
   633 				0);
       
   634 	}
       
   635 
       
   636 /** @param aDefaultEndiannessOfForeignCharacters The default endian-ness for the 
       
   637 foreign characters. If an endian-ness for foreign characters is specified 
       
   638 in the conversion data, then that is used instead and the value of 
       
   639 aDefaultEndiannessOfForeignCharacters is ignored. 
       
   640 @param aUnicode On return, contains the text converted into Unicode. 
       
   641 @param aForeign The non-Unicode source text to be converted. 
       
   642 @param aNumberOfUnconvertibleCharacters On return, contains the number of 
       
   643 characters in aForeign which were not converted. Characters which cannot be 
       
   644 converted are output as Unicode replacement characters (0xfffd). 
       
   645 @param aIndexOfFirstByteOfFirstUnconvertibleCharacter On return, the index 
       
   646 of the first byte of the first unconvertible character. For instance if the 
       
   647 first character in the input descriptor (aForeign) could not be converted, 
       
   648 then this parameter is set to the first byte of that character, i.e. zero. 
       
   649 A negative value is returned if all the characters were converted. 
       
   650 @param aArrayOfMethods Array of conversion methods. There must be one or more 
       
   651 methods in this array and none of the methods in the array can have any NULL 
       
   652 member data or a panic occurs. 
       
   653 @param aOutputConversionFlags If the input descriptor ended in a truncated 
       
   654 sequence, e.g. a part of a multi-byte character, aOutputConversionFlags 
       
   655 returns with the EOutputConversionFlagInputIsTruncated flag set. 
       
   656 @param aInputConversionFlags Specify 
       
   657 CCnvCharacterSetConverter::EInputConversionFlagAppend to append the text to 
       
   658 aUnicode. Specify EInputConversionFlagAllowTruncatedInputNotEvenPartlyConsumable 
       
   659 to prevent the function from returning the error-code EErrorIllFormedInput 
       
   660 when the input descriptor consists of nothing but a truncated sequence. The 
       
   661 CCnvCharacterSetConverter::EInputConversionFlagStopAtFirstUnconvertibleCharacter 
       
   662 flag must not be set, otherwise a panic occurs. 
       
   663 @return The number of unconverted bytes left at the end of the input descriptor, 
       
   664 or a negative error value, as defined in TError. */
       
   665 EXPORT_C TInt CnvUtilities::ConvertToUnicodeFromHeterogeneousForeign(
       
   666 						CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
       
   667 						TDes16& aUnicode, 
       
   668 						const TDesC8& aForeign, 
       
   669 						TInt& aNumberOfUnconvertibleCharacters, 
       
   670 						TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter, 
       
   671 						const TArray<SMethod>& aArrayOfMethods, 
       
   672 						TUint& aOutputConversionFlags, 
       
   673 						TUint aInputConversionFlags)
       
   674 	{
       
   675 	__ASSERT_ALWAYS(~aInputConversionFlags&CCnvCharacterSetConverter::EInputConversionFlagStopAtFirstUnconvertibleCharacter, Panic(EPanicBadInputConversionFlags3));
       
   676 	CheckArrayOfMethods(aArrayOfMethods);
       
   677 	aNumberOfUnconvertibleCharacters=0;
       
   678 	aIndexOfFirstByteOfFirstUnconvertibleCharacter=-1;
       
   679 	aOutputConversionFlags=0;
       
   680 	TUint internalInputConversionFlags=aInputConversionFlags;
       
   681 	if (~aInputConversionFlags&CCnvCharacterSetConverter::EInputConversionFlagAppend)
       
   682 		{
       
   683 		aUnicode.SetLength(0);
       
   684 		internalInputConversionFlags|=CCnvCharacterSetConverter::EInputConversionFlagAppend;
       
   685 		}
       
   686 	if (aForeign.Length()==0)
       
   687 		{
       
   688 		return 0;
       
   689 		}
       
   690 	if (aUnicode.MaxLength()==aUnicode.Length()) // relies on the fact that aUnicode's length has been set to zero if aInputConversionFlags does not have CCnvCharacterSetConverter::EInputConversionFlagAppend set
       
   691 		{
       
   692 		return aForeign.Length();
       
   693 		}
       
   694 	const TInt numberOfMethods=aArrayOfMethods.Count();
       
   695 	TPtrC8 remainderOfForeign(aForeign);
       
   696 	TInt numberOfForeignBytesConsumed=0;
       
   697 	FOREVER
       
   698 		{
       
   699 		TInt lengthOfRunToConvert=0;
       
   700 		const SMethod* method=NULL;
       
   701 		for (TInt i=0;;)
       
   702 			{
       
   703 			method=&aArrayOfMethods[i];
       
   704 			__ASSERT_DEBUG(method!=NULL, Panic(EPanicBadMethodPointer));
       
   705 			lengthOfRunToConvert=(*method->iNumberOfBytesAbleToConvert)(remainderOfForeign);
       
   706 			if (lengthOfRunToConvert<0)
       
   707 				{
       
   708 				return lengthOfRunToConvert; // this is an error-code
       
   709 				}
       
   710 			if (lengthOfRunToConvert>0)
       
   711 				{
       
   712 				break;
       
   713 				}
       
   714 			__ASSERT_DEBUG(i<numberOfMethods, Panic(EPanicLoopCounterOverRun2));
       
   715 			++i;
       
   716 			if (i>=numberOfMethods)
       
   717 				{
       
   718 				aOutputConversionFlags|=CCnvCharacterSetConverter::EOutputConversionFlagInputIsTruncated;
       
   719 				goto end;
       
   720 				}
       
   721 			}
       
   722 		TBuf8<KMaximumLengthOfIntermediateBuffer> intermediateBuffer;
       
   723 		const TInt maximumUsableLengthOfIntermediateBuffer=ReduceToNearestMultipleOf(KMaximumLengthOfIntermediateBuffer, method->iNumberOfBytesPerCharacter);
       
   724 		FOREVER
       
   725 			{
       
   726 			const TInt numberOfForeignBytesConsumedThisTime=Min(lengthOfRunToConvert, maximumUsableLengthOfIntermediateBuffer);
       
   727 			intermediateBuffer=remainderOfForeign.Left(numberOfForeignBytesConsumedThisTime);
       
   728 			__ASSERT_DEBUG((numberOfForeignBytesConsumedThisTime%method->iNumberOfBytesPerCharacter)==0, Panic(EPanicDescriptorNotWholeNumberOfCharacters2));
       
   729 			(*method->iConvertToIntermediateBufferInPlace)(intermediateBuffer);
       
   730 			__ASSERT_DEBUG((intermediateBuffer.Length()%method->iNumberOfCoreBytesPerCharacter)==0, Panic(EPanicDescriptorNotWholeNumberOfCharacters3));
       
   731 			__ASSERT_DEBUG((intermediateBuffer.Length()/method->iNumberOfCoreBytesPerCharacter)*method->iNumberOfBytesPerCharacter==numberOfForeignBytesConsumedThisTime, Panic(EPanicBadMethodData1));
       
   732 			TInt numberOfUnconvertibleCharacters;
       
   733 			TInt indexOfFirstByteOfFirstUnconvertibleCharacter;
       
   734 			const TInt returnValue=CCnvCharacterSetConverter::DoConvertToUnicode(*method->iConversionData, aDefaultEndiannessOfForeignCharacters, aUnicode, intermediateBuffer, numberOfUnconvertibleCharacters, indexOfFirstByteOfFirstUnconvertibleCharacter, aOutputConversionFlags, internalInputConversionFlags);
       
   735 			if (returnValue<0)
       
   736 				{
       
   737 				return returnValue; // this is an error-code
       
   738 				}
       
   739 			if (numberOfUnconvertibleCharacters>0)
       
   740 				{
       
   741 				if (aNumberOfUnconvertibleCharacters==0)
       
   742 					{
       
   743 					aIndexOfFirstByteOfFirstUnconvertibleCharacter=numberOfForeignBytesConsumed+indexOfFirstByteOfFirstUnconvertibleCharacter;
       
   744 					}
       
   745 				aNumberOfUnconvertibleCharacters+=numberOfUnconvertibleCharacters;
       
   746 				}
       
   747 			numberOfForeignBytesConsumed+=numberOfForeignBytesConsumedThisTime;
       
   748 			if (returnValue>0)
       
   749 				{
       
   750 				__ASSERT_DEBUG((returnValue%method->iNumberOfCoreBytesPerCharacter)==0, Panic(EPanicDescriptorNotWholeNumberOfCharacters4));
       
   751 				numberOfForeignBytesConsumed-=(returnValue/method->iNumberOfCoreBytesPerCharacter)*method->iNumberOfBytesPerCharacter;
       
   752 				goto end;
       
   753 				}
       
   754 			if (numberOfForeignBytesConsumed>0)
       
   755 				{
       
   756 				internalInputConversionFlags|=CCnvCharacterSetConverter::EInputConversionFlagAllowTruncatedInputNotEvenPartlyConsumable;
       
   757 				}
       
   758 			remainderOfForeign.Set(aForeign.Mid(numberOfForeignBytesConsumed));
       
   759 			lengthOfRunToConvert-=numberOfForeignBytesConsumedThisTime;
       
   760 			__ASSERT_DEBUG(lengthOfRunToConvert>=0, Panic(EPanicBadLengthOfRunToConvert2));
       
   761 			if (lengthOfRunToConvert<=0)
       
   762 				{
       
   763 				break;
       
   764 				}
       
   765 			}
       
   766 		}
       
   767 end:
       
   768 	if ((numberOfForeignBytesConsumed==0) && (aOutputConversionFlags&CCnvCharacterSetConverter::EOutputConversionFlagInputIsTruncated) && (~aInputConversionFlags&CCnvCharacterSetConverter::EInputConversionFlagAllowTruncatedInputNotEvenPartlyConsumable))
       
   769 		{
       
   770 		return CCnvCharacterSetConverter::EErrorIllFormedInput;
       
   771 		}
       
   772 	return aForeign.Length()-numberOfForeignBytesConsumed;
       
   773 	}
       
   774 
       
   775 void CnvUtilities::CheckArrayOfCharacterSets(const TArray<SCharacterSet>& aArrayOfCharacterSets)
       
   776 	{
       
   777 	const TInt numberOfCharacterSets=aArrayOfCharacterSets.Count();
       
   778 	__ASSERT_ALWAYS(numberOfCharacterSets>0, Panic(EPanicBadNumberOfCharacterSets));
       
   779 	for (TInt i=0; i<numberOfCharacterSets; ++i)
       
   780 		{
       
   781 		const SCharacterSet& characterSet=aArrayOfCharacterSets[i];
       
   782 		__ASSERT_ALWAYS(characterSet.iConversionData!=NULL, Panic(EPanicBadConversionDataPointer1));
       
   783 		__ASSERT_ALWAYS(characterSet.iConvertFromIntermediateBufferInPlace!=NULL, Panic(EPanicBadFunctionPointer1));
       
   784 		__ASSERT_ALWAYS(characterSet.iEscapeSequence!=NULL, Panic(EPanicBadEscapeSequencePointer1));
       
   785 		}
       
   786 	}
       
   787 
       
   788 void CnvUtilities::CheckArrayOfStates(const TArray<SState>& aArrayOfStates)
       
   789 	{
       
   790 	const TInt numberOfStates=aArrayOfStates.Count();
       
   791 	__ASSERT_ALWAYS(numberOfStates>0, Panic(EPanicBadNumberOfStates));
       
   792 	for (TInt i=0; i<numberOfStates; ++i)
       
   793 		{
       
   794 		const SState& state=aArrayOfStates[i];
       
   795 		__ASSERT_ALWAYS(state.iEscapeSequence!=NULL, Panic(EPanicBadEscapeSequencePointer2));
       
   796 		__ASSERT_ALWAYS((*state.iEscapeSequence)[0]==KControlCharacterEscape, Panic(EPanicBadEscapeSequenceStart));
       
   797 		__ASSERT_ALWAYS(state.iConversionData!=NULL, Panic(EPanicBadConversionDataPointer2));
       
   798 		}
       
   799 	}
       
   800 
       
   801 void CnvUtilities::CheckArrayOfMethods(const TArray<SMethod>& aArrayOfMethods)
       
   802 	{
       
   803 	const TInt numberOfMethods=aArrayOfMethods.Count();
       
   804 	__ASSERT_ALWAYS(numberOfMethods>0, Panic(EPanicBadNumberOfMethods));
       
   805 	for (TInt i=0; i<numberOfMethods; ++i)
       
   806 		{
       
   807 		const SMethod& method=aArrayOfMethods[i];
       
   808 		__ASSERT_ALWAYS(method.iNumberOfBytesAbleToConvert!=NULL, Panic(EPanicBadFunctionPointer2));
       
   809 		__ASSERT_ALWAYS(method.iConvertToIntermediateBufferInPlace!=NULL, Panic(EPanicBadFunctionPointer3));
       
   810 		__ASSERT_ALWAYS(method.iConversionData!=NULL, Panic(EPanicBadConversionDataPointer3));
       
   811 		__ASSERT_ALWAYS(method.iNumberOfBytesPerCharacter>0, Panic(EPanicBadMethodData2));
       
   812 		__ASSERT_ALWAYS(method.iNumberOfCoreBytesPerCharacter>0, Panic(EPanicBadMethodData3));
       
   813 		__ASSERT_ALWAYS(method.iNumberOfCoreBytesPerCharacter<=method.iNumberOfBytesPerCharacter, Panic(EPanicBadMethodData4));
       
   814 		}
       
   815 	}
       
   816 
       
   817 TInt CnvUtilities::LengthOfUnicodeCharacter(const TDesC16& aUnicode, TInt aIndex)
       
   818 	{
       
   819 	const TUint unicodeCharacter=aUnicode[aIndex];
       
   820 	if ((unicodeCharacter>=0xd800) && (unicodeCharacter<=0xdbff)) // if the unicode character is the first half of a surrogate-pair...
       
   821 		{
       
   822 		__ASSERT_DEBUG(aIndex+1<aUnicode.Length(), Panic(EPanicBadSurrogatePair1));
       
   823 #if defined(_DEBUG)
       
   824 		const TUint secondHalfOfSurrogatePair=aUnicode[aIndex+1];
       
   825 #endif
       
   826 		__ASSERT_DEBUG((secondHalfOfSurrogatePair>=0xdc00) && (secondHalfOfSurrogatePair<=0xdfff), Panic(EPanicBadSurrogatePair2)); // this can be asserted as CCnvCharacterSetConverter::DoConvertFromUnicode should have returned an error value if this was a bad surrogate pair
       
   827 		return 2;
       
   828 		}
       
   829 	return 1;
       
   830 	}
       
   831 
       
   832 TBool CnvUtilities::NextHomogeneousForeignRun(const SCnvConversionData*& aConversionData, TInt& aNumberOfForeignBytesConsumed, TPtrC8& aHomogeneousRun, TPtrC8& aRemainderOfForeign, const TArray<SState>& aArrayOfStates, TUint& aOutputConversionFlags)
       
   833 	{
       
   834 	__ASSERT_DEBUG((aRemainderOfForeign.Length()==0) || (aRemainderOfForeign[0]==KControlCharacterEscape), Panic(EPanicBadRemainderOfForeign));
       
   835 	FOREVER
       
   836 		{
       
   837 		if (aRemainderOfForeign.Length()==0)
       
   838 			{
       
   839 			return EFalse;
       
   840 			}
       
   841 		const TInt numberOfStates=aArrayOfStates.Count();
       
   842 		TInt i;
       
   843 		for (i=0; i<numberOfStates; ++i)
       
   844 			{
       
   845 			const SState& state=aArrayOfStates[i];
       
   846 			if (MatchesEscapeSequence(aNumberOfForeignBytesConsumed, aHomogeneousRun, aRemainderOfForeign, *state.iEscapeSequence))
       
   847 				{
       
   848 				aConversionData=state.iConversionData;
       
   849 				goto foundState;
       
   850 				}
       
   851 			}
       
   852 		for (i=0; i<numberOfStates; ++i)
       
   853 			{
       
   854 			if (IsStartOf(aRemainderOfForeign, *aArrayOfStates[i].iEscapeSequence))
       
   855 				{
       
   856 				// aRemainderOfForeign ends with a truncated escape sequence, so ConvertToUnicode cannot convert any more
       
   857 				aOutputConversionFlags|=CCnvCharacterSetConverter::EOutputConversionFlagInputIsTruncated;
       
   858 				return EFalse;
       
   859 				}
       
   860 			}
       
   861 		// force ConvertToUnicode to return CCnvCharacterSetConverter::EErrorIllFormedInput
       
   862 		aConversionData=NULL;
       
   863 		return ETrue;
       
   864 foundState:
       
   865 		if (aHomogeneousRun.Length()>0)
       
   866 			{
       
   867 			return ETrue;
       
   868 			}
       
   869 		}
       
   870 	}
       
   871 
       
   872 TBool CnvUtilities::MatchesEscapeSequence(TInt& aNumberOfForeignBytesConsumed, TPtrC8& aHomogeneousRun, TPtrC8& aRemainderOfForeign, const TDesC8& aEscapeSequence)
       
   873 	{
       
   874 	const TInt lengthOfEscapeSequence=aEscapeSequence.Length();
       
   875 	if (IsStartOf(aEscapeSequence, aRemainderOfForeign))
       
   876 		{
       
   877 		aRemainderOfForeign.Set(aRemainderOfForeign.Mid(lengthOfEscapeSequence));
       
   878 		const TInt startOfNextEscapeSequence=aRemainderOfForeign.Locate(KControlCharacterEscape);
       
   879 		if (startOfNextEscapeSequence==KErrNotFound)
       
   880 			{
       
   881 			aHomogeneousRun.Set(aRemainderOfForeign);
       
   882 			aRemainderOfForeign.Set(NULL, 0);
       
   883 			}
       
   884 		else
       
   885 			{
       
   886 			aHomogeneousRun.Set(aRemainderOfForeign.Left(startOfNextEscapeSequence));
       
   887 			aRemainderOfForeign.Set(aRemainderOfForeign.Mid(startOfNextEscapeSequence));
       
   888 			}
       
   889 		aNumberOfForeignBytesConsumed+=lengthOfEscapeSequence;
       
   890 		return ETrue;
       
   891 		}
       
   892 	return EFalse;
       
   893 	}
       
   894 
       
   895 TBool CnvUtilities::IsStartOf(const TDesC8& aStart, const TDesC8& aPotentiallyLongerDescriptor)
       
   896 	{
       
   897 	const TInt lengthOfStart=aStart.Length();
       
   898 	return (aPotentiallyLongerDescriptor.Length()>=lengthOfStart) && (aPotentiallyLongerDescriptor.Left(lengthOfStart)==aStart);
       
   899 	}
       
   900