charconvfw/Charconv/ongoing/Source/utils/CONVUTILS.CPP
changeset 0 1fb32624e06b
child 16 56cd22a7a1cb
equal deleted inserted replaced
-1:000000000000 0:1fb32624e06b
       
     1 /*
       
     2 * Copyright (c) 2003-2005 Nokia Corporation and/or its subsidiary(-ies). 
       
     3 * All rights reserved.
       
     4 * This component and the accompanying materials are made available
       
     5 * under the terms of "Eclipse Public License v1.0"
       
     6 * which accompanies this distribution, and is available
       
     7 * at the URL "http://www.eclipse.org/legal/epl-v10.html".
       
     8 *
       
     9 * Initial Contributors:
       
    10 * Nokia Corporation - initial contribution.
       
    11 *
       
    12 * Contributors:
       
    13 *
       
    14 * Description:      
       
    15 *
       
    16 */
       
    17 
       
    18 
       
    19 
       
    20 
       
    21 
       
    22 
       
    23 
       
    24 
       
    25 #include <e32std.h>
       
    26 #include <e32base.h>
       
    27 #include <charconv.h>
       
    28 #include <convutils.h>
       
    29                    
       
        // Initial value of the "previous character set" tracker in
        // ConvertFromUnicode() when the caller has not asked to assume that the
        // output starts in the default character set.
     30 const TInt KNoPreviousCharacterSet=-1;
       
        // Index, within the array of character sets passed to ConvertFromUnicode(),
        // of the default character set; also the index at which each search for a
        // usable character set starts.
     31 const TInt KDefaultCharacterSet = 0;
       
        // The escape control character (0x1b). The documentation of
        // ConvertToUnicodeFromModalForeign() requires every mode's escape sequence
        // to begin with this value.
     32 const TUint KControlCharacterEscape=0x1b;
       
     33 
       
        // Debug builds use a much smaller intermediate buffer (5) than release
        // builds (150).
        // NOTE(review): the code using this constant is not visible here; presumably
        // the small debug value exercises the buffer-full paths - confirm.
     34 #if defined(_DEBUG)
       
     35 const TInt KMaximumLengthOfIntermediateBuffer=5;
       
     36 #else
       
     37 const TInt KMaximumLengthOfIntermediateBuffer=150;
       
     38 #endif
       
     39 
       
        // Forward declaration only; the definition is not in this file section.
     40 struct SCnvConversionData;
       
     41 
       
        // Text passed to User::Panic() by the file-local Panic() helper.
     42 _LIT(KLitPanicText, "CONVUTILS");
       
    43 
       
     44 enum TPanic
       
     45 	{
       
        	// Reason codes handed to the file-local Panic() helper.
        	// Only the first enumerator has an explicit value (1); every later one
        	// takes the next integer, so inserting, removing or reordering entries
        	// changes the numeric code of everything that follows. Append new codes
        	// at the end to keep existing codes stable.
     46 	EPanicBadInputConversionFlags1=1,
       
     47 	EPanicBadInputConversionFlags2,
       
     48 	EPanicBadInputConversionFlags3,
       
     49 	EPanicBadNumberOfUnicodeElementsConsumed,
       
     50 	EPanicAppendFlagViolated,
       
     51 	EPanicBadNumberOfUnicodeCharactersConverted,
       
     52 	EPanicBadNumberOfCharactersThatDroppedOut,
       
     53 	EPanicLoopCounterOverRun1,
       
     54 	EPanicLoopCounterOverRun2,
       
     55 	EPanicDescriptorNotWholeNumberOfCharacters1,
       
     56 	EPanicDescriptorNotWholeNumberOfCharacters2,
       
     57 	EPanicDescriptorNotWholeNumberOfCharacters3,
       
     58 	EPanicDescriptorNotWholeNumberOfCharacters4,
       
     59 	EPanicBadStartOfNextEscapeSequence,
       
     60 	EPanicInconsistentNumberOfForeignBytesRemaining,
       
     61 	EPanicBadLengthOfRunToConvert1,
       
     62 	EPanicBadLengthOfRunToConvert2,
       
     63 	EPanicBadMethodPointer,
       
     64 	EPanicBadMethodData1,
       
     65 	EPanicBadMethodData2,
       
     66 	EPanicBadMethodData3,
       
     67 	EPanicBadMethodData4,
       
     68 	EPanicBadNumberOfCharacterSets,
       
     69 	EPanicBadConversionDataPointer1,
       
     70 	EPanicBadConversionDataPointer2,
       
     71 	EPanicBadConversionDataPointer3,
       
     72 	EPanicBadFunctionPointer1,
       
     73 	EPanicBadFunctionPointer2,
       
     74 	EPanicBadFunctionPointer3,
       
     75 	EPanicBadEscapeSequencePointer1,
       
     76 	EPanicBadEscapeSequencePointer2,
       
     77 	EPanicBadNumberOfStates,
       
     78 	EPanicBadEscapeSequenceStart,
       
     79 	EPanicBadNumberOfMethods,
       
     80 	EPanicBadSurrogatePair1,
       
     81 	EPanicBadSurrogatePair2,
       
     82 	EPanicBadRemainderOfForeign,
       
     83 	EPanicOutputDescriptorTooShortEvenToHoldEscapeSequenceToDefaultCharacterSet
       
     84 	};
       
    85 
       
    86 LOCAL_C void Panic(TPanic aPanic)
       
    87 	{
       
    88 	User::Panic(KLitPanicText, aPanic);
       
    89 	}
       
    90 
       
    91 /** Converts Unicode text into a complex foreign character set encoding. This 
       
    92 is an encoding which cannot be converted simply by calling 
       
    93 CCnvCharacterSetConverter::DoConvertFromUnicode(). It may be modal (e.g. JIS) 
       
    94 or non-modal (e.g. Shift-JIS).
       
    95 
       
    96 The Unicode text specified in aUnicode is converted using the array of 
       
    97 conversion data objects (aArrayOfCharacterSets) provided by the plug-in for 
       
    98 the complex character set encoding, and the converted text is returned in 
       
    99 aForeign. Any existing contents in aForeign are overwritten.
       
   100 
       
   101 Unlike CCnvCharacterSetConverter::DoConvertFromUnicode(), multiple character 
       
   102 sets can be specified. aUnicode is converted using the first character conversion 
       
   103 data object in the array. When a character is found which cannot be converted 
       
   104 using that data, each character set in the array is tried in turn. If it cannot 
       
   105 be converted using any object in the array, the index of the character is 
       
   106 appended to aIndicesOfUnconvertibleCharacters and the character is replaced 
       
   107 by aReplacementForUnconvertibleUnicodeCharacters.
       
   108 
       
   109 If it can be converted using another object in the array, that object is used 
       
   110 to convert all subsequent characters until another unconvertible character 
       
   111 is found.
       
   112 
       
   113 @param aDefaultEndiannessOfForeignCharacters The default endian-ness to use 
       
   114 when writing the characters in the foreign character set. If an endian-ness 
       
   115 for foreign characters is specified in the current conversion data object, 
       
   116 then that is used instead and the value of 
       
   117 aDefaultEndiannessOfForeignCharacters is ignored. 
       
   118 @param aReplacementForUnconvertibleUnicodeCharacters The single character (one 
       
   119 or more byte values) which is used to replace unconvertible characters. 
       
   120 @param aForeign On return, contains the converted text in the non-Unicode 
       
   121 character set. 
       
   122 @param aUnicode The source Unicode text to be converted. 
       
   123 @param aIndicesOfUnconvertibleCharacters On return, holds an ascending array 
       
   124 of the indices of each Unicode character in the source text which could not 
       
   125 be converted (because none of the target character sets have an equivalent 
       
   126 character). 
       
   127 @param aArrayOfCharacterSets Array of character conversion data objects, 
       
   128 representing the character sets which comprise a complex character set 
       
   129 encoding. These are used in sequence to convert the Unicode text. There must 
       
   130 be at least one character set in this array and no character set may have any 
       
   131 NULL member data, or a panic occurs. 
       
   132 @return The number of unconverted characters left at the end of the input 
       
   133 descriptor (e.g. because aForeign was not long enough to hold all the text), 
       
   134 or a negative error value, as defined in CCnvCharacterSetConverter::TError. */
       
   135 EXPORT_C TInt CnvUtilities::ConvertFromUnicode(
       
   136 		CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
       
   137 		const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, 
       
   138 		TDes8& aForeign, 
       
   139 		const TDesC16& aUnicode, 
       
   140 		CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters, 
       
   141 		const TArray<SCharacterSet>& aArrayOfCharacterSets)
       
   142 	{
       
   143 	TUint notUsed;
       
   144 	return ConvertFromUnicode(aDefaultEndiannessOfForeignCharacters, 
       
   145 								aReplacementForUnconvertibleUnicodeCharacters, 
       
   146 								aForeign, 
       
   147 								aUnicode, 
       
   148 								aIndicesOfUnconvertibleCharacters, 
       
   149 								aArrayOfCharacterSets, 
       
   150 								notUsed, 
       
   151 								0);
       
   152 	}
       
   153 
       
   154 /** Converts Unicode text into a complex foreign character set encoding. This is 
       
   155 an encoding which cannot be converted simply by a call to 
       
   156 CCnvCharacterSetConverter::DoConvertFromUnicode(). It may be modal (e.g. JIS) 
       
   157 or non-modal (e.g. Shift-JIS).
       
   158 
       
   159 The Unicode text specified in aUnicode is converted using the array of conversion 
       
   160 data objects (aArrayOfCharacterSets) provided by the plug-in for the complex 
       
   161 character set encoding and the converted text is returned in aForeign. The 
       
   162 function can either append to aForeign or overwrite its contents (if any).
       
   163 
       
   164 Unlike CCnvCharacterSetConverter::DoConvertFromUnicode(), multiple character 
       
   165 sets can be specified. aUnicode is converted using the first character conversion 
       
   166 data object in the array. When a character is found which cannot be converted 
       
   167 using that data, each character set in the array is tried in turn. If it cannot 
       
   168 be converted using any object in the array, the index of the character is 
       
   169 appended to aIndicesOfUnconvertibleCharacters and the character is replaced 
       
   170 by aReplacementForUnconvertibleUnicodeCharacters.
       
   171 
       
   172 If it can be converted using another object in the array, that object is used 
       
   173 to convert all subsequent characters until another unconvertible character 
       
   174 is found.
       
   175 
       
   176 @param aDefaultEndiannessOfForeignCharacters The default endian-ness to use 
       
   177 when writing the characters in the foreign character set. If an endian-ness 
       
   178 for foreign characters is specified in the current conversion data object, 
       
   179 then that is used instead and the value of 
       
   180 aDefaultEndiannessOfForeignCharacters is ignored. 
       
   181 @param aReplacementForUnconvertibleUnicodeCharacters The single character (one 
       
   182 or more byte values) which is used to replace unconvertible characters. 
       
   183 @param aForeign On return, contains the converted text in the non-Unicode 
       
   184 character set. This may already contain some text. If it does, and if 
       
   185 aInputConversionFlags specifies EInputConversionFlagAppend, then the converted 
       
   186 text is appended to this descriptor. 
       
   187 @param aUnicode The source Unicode text to be converted. 
       
   188 @param aIndicesOfUnconvertibleCharacters On return, holds an ascending array 
       
   189 of the indices of each Unicode character in the source text which could not 
       
   190 be converted (because none of the target character sets have an equivalent 
       
   191 character). 
       
   192 @param aArrayOfCharacterSets Array of character set data objects. These are 
       
   193 used in sequence to convert the Unicode text. There must be at least one 
       
   194 character set in this array and no character set may have any NULL member 
       
   195 data, or a panic occurs. 
       
   196 @param aOutputConversionFlags If the input descriptor ended in a truncated 
       
   197 sequence, e.g. the first half only of a Unicode surrogate pair, this returns 
       
   198 with the EOutputConversionFlagInputIsTruncated flag set. 
       
   199 @param aInputConversionFlags Specify 
       
   200 CCnvCharacterSetConverter::EInputConversionFlagAppend to append the text to 
       
   201 aForeign. Specify CCnvCharacterSetConverter::EInputConversionFlagAllowTruncatedInputNotEvenPartlyConsumable 
       
   202 to prevent the function from returning the error-code EErrorIllFormedInput 
       
   203 when the input descriptor consists of nothing but a truncated sequence. The 
       
   204 CCnvCharacterSetConverter::EInputConversionFlagStopAtFirstUnconvertibleCharacter 
       
   205 flag must not be set, otherwise a panic occurs. 
       
   206 @return The number of unconverted characters left at the end of the input descriptor 
       
   207 (e.g. because aForeign was not long enough to hold all the text), or a negative 
       
   208 error value, as defined in CCnvCharacterSetConverter::TError. */
       
    209 EXPORT_C TInt CnvUtilities::ConvertFromUnicode(
       
    210 				CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
       
    211 				const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, 
       
    212 				TDes8& aForeign, 
       
    213 				const TDesC16& aUnicode, 
       
    214 				CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters, 
       
    215 				const TArray<SCharacterSet>& aArrayOfCharacterSets, 
       
    216 				TUint& aOutputConversionFlags, 
       
    217 				TUint aInputConversionFlags)
       
    218 	{
       
        	// Outline: validate the arguments, prepare the output descriptor, then
        	// repeatedly call DoConvertFromUnicode() on the unconsumed tail of
        	// aUnicode. Each attempt starts with the character set at index
        	// KDefaultCharacterSet and moves to the next set in the array only when
        	// the very first character offered could not be converted.
        
        	// The caller must not pass the "stop at first unconvertible character"
        	// flag; this function OR-s it into its own internal flags further down.
    219 	__ASSERT_ALWAYS(~aInputConversionFlags&CCnvCharacterSetConverter::EInputConversionFlagStopAtFirstUnconvertibleCharacter, Panic(EPanicBadInputConversionFlags1));
       
    220 	CheckArrayOfCharacterSets(aArrayOfCharacterSets);
       
    221 	aOutputConversionFlags=0;
       
    222 	TUint internalInputConversionFlags=aInputConversionFlags;
       
        	// When the caller did not ask to append, empty aForeign here and then
        	// always use the append flag for the internal DoConvertFromUnicode() calls.
    223 	if (~aInputConversionFlags&CCnvCharacterSetConverter::EInputConversionFlagAppend)
       
    224 		{
       
    225 		aForeign.SetLength(0);
       
    226 		internalInputConversionFlags|=CCnvCharacterSetConverter::EInputConversionFlagAppend;
       
    227 		}
       
        	// Nothing to convert.
    228 	if (aUnicode.Length()==0)
       
    229 		{
       
    230 		return 0;
       
    231 		}
       
        	// No free space in the output: report the whole input as unconverted.
    232 	if (aForeign.MaxLength()==aForeign.Length()) // relies on the fact that aForeign's length has been set to zero if aInputConversionFlags does not have CCnvCharacterSetConverter::EInputConversionFlagAppend set
       
    233 		{
       
    234 		return aUnicode.Length();
       
    235 		}
       
        	// "foreign" is the descriptor the conversion actually writes through.
        	// Normally that is aForeign itself.
    236 	TDes8* foreign=&aForeign;
       
    237 	TPtr8 dummyForeign(NULL, 0, 0);
       
        	// If the output must end in the default character set, write through a
        	// TPtr8 over aForeign's own buffer whose maximum length is reduced by the
        	// length of the default set's escape sequence, so that room for that
        	// escape sequence is held back.
    238 	if (aInputConversionFlags&CCnvCharacterSetConverter::EInputConversionFlagMustEndInDefaultCharacterSet)
       
    239 		{
       
    240 		TInt dummyMaximumLength =
       
    241 			aForeign.MaxLength() - aArrayOfCharacterSets[KDefaultCharacterSet].iEscapeSequence->Length();
       
    242 		__ASSERT_ALWAYS(dummyMaximumLength >= 0, 
       
    243 			Panic(EPanicOutputDescriptorTooShortEvenToHoldEscapeSequenceToDefaultCharacterSet));
       
        		// NOTE(review): in append mode aForeign.Length() may already exceed
        		// dummyMaximumLength; the length handed to Set() would then be larger
        		// than the maximum length - confirm how TPtr8::Set() treats that.
    244 		dummyForeign.Set(const_cast <TUint8*> (aForeign.Ptr()),
       
    245 						 aForeign.Length(), 
       
    246 						 dummyMaximumLength);
       
    247 		foreign=&dummyForeign;
       
    248 		}
       
    249 	const TInt numberOfCharacterSets=aArrayOfCharacterSets.Count();
       
    250 	TInt numberOfUnicodeElementsConsumed=0;
       
    251 	internalInputConversionFlags|=CCnvCharacterSetConverter::EInputConversionFlagStopAtFirstUnconvertibleCharacter; // this is not just an optimization - it ensures that "foreign" doesn't get filled up too much each time CCnvCharacterSetConverter::DoConvertFromUnicode is called
       
        	// Index of the character set whose escape sequence was most recently
        	// written; starts as KNoPreviousCharacterSet unless the caller asked to
        	// assume the output starts in the default character set.
    252     TInt previousCharacterSet = aInputConversionFlags&CCnvCharacterSetConverter::EInputConversionFlagAssumeStartInDefaultCharacterSet?
       
    253         KDefaultCharacterSet : KNoPreviousCharacterSet;
       
        	// Each pass of the outer loop deals with one convertible run or with one
        	// unconvertible character; the inner loop walks the character sets.
    254 	FOREVER
       
    255 		{
       
    256 		for (TInt presentCharacterSet=KDefaultCharacterSet;;)
       
    257 			{
       
    258 			__ASSERT_DEBUG(numberOfUnicodeElementsConsumed<=aUnicode.Length(), Panic(EPanicBadNumberOfUnicodeElementsConsumed));
       
        			// All input consumed.
        			// NOTE(review): this exit does not append the default character
        			// set's escape sequence, even when
        			// EInputConversionFlagMustEndInDefaultCharacterSet is set and
        			// previousCharacterSet is a non-default set (reachable after a
        			// trailing replacement character) - confirm this is intended.
    259 			if (numberOfUnicodeElementsConsumed>=aUnicode.Length())
       
    260 				{
       
    261 				goto end;
       
    262 				}
       
    263 			const SCharacterSet& characterSet=aArrayOfCharacterSets[presentCharacterSet];
       
    264 			const TInt oldNumberOfBytesInForeign=foreign->Length();
       
        			// Once some input has been consumed, the internal calls are also
        			// given the "allow truncated input not even partly consumable" flag.
    265 			if (numberOfUnicodeElementsConsumed>0)
       
    266 				{
       
    267 				internalInputConversionFlags|=CCnvCharacterSetConverter::EInputConversionFlagAllowTruncatedInputNotEvenPartlyConsumable;
       
    268 				}
       
        			// A local index array is used so that only the position where this
        			// call stopped is seen here. KNullDesC8 is passed as the replacement
        			// text; the caller's replacement is appended by this function itself
        			// once every character set has been tried.
    269 			CCnvCharacterSetConverter::TArrayOfAscendingIndices indicesOfUnconvertibleCharacters;
       
    270 			const TInt returnValue=CCnvCharacterSetConverter::DoConvertFromUnicode(*characterSet.iConversionData, aDefaultEndiannessOfForeignCharacters, KNullDesC8, *foreign, aUnicode.Mid(numberOfUnicodeElementsConsumed), indicesOfUnconvertibleCharacters, aOutputConversionFlags, internalInputConversionFlags);
       
    271 			if (returnValue<0)
       
    272 				{
       
    273 				return returnValue; // this is an error-code
       
    274 				}
       
    275 			__ASSERT_DEBUG(foreign->Length()>=oldNumberOfBytesInForeign, Panic(EPanicAppendFlagViolated));
       
        			// A non-negative returnValue is treated as the number of elements of
        			// aUnicode still unconverted. A reported index is relative to the
        			// Mid() slice passed in, hence it is added to the running total.
    276 			TInt indexOfFirstUnconvertibleCharacter;
       
    277 			if (indicesOfUnconvertibleCharacters.NumberOfIndices()==0)
       
    278 				{
       
    279 				indexOfFirstUnconvertibleCharacter=-1;
       
    280 				numberOfUnicodeElementsConsumed=aUnicode.Length()-returnValue;
       
    281 				}
       
    282 			else
       
    283 				{
       
    284 				indexOfFirstUnconvertibleCharacter=indicesOfUnconvertibleCharacters[0];
       
    285 				numberOfUnicodeElementsConsumed+=indexOfFirstUnconvertibleCharacter;
       
    286 				__ASSERT_DEBUG(numberOfUnicodeElementsConsumed+LengthOfUnicodeCharacter(aUnicode, numberOfUnicodeElementsConsumed)==aUnicode.Length()-returnValue, Panic(EPanicBadNumberOfUnicodeCharactersConverted));
       
    287 				}
       
    288 			if (indexOfFirstUnconvertibleCharacter!=0) // if at least one Unicode character at the start of CCnvCharacterSetConverter::DoConvertFromUnicode's input descriptor was convertible...
       
    289 				{
       
    290 				TBool gotoEnd = EFalse;
       
        				// Only look at escape sequences if this call produced output.
    291 				if (foreign->Length()>oldNumberOfBytesInForeign)
       
    292 					{
       
    293 					TInt numberOfCharactersThatDroppedOut=0;
       
    294                      // Insert an escape sequence if this character set is different from the last one.
       
    295                      if (presentCharacterSet  != previousCharacterSet)
       
    296                         {
       
    297                         // Insert escape sequence (if required) in front of the last encoded run of text.
       
    298                         // Note that this may cause some characters to drop out at the end.
       
    299                         (*characterSet.iConvertFromIntermediateBufferInPlace)(oldNumberOfBytesInForeign, *foreign, numberOfCharactersThatDroppedOut);
       
        						// Remember the switch only if the run is still present
        						// after the in-place conversion.
    300                         if (oldNumberOfBytesInForeign < foreign->Length())
       
    301                 			previousCharacterSet = presentCharacterSet;
       
    302                         }
       
        					// Characters that dropped out are no longer in the output, so
        					// they are counted as unconsumed again.
    303 					numberOfUnicodeElementsConsumed-=numberOfCharactersThatDroppedOut;
       
    304 					if (numberOfCharactersThatDroppedOut>0 )// if "foreign" has been filled to as much as it will hold...
       
    305 						{
       
    306 						gotoEnd = ETrue;
       
    307 						}
       
    308 					}
       
    309 				if (indexOfFirstUnconvertibleCharacter<0) // if we've successfully converted up to the end of aUnicode (using *characterSet.iConversionData)...
       
    310 					{
       
    311 					gotoEnd = ETrue;
       
    312 					}
       
    313 				if (gotoEnd)
       
    314 					{
       
        					// Finishing in a non-default character set although the caller
        					// requires the default one: fix aForeign's length and append the
        					// default set's escape sequence into the space held back earlier.
        					// Setting foreign to NULL tells the code at "end" that aForeign's
        					// length is already final.
    315 					if ( aInputConversionFlags&CCnvCharacterSetConverter::EInputConversionFlagMustEndInDefaultCharacterSet
       
    316 					    && previousCharacterSet != KDefaultCharacterSet
       
    317 					    && previousCharacterSet != KNoPreviousCharacterSet)
       
    318 					    {
       
    319 					    aForeign.SetLength(foreign->Length());
       
    320     				    aForeign.Append(*aArrayOfCharacterSets[KDefaultCharacterSet].iEscapeSequence);
       
    321     				    foreign=NULL;
       
    322 					    }
       
    323 					goto end;
       
    324 					}
       
        				// A convertible run was written but input remains: leave the inner
        				// loop so the next attempt starts again at the default character set.
    325 				break;
       
    326 				}
       
        			// The first character offered could not be converted by this
        			// character set: try the next one in the array.
    327 			__ASSERT_DEBUG(presentCharacterSet<numberOfCharacterSets, Panic(EPanicLoopCounterOverRun1));
       
    328 			++presentCharacterSet;
       
        			// Every character set has been tried: record the character's index and
        			// write the caller's replacement text, provided both of them fit.
    329 			if (presentCharacterSet>=numberOfCharacterSets)
       
    330 				{
       
    331 				if ((foreign->MaxLength()-foreign->Length()<aReplacementForUnconvertibleUnicodeCharacters.Length()) ||
       
    332 					(aIndicesOfUnconvertibleCharacters.AppendIndex(numberOfUnicodeElementsConsumed)!=CCnvCharacterSetConverter::TArrayOfAscendingIndices::EAppendSuccessful)) // the tests must be done in this order as AppendIndex must only be called if there is room for aReplacementForUnconvertibleUnicodeCharacters
       
    333 					{
       
        					// NOTE(review): like the exit at the top of the inner loop, this
        					// path does not append the default character set's escape
        					// sequence when EInputConversionFlagMustEndInDefaultCharacterSet
        					// is set - confirm this is intended.
    334 					goto end;
       
    335 					}
       
    336 				numberOfUnicodeElementsConsumed+=LengthOfUnicodeCharacter(aUnicode, numberOfUnicodeElementsConsumed);
       
    337 				foreign->Append(aReplacementForUnconvertibleUnicodeCharacters);
       
    338 				break;
       
    339 				}
       
    340 			}
       
    341 		}
       
    342 end:
       
        	// Copy the final length back into aForeign (needed when writing went
        	// through dummyForeign), unless it has already been finalised above.
    343 	if (foreign!=NULL)
       
    344 		{
       
    345 		aForeign.SetLength(foreign->Length());
       
    346 		foreign=NULL;
       
    347 		}
       
        	// Nothing consumed, the input was flagged as truncated, and the caller did
        	// not allow that situation: report ill-formed input.
    348 	if ((numberOfUnicodeElementsConsumed==0) && (aOutputConversionFlags&CCnvCharacterSetConverter::EOutputConversionFlagInputIsTruncated) && (~aInputConversionFlags&CCnvCharacterSetConverter::EInputConversionFlagAllowTruncatedInputNotEvenPartlyConsumable))
       
    349 		{
       
    350 		return CCnvCharacterSetConverter::EErrorIllFormedInput;
       
    351 		}
       
        	// Number of elements of aUnicode left unconverted.
    352 	return aUnicode.Length()-numberOfUnicodeElementsConsumed;
       
    353 	}
       
   354 
       
   355 
       
   356 /** Inserts an escape sequence into the descriptor.
       
   357 
       
   358 This function is provided to help in the implementation of
       
   359 ConvertFromUnicode() for modal character set encodings.
       
   360 Each SCharacterSet object in the array passed to
       
   361 ConvertFromUnicode() must have its
       
   362 iConvertFromIntermediateBufferInPlace member assigned. To
       
   363 do this for a modal character set encoding, implement a function whose
       
   364 signature matches that of FConvertFromIntermediateBufferInPlace 
       
   365 and which calls this function, passing all arguments unchanged, and 
       
   366 specifying the character set's escape sequence and the number of bytes per 
       
   367 character.
       
   368 
       
   369 @param aStartPositionInDescriptor The byte position in aDescriptor at which 
       
   370 the escape sequence is inserted. If the character set uses more than one byte 
       
   371 per character, this position must be the start of a character, otherwise a 
       
   372 panic occurs. 
       
   373 @param aDescriptor The descriptor into which the escape sequence is inserted. 
       
   374 @param aNumberOfCharactersThatDroppedOut The escape sequence is inserted into 
       
   375 the start of aDescriptor and any characters that need to drop out to make 
       
   376 room for the escape sequence (because the descriptor's maximum length was 
       
   377 not long enough) drop out from the end of the buffer. This parameter indicates 
       
   378 the number of characters that needed to drop out.
       
   379 @param aEscapeSequence The escape sequence for the character set. 
       
   380 @param aNumberOfBytesPerCharacter The number of bytes per character. */
       
   381 EXPORT_C void CnvUtilities::ConvertFromIntermediateBufferInPlace(
       
   382 					TInt aStartPositionInDescriptor, 
       
   383 					TDes8& aDescriptor, 
       
   384 					TInt& aNumberOfCharactersThatDroppedOut, 
       
   385 					const TDesC8& aEscapeSequence, 
       
   386 					TInt aNumberOfBytesPerCharacter)
       
   387 	{
       
   388 	const TInt lengthOfDescriptor=aDescriptor.Length();
       
   389 	__ASSERT_ALWAYS((lengthOfDescriptor-aStartPositionInDescriptor)%aNumberOfBytesPerCharacter==0, Panic(EPanicDescriptorNotWholeNumberOfCharacters1));
       
   390 	aNumberOfCharactersThatDroppedOut=(Max(0, aEscapeSequence.Length()-(aDescriptor.MaxLength()-lengthOfDescriptor))+(aNumberOfBytesPerCharacter-1))/aNumberOfBytesPerCharacter;
       
   391 	const TInt lengthOfRunInCharacters=(lengthOfDescriptor-aStartPositionInDescriptor)/aNumberOfBytesPerCharacter;
       
   392 	if (aNumberOfCharactersThatDroppedOut>=lengthOfRunInCharacters) // ">=" is correct (rather than ">") as if there's only room for the escape sequence we don't want to have it in the descriptor
       
   393 		{
       
   394 		aNumberOfCharactersThatDroppedOut=lengthOfRunInCharacters;
       
   395 		aDescriptor.SetLength(aStartPositionInDescriptor);
       
   396 		}
       
   397 	else
       
   398 		{
       
   399 		aDescriptor.SetLength(lengthOfDescriptor-(aNumberOfCharactersThatDroppedOut*aNumberOfBytesPerCharacter));
       
   400 		aDescriptor.Insert(aStartPositionInDescriptor, aEscapeSequence);
       
   401 		}
       
   402 	}
       
   403 
       
   404 
       
   405 /**  Converts text from a modal foreign character set encoding into Unicode.
       
   406 
       
   407 The non-Unicode text specified in aForeign is converted using 
       
   408 the array of character set conversion objects (aArrayOfStates) 
       
   409 provided by the plug-in, and the converted text is returned in 
       
   410 aUnicode. The function can either append to aUnicode 
       
   411 or overwrite its contents (if any), depending on the input conversion flags 
       
   412 specified. The first element in aArrayOfStates is taken to be 
       
   413 the default mode (i.e. the mode to assume by default if there is no preceding 
       
   414 escape sequence).
       
   415  
       
   416 @param aDefaultEndiannessOfForeignCharacters The default endian-ness of the 
       
   417 foreign characters. If an endian-ness for foreign characters is specified 
       
   418 in the conversion data, then that is used instead and the value of 
       
   419 aDefaultEndiannessOfForeignCharacters is ignored. 
       
   420 @param aUnicode On return, contains the text converted into Unicode. 
       
   421 @param aForeign The non-Unicode source text to be converted. 
       
   422 @param aState Used to store a modal character set encoding's current mode across 
       
   423 multiple calls to ConvertToUnicode() on the same input descriptor. This argument 
       
   424 should be passed the same object as passed to the plug-in's ConvertToUnicode() 
       
   425 exported function.
       
   426 @param aNumberOfUnconvertibleCharacters On return, contains the number of 
       
   427 characters in aForeign which were not converted. Characters which cannot be 
       
   428 converted are output as Unicode replacement characters (0xfffd). 
       
   429 @param aIndexOfFirstByteOfFirstUnconvertibleCharacter On return, the index 
       
   430 of the first byte of the first unconvertible character. For instance if the 
       
   431 first character in the input descriptor (aForeign) could not be converted, 
       
   432 then this parameter is set to the first byte of that character, i.e. zero. 
       
   433 A negative value is returned if all the characters were converted. 
       
   434 @param aArrayOfStates Array of character set conversion data objects, and their 
       
   435 escape sequences ("modes"). There must be one or more modes in this array, 
       
   436 none of the modes can have any NULL member data, and each mode's escape sequence 
       
   437 must begin with KControlCharacterEscape (0x1b) or a panic occurs. 
       
   438 @return The number of unconverted bytes left at the end of the input descriptor, 
       
   439 or a negative error value, as defined in TError. */
       
   440 EXPORT_C TInt CnvUtilities::ConvertToUnicodeFromModalForeign(
       
   441 					CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
       
   442 					TDes16& aUnicode, 
       
   443 					const TDesC8& aForeign, 
       
   444 					TInt& aState, 
       
   445 					TInt& aNumberOfUnconvertibleCharacters, 
       
   446 					TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter, 
       
   447 					const TArray<SState>& aArrayOfStates)
       
   448 	{
       
   449 	TUint notUsed;
       
   450 	return ConvertToUnicodeFromModalForeign(aDefaultEndiannessOfForeignCharacters, 
       
   451 											aUnicode, 
       
   452 											aForeign, 
       
   453 											aState, 
       
   454 											aNumberOfUnconvertibleCharacters, 
       
   455 											aIndexOfFirstByteOfFirstUnconvertibleCharacter, 
       
   456 											aArrayOfStates, 
       
   457 											notUsed, 
       
   458 											0);
       
   459 	}
       
   460 
       
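    461 /** Converts text from a modal foreign character set encoding into Unicode. 
        This overload additionally takes input conversion flags and returns output 
        conversion flags; the overload without flag parameters calls this one with 
        no input flags set.

        @param aDefaultEndiannessOfForeignCharacters The default endian-ness for 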
       
   462 the foreign characters. If an endian-ness for foreign characters is specified 
       
   463 in the conversion data, then that is used instead and the value of 
       
   464 aDefaultEndiannessOfForeignCharacters is ignored. 
       
   465 @param aUnicode On return, contains the text converted into Unicode. 
       
   466 @param aForeign The non-Unicode source text to be converted. 
       
   467 @param aState Used to store a modal character set encoding's current mode 
       
   468 across multiple calls to ConvertToUnicode() on the same input descriptor. This 
       
   469 argument should be passed the same object as passed to the plug-in's 
       
   470 ConvertToUnicode() exported function. 
       
   471 @param aNumberOfUnconvertibleCharacters On return, contains the number of 
       
   472 characters in aForeign which were not converted. Characters which cannot be 
       
   473 converted are output as Unicode replacement characters (0xfffd). 
       
   474 @param aIndexOfFirstByteOfFirstUnconvertibleCharacter On return, the index 
       
   475 of the first byte of the first unconvertible character. For instance if the 
       
   476 first character in the input descriptor (aForeign) could not be converted, 
       
   477 then this parameter is set to the first byte of that character, i.e. zero. 
       
   478 A negative value is returned if all the characters were converted. 
       
   479 @param aArrayOfStates Array of character set conversion data objects, and their 
       
   480 escape sequences. There must be one or more modes in this array, none of the 
       
   481 modes can have any NULL member data, and each mode's escape sequence must 
       
   482 begin with KControlCharacterEscape (0x1b) or a panic occurs. 
       
   483 @param aOutputConversionFlags If the input descriptor ended in a truncated 
       
   484 sequence, e.g. an incomplete multi-byte character, aOutputConversionFlags 
       
   485 returns with the EOutputConversionFlagInputIsTruncated flag set. 
       
   486 @param aInputConversionFlags Specify 
       
   487 CCnvCharacterSetConverter::EInputConversionFlagAppend to append the text to 
       
   488 aUnicode. Specify EInputConversionFlagAllowTruncatedInputNotEvenPartlyConsumable 
       
   489 to prevent the function from returning the error-code EErrorIllFormedInput 
       
   490 when the input descriptor consists of nothing but a truncated sequence. The 
       
   491 CCnvCharacterSetConverter::EInputConversionFlagStopAtFirstUnconvertibleCharacter 
       
   492 flag must not be set, otherwise a panic occurs. 
       
   493 @return The number of unconverted bytes left at the end of the input descriptor, 
       
   494 or a negative error value, as defined in TError. */
       
   495 EXPORT_C TInt CnvUtilities::ConvertToUnicodeFromModalForeign(
       
   496 								CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
       
   497 								TDes16& aUnicode, 
       
   498 								const TDesC8& aForeign, 
       
   499 								TInt& aState, 
       
   500 								TInt& aNumberOfUnconvertibleCharacters, 
       
   501 								TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter, 
       
   502 								const TArray<SState>& aArrayOfStates, 
       
   503 								TUint& aOutputConversionFlags, 
       
   504 								TUint aInputConversionFlags)
       
   505 	{
       
   506 	__ASSERT_ALWAYS(~aInputConversionFlags&CCnvCharacterSetConverter::EInputConversionFlagStopAtFirstUnconvertibleCharacter, Panic(EPanicBadInputConversionFlags2));
       
   507 	CheckArrayOfStates(aArrayOfStates);
       
   508 	aNumberOfUnconvertibleCharacters=0;
       
   509 	aIndexOfFirstByteOfFirstUnconvertibleCharacter=-1;
       
   510 	aOutputConversionFlags=0;
       
   511 	TUint internalInputConversionFlags=aInputConversionFlags;
       
   512 	if (~aInputConversionFlags&CCnvCharacterSetConverter::EInputConversionFlagAppend)
       
   513 		{
       
   514 		aUnicode.SetLength(0);
       
   515 		internalInputConversionFlags|=CCnvCharacterSetConverter::EInputConversionFlagAppend;
       
   516 		}
       
   517 	if (aForeign.Length()==0)
       
   518 		{
       
   519 		return 0;
       
   520 		}
       
   521 	if (aUnicode.MaxLength()==aUnicode.Length()) // relies on the fact that aUnicode's length has been set to zero if aInputConversionFlags does not have CCnvCharacterSetConverter::EInputConversionFlagAppend set
       
   522 		{
       
   523 		return aForeign.Length();
       
   524 		}
       
   525 	TPtrC8 remainderOfForeign(aForeign);
       
   526 	TPtrC8 homogeneousRun;
       
   527 	TInt numberOfForeignBytesConsumed=0;
       
   528 	const SCnvConversionData* conversionData = NULL;
       
   529 	const TInt startOfNextEscapeSequence=aForeign.Locate(KControlCharacterEscape);
       
   530 	if (startOfNextEscapeSequence!=0) // if aForeign doesn't start with an escape sequence...
       
   531 		{
       
   532 		conversionData=(aState!=CCnvCharacterSetConverter::KStateDefault)? REINTERPRET_CAST(const SCnvConversionData*, aState): aArrayOfStates[0].iConversionData;
       
   533 		if (startOfNextEscapeSequence==KErrNotFound)
       
   534 			{
       
   535 			homogeneousRun.Set(remainderOfForeign);
       
   536 			remainderOfForeign.Set(NULL, 0);
       
   537 			}
       
   538 		else
       
   539 			{
       
   540 			__ASSERT_DEBUG(startOfNextEscapeSequence>0, Panic(EPanicBadStartOfNextEscapeSequence));
       
   541 			homogeneousRun.Set(remainderOfForeign.Left(startOfNextEscapeSequence));
       
   542 			remainderOfForeign.Set(remainderOfForeign.Mid(startOfNextEscapeSequence));
       
   543 			}
       
   544 		goto handleHomogeneousRun;
       
   545 		}
       
   546 	FOREVER
       
   547 		{
       
   548 		if (!NextHomogeneousForeignRun(conversionData, numberOfForeignBytesConsumed, homogeneousRun, remainderOfForeign, aArrayOfStates, aOutputConversionFlags))
       
   549 			{
       
   550 			goto end;
       
   551 			}
       
   552 handleHomogeneousRun:
       
   553 		if (conversionData==NULL)
       
   554 			{
       
   555 			return CCnvCharacterSetConverter::EErrorIllFormedInput;
       
   556 			}
       
   557 		TInt numberOfUnconvertibleCharacters;
       
   558 		TInt indexOfFirstByteOfFirstUnconvertibleCharacter;
       
   559 		const TInt returnValue=CCnvCharacterSetConverter::DoConvertToUnicode(*conversionData, aDefaultEndiannessOfForeignCharacters, aUnicode, homogeneousRun, numberOfUnconvertibleCharacters, indexOfFirstByteOfFirstUnconvertibleCharacter, aOutputConversionFlags, internalInputConversionFlags);
       
   560 		if (returnValue<0)
       
   561 			{
       
   562 			return returnValue; // this is an error-code
       
   563 			}
       
   564 		if (numberOfUnconvertibleCharacters>0)
       
   565 			{
       
   566 			if (aNumberOfUnconvertibleCharacters==0)
       
   567 				{
       
   568 				aIndexOfFirstByteOfFirstUnconvertibleCharacter=numberOfForeignBytesConsumed+indexOfFirstByteOfFirstUnconvertibleCharacter;
       
   569 				}
       
   570 			aNumberOfUnconvertibleCharacters+=numberOfUnconvertibleCharacters;
       
   571 			}
       
   572 		numberOfForeignBytesConsumed+=homogeneousRun.Length();
       
   573 		if (returnValue>0)
       
   574 			{
       
   575 			numberOfForeignBytesConsumed-=returnValue;
       
   576 			goto end;
       
   577 			}
       
   578 		if (numberOfForeignBytesConsumed>0)
       
   579 			{
       
   580 			internalInputConversionFlags|=CCnvCharacterSetConverter::EInputConversionFlagAllowTruncatedInputNotEvenPartlyConsumable;
       
   581 			}
       
   582 		__ASSERT_DEBUG(remainderOfForeign==aForeign.Mid(numberOfForeignBytesConsumed), Panic(EPanicInconsistentNumberOfForeignBytesRemaining));
       
   583 		}
       
   584 end:
       
   585 	if ((numberOfForeignBytesConsumed==0) && (aOutputConversionFlags&CCnvCharacterSetConverter::EOutputConversionFlagInputIsTruncated) && (~aInputConversionFlags&CCnvCharacterSetConverter::EInputConversionFlagAllowTruncatedInputNotEvenPartlyConsumable))
       
   586 		{
       
   587 		return CCnvCharacterSetConverter::EErrorIllFormedInput;
       
   588 		}
       
   589 	aState=REINTERPRET_CAST(TInt, conversionData);
       
   590 	return aForeign.Length()-numberOfForeignBytesConsumed;
       
   591 	}
       
   592 
       
   593 
       
   594 /**  Converts text from a non-modal complex character set encoding (e.g. 
       
   595 Shift-JIS or EUC-JP) into Unicode.The non-Unicode text specified in
       
   596 aForeign is converted using the array of character set
       
   597 conversion methods (aArrayOfMethods) provided by the
       
   598 plug-in, and the converted text is returned in aUnicode.
       
   599 Overwrites the contents, if any, of aUnicode.
       
   600 
       
   601 @param aDefaultEndiannessOfForeignCharacters The default endian-ness of the 
       
   602 foreign characters. If an endian-ness for foreign characters is specified 
       
   603 in the conversion data, then that is used instead and the value of 
       
   604 aDefaultEndiannessOfForeignCharacters is ignored. 
       
   605 @param aUnicode On return, contains the text converted into Unicode. 
       
   606 @param aForeign The non-Unicode source text to be converted. 
       
   607 @param aNumberOfUnconvertibleCharacters On return, contains the number of 
       
   608 characters in aForeign which were not converted. Characters which cannot be 
       
   609 converted are output as Unicode replacement characters (0xfffd). 
       
   610 @param aIndexOfFirstByteOfFirstUnconvertibleCharacter On return, the index 
       
   611 of the first byte of the first unconvertible character. For instance if the 
       
   612 first character in the input descriptor (aForeign) could not be converted, 
       
   613 then this parameter is set to the first byte of that character, i.e. zero. 
       
   614 A negative value is returned if all the characters were converted. 
       
   615 @param aArrayOfMethods Array of conversion methods. There must be one or more 
       
   616 methods in this array and none of the methods in the array can have any NULL 
       
   617 member data or a panic occurs. 
       
   618 @return The number of unconverted bytes left at the end of the input descriptor, 
       
   619 or a negative error value, as defined in TError. */
       
   620 EXPORT_C TInt CnvUtilities::ConvertToUnicodeFromHeterogeneousForeign(
       
   621 					CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
       
   622 					TDes16& aUnicode, 
       
   623 					const TDesC8& aForeign, 
       
   624 					TInt& aNumberOfUnconvertibleCharacters, 
       
   625 					TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter, 
       
   626 					const TArray<SMethod>& aArrayOfMethods)
       
   627 	{
       
   628 	TUint notUsed;
       
   629 	return ConvertToUnicodeFromHeterogeneousForeign(
       
   630 				aDefaultEndiannessOfForeignCharacters, 
       
   631 				aUnicode, 
       
   632 				aForeign, 
       
   633 				aNumberOfUnconvertibleCharacters, 
       
   634 				aIndexOfFirstByteOfFirstUnconvertibleCharacter, 
       
   635 				aArrayOfMethods, 
       
   636 				notUsed, 
       
   637 				0);
       
   638 	}
       
   639 
       
   640 /** @param aDefaultEndiannessOfForeignCharacters The default endian-ness for the 
       
   641 foreign characters. If an endian-ness for foreign characters is specified 
       
   642 in the conversion data, then that is used instead and the value of 
       
   643 aDefaultEndiannessOfForeignCharacters is ignored. 
       
   644 @param aUnicode On return, contains the text converted into Unicode. 
       
   645 @param aForeign The non-Unicode source text to be converted. 
       
   646 @param aNumberOfUnconvertibleCharacters On return, contains the number of 
       
   647 characters in aForeign which were not converted. Characters which cannot be 
       
   648 converted are output as Unicode replacement characters (0xfffd). 
       
   649 @param aIndexOfFirstByteOfFirstUnconvertibleCharacter On return, the index 
       
   650 of the first byte of the first unconvertible character. For instance if the 
       
   651 first character in the input descriptor (aForeign) could not be converted, 
       
   652 then this parameter is set to the first byte of that character, i.e. zero. 
       
   653 A negative value is returned if all the characters were converted. 
       
   654 @param aArrayOfMethods Array of conversion methods. There must be one or more 
       
   655 methods in this array and none of the methods in the array can have any NULL 
       
   656 member data or a panic occurs. 
       
   657 @param aOutputConversionFlags If the input descriptor ended in a truncated 
       
   658 sequence, e.g. an incomplete multi-byte character, aOutputConversionFlags 
       
   659 returns with the EOutputConversionFlagInputIsTruncated flag set. 
       
   660 @param aInputConversionFlags Specify 
       
   661 CCnvCharacterSetConverter::EInputConversionFlagAppend to append the text to 
       
   662 aUnicode. Specify EInputConversionFlagAllowTruncatedInputNotEvenPartlyConsumable 
       
   663 to prevent the function from returning the error-code EErrorIllFormedInput 
       
   664 when the input descriptor consists of nothing but a truncated sequence. The 
       
   665 CCnvCharacterSetConverter::EInputConversionFlagStopAtFirstUnconvertibleCharacter 
       
   666 flag must not be set, otherwise a panic occurs. 
       
   667 @return The number of unconverted bytes left at the end of the input descriptor, 
       
   668 or a negative error value, as defined in TError. */
       
   669 EXPORT_C TInt CnvUtilities::ConvertToUnicodeFromHeterogeneousForeign(
       
   670 						CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
       
   671 						TDes16& aUnicode, 
       
   672 						const TDesC8& aForeign, 
       
   673 						TInt& aNumberOfUnconvertibleCharacters, 
       
   674 						TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter, 
       
   675 						const TArray<SMethod>& aArrayOfMethods, 
       
   676 						TUint& aOutputConversionFlags, 
       
   677 						TUint aInputConversionFlags)
       
   678 	{
       
   679 	__ASSERT_ALWAYS(~aInputConversionFlags&CCnvCharacterSetConverter::EInputConversionFlagStopAtFirstUnconvertibleCharacter, Panic(EPanicBadInputConversionFlags3));
       
   680 	CheckArrayOfMethods(aArrayOfMethods);
       
   681 	aNumberOfUnconvertibleCharacters=0;
       
   682 	aIndexOfFirstByteOfFirstUnconvertibleCharacter=-1;
       
   683 	aOutputConversionFlags=0;
       
   684 	TUint internalInputConversionFlags=aInputConversionFlags;
       
   685 	if (~aInputConversionFlags&CCnvCharacterSetConverter::EInputConversionFlagAppend)
       
   686 		{
       
   687 		aUnicode.SetLength(0);
       
   688 		internalInputConversionFlags|=CCnvCharacterSetConverter::EInputConversionFlagAppend;
       
   689 		}
       
   690 	if (aForeign.Length()==0)
       
   691 		{
       
   692 		return 0;
       
   693 		}
       
   694 	if (aUnicode.MaxLength()==aUnicode.Length()) // relies on the fact that aUnicode's length has been set to zero if aInputConversionFlags does not have CCnvCharacterSetConverter::EInputConversionFlagAppend set
       
   695 		{
       
   696 		return aForeign.Length();
       
   697 		}
       
   698 	const TInt numberOfMethods=aArrayOfMethods.Count();
       
   699 	TPtrC8 remainderOfForeign(aForeign);
       
   700 	TInt numberOfForeignBytesConsumed=0;
       
   701 	FOREVER
       
   702 		{
       
   703 		TInt lengthOfRunToConvert=0;
       
   704 		const SMethod* method=NULL;
       
   705 		for (TInt i=0;;)
       
   706 			{
       
   707 			method=&aArrayOfMethods[i];
       
   708 			lengthOfRunToConvert=(*method->iNumberOfBytesAbleToConvert)(remainderOfForeign);
       
   709 			if (lengthOfRunToConvert<0)
       
   710 				{
       
   711 				return lengthOfRunToConvert; // this is an error-code
       
   712 				}
       
   713 			if (lengthOfRunToConvert>0)
       
   714 				{
       
   715 				break;
       
   716 				}
       
   717 			__ASSERT_DEBUG(i<numberOfMethods, Panic(EPanicLoopCounterOverRun2));
       
   718 			++i;
       
   719 			if (i>=numberOfMethods)
       
   720 				{
       
   721 				aOutputConversionFlags|=CCnvCharacterSetConverter::EOutputConversionFlagInputIsTruncated;
       
   722 				goto end;
       
   723 				}
       
   724 			}
       
   725 		__ASSERT_DEBUG(lengthOfRunToConvert>0, Panic(EPanicBadLengthOfRunToConvert1));
       
   726 		__ASSERT_DEBUG(method!=NULL, Panic(EPanicBadMethodPointer));
       
   727 		TBuf8<KMaximumLengthOfIntermediateBuffer> intermediateBuffer;
       
   728 		const TInt maximumUsableLengthOfIntermediateBuffer=ReduceToNearestMultipleOf(KMaximumLengthOfIntermediateBuffer, method->iNumberOfBytesPerCharacter);
       
   729 		FOREVER
       
   730 			{
       
   731 			const TInt numberOfForeignBytesConsumedThisTime=Min(lengthOfRunToConvert, maximumUsableLengthOfIntermediateBuffer);
       
   732 			intermediateBuffer=remainderOfForeign.Left(numberOfForeignBytesConsumedThisTime);
       
   733 			__ASSERT_DEBUG((numberOfForeignBytesConsumedThisTime%method->iNumberOfBytesPerCharacter)==0, Panic(EPanicDescriptorNotWholeNumberOfCharacters2));
       
   734 			(*method->iConvertToIntermediateBufferInPlace)(intermediateBuffer);
       
   735 			__ASSERT_DEBUG((intermediateBuffer.Length()%method->iNumberOfCoreBytesPerCharacter)==0, Panic(EPanicDescriptorNotWholeNumberOfCharacters3));
       
   736 			__ASSERT_DEBUG((intermediateBuffer.Length()/method->iNumberOfCoreBytesPerCharacter)*method->iNumberOfBytesPerCharacter==numberOfForeignBytesConsumedThisTime, Panic(EPanicBadMethodData1));
       
   737 			TInt numberOfUnconvertibleCharacters;
       
   738 			TInt indexOfFirstByteOfFirstUnconvertibleCharacter;
       
   739 			const TInt returnValue=CCnvCharacterSetConverter::DoConvertToUnicode(*method->iConversionData, aDefaultEndiannessOfForeignCharacters, aUnicode, intermediateBuffer, numberOfUnconvertibleCharacters, indexOfFirstByteOfFirstUnconvertibleCharacter, aOutputConversionFlags, internalInputConversionFlags);
       
   740 			if (returnValue<0)
       
   741 				{
       
   742 				return returnValue; // this is an error-code
       
   743 				}
       
   744 			if (numberOfUnconvertibleCharacters>0)
       
   745 				{
       
   746 				if (aNumberOfUnconvertibleCharacters==0)
       
   747 					{
       
   748 					aIndexOfFirstByteOfFirstUnconvertibleCharacter=numberOfForeignBytesConsumed+indexOfFirstByteOfFirstUnconvertibleCharacter;
       
   749 					}
       
   750 				aNumberOfUnconvertibleCharacters+=numberOfUnconvertibleCharacters;
       
   751 				}
       
   752 			numberOfForeignBytesConsumed+=numberOfForeignBytesConsumedThisTime;
       
   753 			if (returnValue>0)
       
   754 				{
       
   755 				__ASSERT_DEBUG((returnValue%method->iNumberOfCoreBytesPerCharacter)==0, Panic(EPanicDescriptorNotWholeNumberOfCharacters4));
       
   756 				numberOfForeignBytesConsumed-=(returnValue/method->iNumberOfCoreBytesPerCharacter)*method->iNumberOfBytesPerCharacter;
       
   757 				goto end;
       
   758 				}
       
   759 			if (numberOfForeignBytesConsumed>0)
       
   760 				{
       
   761 				internalInputConversionFlags|=CCnvCharacterSetConverter::EInputConversionFlagAllowTruncatedInputNotEvenPartlyConsumable;
       
   762 				}
       
   763 			remainderOfForeign.Set(aForeign.Mid(numberOfForeignBytesConsumed));
       
   764 			lengthOfRunToConvert-=numberOfForeignBytesConsumedThisTime;
       
   765 			__ASSERT_DEBUG(lengthOfRunToConvert>=0, Panic(EPanicBadLengthOfRunToConvert2));
       
   766 			if (lengthOfRunToConvert<=0)
       
   767 				{
       
   768 				break;
       
   769 				}
       
   770 			}
       
   771 		}
       
   772 end:
       
   773 	if ((numberOfForeignBytesConsumed==0) && (aOutputConversionFlags&CCnvCharacterSetConverter::EOutputConversionFlagInputIsTruncated) && (~aInputConversionFlags&CCnvCharacterSetConverter::EInputConversionFlagAllowTruncatedInputNotEvenPartlyConsumable))
       
   774 		{
       
   775 		return CCnvCharacterSetConverter::EErrorIllFormedInput;
       
   776 		}
       
   777 	return aForeign.Length()-numberOfForeignBytesConsumed;
       
   778 	}
       
   779 
       
   780 void CnvUtilities::CheckArrayOfCharacterSets(const TArray<SCharacterSet>& aArrayOfCharacterSets)
       
   781 	{
       
   782 	const TInt numberOfCharacterSets=aArrayOfCharacterSets.Count();
       
   783 	__ASSERT_ALWAYS(numberOfCharacterSets>0, Panic(EPanicBadNumberOfCharacterSets));
       
   784 	for (TInt i=0; i<numberOfCharacterSets; ++i)
       
   785 		{
       
   786 		const SCharacterSet& characterSet=aArrayOfCharacterSets[i];
       
   787 		__ASSERT_ALWAYS(characterSet.iConversionData!=NULL, Panic(EPanicBadConversionDataPointer1));
       
   788 		__ASSERT_ALWAYS(characterSet.iConvertFromIntermediateBufferInPlace!=NULL, Panic(EPanicBadFunctionPointer1));
       
   789 		__ASSERT_ALWAYS(characterSet.iEscapeSequence!=NULL, Panic(EPanicBadEscapeSequencePointer1));
       
   790 		}
       
   791 	}
       
   792 
       
   793 void CnvUtilities::CheckArrayOfStates(const TArray<SState>& aArrayOfStates)
       
   794 	{
       
   795 	const TInt numberOfStates=aArrayOfStates.Count();
       
   796 	__ASSERT_ALWAYS(numberOfStates>0, Panic(EPanicBadNumberOfStates));
       
   797 	for (TInt i=0; i<numberOfStates; ++i)
       
   798 		{
       
   799 		const SState& state=aArrayOfStates[i];
       
   800 		__ASSERT_ALWAYS(state.iEscapeSequence!=NULL, Panic(EPanicBadEscapeSequencePointer2));
       
   801 		__ASSERT_ALWAYS((*state.iEscapeSequence)[0]==KControlCharacterEscape, Panic(EPanicBadEscapeSequenceStart));
       
   802 		__ASSERT_ALWAYS(state.iConversionData!=NULL, Panic(EPanicBadConversionDataPointer2));
       
   803 		}
       
   804 	}
       
   805 
       
   806 void CnvUtilities::CheckArrayOfMethods(const TArray<SMethod>& aArrayOfMethods)
       
   807 	{
       
   808 	const TInt numberOfMethods=aArrayOfMethods.Count();
       
   809 	__ASSERT_ALWAYS(numberOfMethods>0, Panic(EPanicBadNumberOfMethods));
       
   810 	for (TInt i=0; i<numberOfMethods; ++i)
       
   811 		{
       
   812 		const SMethod& method=aArrayOfMethods[i];
       
   813 		__ASSERT_ALWAYS(method.iNumberOfBytesAbleToConvert!=NULL, Panic(EPanicBadFunctionPointer2));
       
   814 		__ASSERT_ALWAYS(method.iConvertToIntermediateBufferInPlace!=NULL, Panic(EPanicBadFunctionPointer3));
       
   815 		__ASSERT_ALWAYS(method.iConversionData!=NULL, Panic(EPanicBadConversionDataPointer3));
       
   816 		__ASSERT_ALWAYS(method.iNumberOfBytesPerCharacter>0, Panic(EPanicBadMethodData2));
       
   817 		__ASSERT_ALWAYS(method.iNumberOfCoreBytesPerCharacter>0, Panic(EPanicBadMethodData3));
       
   818 		__ASSERT_ALWAYS(method.iNumberOfCoreBytesPerCharacter<=method.iNumberOfBytesPerCharacter, Panic(EPanicBadMethodData4));
       
   819 		}
       
   820 	}
       
   821 
       
   822 TInt CnvUtilities::LengthOfUnicodeCharacter(const TDesC16& aUnicode, TInt aIndex)
       
   823 	{
       
   824 	const TUint unicodeCharacter=aUnicode[aIndex];
       
   825 	if ((unicodeCharacter>=0xd800) && (unicodeCharacter<=0xdbff)) // if the unicode character is the first half of a surrogate-pair...
       
   826 		{
       
   827 		__ASSERT_DEBUG(aIndex+1<aUnicode.Length(), Panic(EPanicBadSurrogatePair1));
       
   828 #if defined(_DEBUG)
       
   829 		const TUint secondHalfOfSurrogatePair=aUnicode[aIndex+1];
       
   830 #endif
       
   831 		__ASSERT_DEBUG((secondHalfOfSurrogatePair>=0xdc00) && (secondHalfOfSurrogatePair<=0xdfff), Panic(EPanicBadSurrogatePair2)); // this can be asserted as CCnvCharacterSetConverter::DoConvertFromUnicode should have returned an error value if this was a bad surrogate pair
       
   832 		return 2;
       
   833 		}
       
   834 	return 1;
       
   835 	}
       
   836 
       
   837 TBool CnvUtilities::NextHomogeneousForeignRun(const SCnvConversionData*& aConversionData, TInt& aNumberOfForeignBytesConsumed, TPtrC8& aHomogeneousRun, TPtrC8& aRemainderOfForeign, const TArray<SState>& aArrayOfStates, TUint& aOutputConversionFlags)
       
   838 	{
       
   839 	__ASSERT_DEBUG((aRemainderOfForeign.Length()==0) || (aRemainderOfForeign[0]==KControlCharacterEscape), Panic(EPanicBadRemainderOfForeign));
       
   840 	FOREVER
       
   841 		{
       
   842 		if (aRemainderOfForeign.Length()==0)
       
   843 			{
       
   844 			return EFalse;
       
   845 			}
       
   846 		const TInt numberOfStates=aArrayOfStates.Count();
       
   847 		TInt i;
       
   848 		for (i=0; i<numberOfStates; ++i)
       
   849 			{
       
   850 			const SState& state=aArrayOfStates[i];
       
   851 			if (MatchesEscapeSequence(aNumberOfForeignBytesConsumed, aHomogeneousRun, aRemainderOfForeign, *state.iEscapeSequence))
       
   852 				{
       
   853 				aConversionData=state.iConversionData;
       
   854 				goto foundState;
       
   855 				}
       
   856 			}
       
   857 		for (i=0; i<numberOfStates; ++i)
       
   858 			{
       
   859 			if (IsStartOf(aRemainderOfForeign, *aArrayOfStates[i].iEscapeSequence))
       
   860 				{
       
   861 				// aRemainderOfForeign ends with a truncated escape sequence, so ConvertToUnicode cannot convert any more
       
   862 				aOutputConversionFlags|=CCnvCharacterSetConverter::EOutputConversionFlagInputIsTruncated;
       
   863 				return EFalse;
       
   864 				}
       
   865 			}
       
   866 		// force ConvertToUnicode to return CCnvCharacterSetConverter::EErrorIllFormedInput
       
   867 		aConversionData=NULL;
       
   868 		return ETrue;
       
   869 foundState:
       
   870 		if (aHomogeneousRun.Length()>0)
       
   871 			{
       
   872 			return ETrue;
       
   873 			}
       
   874 		}
       
   875 	}
       
   876 
       
   877 TBool CnvUtilities::MatchesEscapeSequence(TInt& aNumberOfForeignBytesConsumed, TPtrC8& aHomogeneousRun, TPtrC8& aRemainderOfForeign, const TDesC8& aEscapeSequence)
       
   878 	{
       
   879 	const TInt lengthOfEscapeSequence=aEscapeSequence.Length();
       
   880 	if (IsStartOf(aEscapeSequence, aRemainderOfForeign))
       
   881 		{
       
   882 		aRemainderOfForeign.Set(aRemainderOfForeign.Mid(lengthOfEscapeSequence));
       
   883 		const TInt startOfNextEscapeSequence=aRemainderOfForeign.Locate(KControlCharacterEscape);
       
   884 		if (startOfNextEscapeSequence==KErrNotFound)
       
   885 			{
       
   886 			aHomogeneousRun.Set(aRemainderOfForeign);
       
   887 			aRemainderOfForeign.Set(NULL, 0);
       
   888 			}
       
   889 		else
       
   890 			{
       
   891 			aHomogeneousRun.Set(aRemainderOfForeign.Left(startOfNextEscapeSequence));
       
   892 			aRemainderOfForeign.Set(aRemainderOfForeign.Mid(startOfNextEscapeSequence));
       
   893 			}
       
   894 		aNumberOfForeignBytesConsumed+=lengthOfEscapeSequence;
       
   895 		return ETrue;
       
   896 		}
       
   897 	return EFalse;
       
   898 	}
       
   899 
       
   900 TBool CnvUtilities::IsStartOf(const TDesC8& aStart, const TDesC8& aPotentiallyLongerDescriptor)
       
   901 	{
       
   902 	const TInt lengthOfStart=aStart.Length();
       
   903 	return (aPotentiallyLongerDescriptor.Length()>=lengthOfStart) && (aPotentiallyLongerDescriptor.Left(lengthOfStart)==aStart);
       
   904 	}
       
   905