charconvfw/charconvplugins/src/plugins/hz.cpp
changeset 0 1fb32624e06b
equal deleted inserted replaced
-1:000000000000 0:1fb32624e06b
       
     1 /*
       
     2 * Copyright (c) 1997-2009 Nokia Corporation and/or its subsidiary(-ies).
       
     3 * All rights reserved.
       
     4 * This component and the accompanying materials are made available
       
     5 * under the terms of "Eclipse Public License v1.0"
       
     6 * which accompanies this distribution, and is available
       
     7 * at the URL "http://www.eclipse.org/legal/epl-v10.html".
       
     8 *
       
     9 * Initial Contributors:
       
    10 * Nokia Corporation - initial contribution.
       
    11 *
       
    12 * Contributors:
       
    13 *
       
    14 * Description: 
       
    15 * HZ is defined in RFC 1843
       
    16 *
       
    17 */
       
    18 
       
    19 
       
    20 #include <e32std.h>
       
    21 #include <charconv.h>
       
    22 #include "gb2312.h"
       
    23 #include <ecom/implementationproxy.h>
       
    24 #include <charactersetconverter.h>
       
    25 
       
     26 const TInt KIsInGbBlock=CCnvCharacterSetConverter::KStateDefault+1; // value stored in the conversion state while decoding is inside a "~{" ... "~}" block (one past the framework's default state)
       
     27 #if defined(_DEBUG)
       
     28 const TInt KLengthOfIntermediateBuffer=6; // debug builds: capacity in bytes of the intermediate GB2312 buffer used by ConvertToUnicode - NOTE(review): presumably kept tiny so that the chunk-splitting paths get exercised; confirm
       
     29 #else
       
     30 const TInt KLengthOfIntermediateBuffer=150; // release builds: capacity in bytes of the intermediate GB2312 buffer used by ConvertToUnicode
       
     31 #endif
       
    32 
       
    33 #if defined(_DEBUG)
       
    34 
       
     35 _LIT(KLitPanicText, "HZ"); // panic category passed to User::Panic() by Panic() below
       
     36 
       
     37 enum TPanic // panic reason numbers used by the __ASSERT_DEBUG checks in this file (this whole section is only compiled when _DEBUG is defined)
       
     38 	{
       
     39 	EPanicTooManyMatchingIndicesFound=1,
       
     40 	EPanicBadNumberOfBytesRequiredToBeAvailable,
       
     41 	EPanicBadNumberOfBytesAvailable,
       
     42 	EPanicBadNumberOfBytesThatCanBeMadeAvailable,
       
     43 	EPanicBadNumberOfBytesMadeAvailable1,
       
     44 	EPanicBadNumberOfBytesMadeAvailable2,
       
     45 	EPanicBadDescriptorSubDivision1,
       
     46 	EPanicBadDescriptorSubDivision2,
       
     47 	EPanicBadDescriptorSubDivision3,
       
     48 	EPanicBadDescriptorSubDivision4,
       
     49 	EPanicBadPointers1,
       
     50 	EPanicBadPointers2,
       
     51 	EPanicBadPointers3,
       
     52 	EPanicBadPointers4,
       
     53 	EPanicBadPointers5,
       
     54 	EPanicBadPointers6,
       
     55 	EPanicBadPointers7,
       
     56 	EPanicBadPointers8,
       
     57 	EPanicBadPointers9,
       
     58 	EPanicBadPointers10,
       
     59 	EPanicBadPointers11,
       
     60 	EPanicBadPointers12,
       
     61 	EPanicStillInGbBlock,
       
     62 	EPanicBadState,
       
     63 	EPanicSplitBoundaryIsNotAsLateAsPossible1,
       
     64 	EPanicSplitBoundaryIsNotAsLateAsPossible2,
       
     65 	EPanicBadGb2312Index,
       
     66 	EPanicBadHzIndex,
       
     67 	EPanicBadTildeSequence,
       
     68 	EPanicBadReturnValue1,
       
     69 	EPanicBadReturnValue2,
       
     70 	EPanicRemainderOfHzHasGotLonger
       
     71 	};
       
     72 
       
     73 LOCAL_C void Panic(TPanic aPanic) // raises a panic in category KLitPanicText with aPanic as the reason number
       
     74 	{
       
     75 	User::Panic(KLitPanicText, aPanic);
       
     76 	}
       
    77 
       
    78 #endif
       
    79 
       
    80 class CHZConverterImpl : public CCharacterSetConverterPluginInterface
       
    81 	{
       
    82 
       
    83 public:
       
    84 	virtual const TDesC8& ReplacementForUnconvertibleUnicodeCharacters();
       
    85 
       
    86 	virtual TInt ConvertFromUnicode(
       
    87 		CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
       
    88 		const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, 
       
    89 		TDes8& aForeign, 
       
    90 		const TDesC16& aUnicode, 
       
    91 		CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters);
       
    92 
       
    93 	virtual TInt ConvertToUnicode(
       
    94 		CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
       
    95 		TDes16& aUnicode, 
       
    96 		const TDesC8& aForeign, 
       
    97 		TInt& aState, 
       
    98 		TInt& aNumberOfUnconvertibleCharacters, 
       
    99 		TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter);
       
   100 
       
   101 	virtual TBool IsInThisCharacterSetL(
       
   102 		TBool& aSetToTrue, 
       
   103 		TInt& aConfidenceLevel, 
       
   104 		const TDesC8& aSample);
       
   105 
       
   106 	static CHZConverterImpl* NewL();
       
   107 	virtual ~CHZConverterImpl();
       
   108 
       
   109 private:
       
   110 	CHZConverterImpl();
       
   111 
       
   112 	};
       
   113 
       
   114 
       
   115 
       
   116 const TDesC8& CHZConverterImpl::ReplacementForUnconvertibleUnicodeCharacters()
       
   117 	{
       
   118 	return CnvGb2312::ReplacementForUnconvertibleUnicodeCharacters();
       
   119 	}
       
   120 
       
   121 LOCAL_C void IncrementNumberOfUnicodeCharactersNotConverted(TInt aLengthOfUnicode, TInt& aNumberOfUnicodeCharactersNotConverted, CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters) // these seemingly haphazard order of these paramters is to match the position of the second and third parameters with the caller
       
   122 	{
       
   123 	++aNumberOfUnicodeCharactersNotConverted;
       
   124 	const TInt indexOfUnicodeCharacterNowNotConverted=aLengthOfUnicode-aNumberOfUnicodeCharactersNotConverted;
       
   125 #if defined(_DEBUG)
       
   126 	TInt numberOfMatchingIndicesFound=0;
       
   127 #endif
       
   128 	for (TInt i=aIndicesOfUnconvertibleCharacters.NumberOfIndices()-1; i>=0; --i) // must iterate backwards as items from aIndicesOfUnconvertibleCharacters may be deleted
       
   129 		{
       
   130 		if (aIndicesOfUnconvertibleCharacters[i]==indexOfUnicodeCharacterNowNotConverted)
       
   131 			{
       
   132 			aIndicesOfUnconvertibleCharacters.Remove(i);
       
   133 #if defined(_DEBUG)
       
   134 			++numberOfMatchingIndicesFound;
       
   135 #endif
       
   136 			}
       
   137 		}
       
   138 	__ASSERT_DEBUG(numberOfMatchingIndicesFound<=1, Panic(EPanicTooManyMatchingIndicesFound));
       
   139 	}
       
   140 
       
   141 LOCAL_C void MakeAvailable(TInt aNumberOfBytesRequiredToBeAvailable, TInt& aNumberOfUnicodeCharactersNotConverted, CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters, TInt aLengthOfUnicode, const TUint8*& aPointerToLastUsedByte, TInt& aNumberOfBytesAvailable, TInt aNumberOfBytesThatCanBeMadeAvailable) // these seemingly haphazard order of these paramters is to match the position of the second to fourth parameters (inclusive) with the caller
       
   142 // makes available as much of aNumberOfBytesRequiredToBeAvailable as it can, even if the final value (i.e. value on returning) of aNumberOfBytesAvailable<aNumberOfBytesRequiredToBeAvailable (i.e. it doesn't initially give up straight away and do nothing if aNumberOfBytesRequiredToBeAvailable>aNumberOfBytesThatCanBeMadeAvailable+aNumberOfBytesAvailable)
       
   143 	{
       
   144 	__ASSERT_DEBUG(aNumberOfBytesRequiredToBeAvailable>0, Panic(EPanicBadNumberOfBytesRequiredToBeAvailable));
       
   145 	__ASSERT_DEBUG(aNumberOfBytesAvailable>=0, Panic(EPanicBadNumberOfBytesAvailable));
       
   146 	__ASSERT_DEBUG(aNumberOfBytesThatCanBeMadeAvailable>=0, Panic(EPanicBadNumberOfBytesThatCanBeMadeAvailable));
       
   147 	TInt numberOfBytesMadeAvailable=0;
       
   148 	FOREVER
       
   149 		{
       
   150 		if (aNumberOfBytesAvailable>=aNumberOfBytesRequiredToBeAvailable)
       
   151 			{
       
   152 			break; // no more needs to be done
       
   153 			}
       
   154 		__ASSERT_DEBUG(numberOfBytesMadeAvailable<=aNumberOfBytesThatCanBeMadeAvailable, Panic(EPanicBadNumberOfBytesMadeAvailable1));
       
   155 		if (numberOfBytesMadeAvailable>=aNumberOfBytesThatCanBeMadeAvailable)
       
   156 			{
       
   157 			break; // give up - no more can be done
       
   158 			}
       
   159 		const TInt numberOfBytesInCharacter=(*aPointerToLastUsedByte&0x80)? 2: 1;
       
   160 		aPointerToLastUsedByte-=numberOfBytesInCharacter;
       
   161 		aNumberOfBytesAvailable+=numberOfBytesInCharacter;
       
   162 		numberOfBytesMadeAvailable+=numberOfBytesInCharacter;
       
   163 		IncrementNumberOfUnicodeCharactersNotConverted(aLengthOfUnicode, aNumberOfUnicodeCharactersNotConverted, aIndicesOfUnconvertibleCharacters);
       
   164 		}
       
   165 	__ASSERT_DEBUG(numberOfBytesMadeAvailable<=aNumberOfBytesThatCanBeMadeAvailable, Panic(EPanicBadNumberOfBytesMadeAvailable2));
       
   166 	}
       
   167 
       
    168 LOCAL_C void ConvertFromGb2312ToHzInPlace(TDes8& aDescriptor, TInt& aNumberOfUnicodeCharactersNotConverted, CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters, TInt aLengthOfUnicode)
       
        // Rewrites the bytes held in aDescriptor in place: each run of two-byte characters (first byte has bit 7 set) is wrapped in "~{" ... "~}" with bit 7 cleared on both bytes of every pair, and each '~' outside such a run is doubled to "~~".
       
        // The extra bytes come from the descriptor's spare capacity (MaxLength()-Length()); when that is not enough, MakeAvailable() gives up characters from the end of the data, which bumps aNumberOfUnicodeCharactersNotConverted and prunes aIndicesOfUnconvertibleCharacters.
       
        // aLengthOfUnicode is only forwarded to MakeAvailable() and IncrementNumberOfUnicodeCharactersNotConverted().
       
    169 	{
       
    170 	// it is legal for aDescriptor to be of length 0
       
    171 	const TInt originalLengthOfDescriptor=aDescriptor.Length();
       
    172 	if (originalLengthOfDescriptor>0)
       
    173 		{
       
    174 		TInt numberOfBytesAvailable=aDescriptor.MaxLength()-originalLengthOfDescriptor;
       
    175 		TUint8* pointerToPreviousByte=CONST_CAST(TUint8*, aDescriptor.Ptr()-1); // always points at the byte just before the next one to be examined (so initially one before the data)
       
    176 		const TUint8* pointerToLastUsedByte=pointerToPreviousByte+originalLengthOfDescriptor; // last byte of valid data; moves forwards when escapes are inserted and backwards when MakeAvailable() gives up characters
       
    177 		TBool isInGbBlock=EFalse;
       
    178 		FOREVER
       
    179 			{
       
    180 			__ASSERT_DEBUG((pointerToLastUsedByte-(aDescriptor.Ptr()-1))+numberOfBytesAvailable==aDescriptor.MaxLength(), Panic(EPanicBadDescriptorSubDivision1));
       
    181 			__ASSERT_DEBUG(pointerToPreviousByte<pointerToLastUsedByte, Panic(EPanicBadPointers1));
       
    182 			const TUint currentByte=*(pointerToPreviousByte+1);
       
    183 			if (currentByte&0x80)
       
    184 				{
       
    185 				if (!isInGbBlock)
       
    186 					{
       
    187 					MakeAvailable(4, aNumberOfUnicodeCharactersNotConverted, aIndicesOfUnconvertibleCharacters, aLengthOfUnicode, pointerToLastUsedByte, numberOfBytesAvailable, (pointerToLastUsedByte-pointerToPreviousByte)-2); // what's passed into the last parameter is not a typo - we do not want the two-byte character currently pointed to by (pointerToPreviousByte+1) to be made available
       
    188 					if (numberOfBytesAvailable<4) // 4 bytes are required for the "~{" "~}" escape sequences (thus ensuring that at least a single double-byte character can be put into the GB-block)
       
    189 						{
       
    190 						break;
       
    191 						}
       
    192 					isInGbBlock=ETrue;
       
    193 					Mem::Copy(pointerToPreviousByte+3, pointerToPreviousByte+1, pointerToLastUsedByte-pointerToPreviousByte); // shift the not-yet-processed data up by two bytes to open a gap for "~{" - NOTE(review): source and target overlap, so this relies on Mem::Copy coping with overlap; confirm
       
    194 					++pointerToPreviousByte;
       
    195 					*pointerToPreviousByte='~';
       
    196 					++pointerToPreviousByte;
       
    197 					*pointerToPreviousByte='{';
       
    198 					numberOfBytesAvailable-=2;
       
    199 					pointerToLastUsedByte+=2;
       
    200 					}
       
    201 				++pointerToPreviousByte;
       
    202 				*pointerToPreviousByte&=~0x80; // clear bit 7 of the first byte of the two-byte character
       
    203 				__ASSERT_DEBUG(pointerToPreviousByte<pointerToLastUsedByte, Panic(EPanicBadPointers2));
       
    204 				++pointerToPreviousByte;
       
    205 				*pointerToPreviousByte&=~0x80; // clear bit 7 of the second byte of the two-byte character
       
    206 				}
       
    207 			else
       
    208 				{
       
    209 				if (isInGbBlock)
       
    210 					{
       
        					// reached either by falling through (a single-byte character follows a GB-block) or by the goto at the bottom of the loop (the data ended while still inside a GB-block)
       
    211 closeGbBlock:
       
    212 					isInGbBlock=EFalse;
       
    213 					MakeAvailable(2, aNumberOfUnicodeCharactersNotConverted, aIndicesOfUnconvertibleCharacters, aLengthOfUnicode, pointerToLastUsedByte, numberOfBytesAvailable, pointerToLastUsedByte-pointerToPreviousByte);
       
    214 					if (numberOfBytesAvailable<2) // 2 bytes are required for the "~}" escape sequence
       
    215 						{
       
        						// no room could be found: overwrite the two bytes written most recently with "~}" and count one more character as not converted
       
    216 						IncrementNumberOfUnicodeCharactersNotConverted(aLengthOfUnicode, aNumberOfUnicodeCharactersNotConverted, aIndicesOfUnconvertibleCharacters);
       
    217 						*(pointerToPreviousByte-1)='~';
       
    218 						*pointerToPreviousByte='}';
       
    219 						break;
       
    220 						}
       
    221 					Mem::Copy(pointerToPreviousByte+3, pointerToPreviousByte+1, pointerToLastUsedByte-pointerToPreviousByte); // open a two-byte gap for "~}"
       
    222 					++pointerToPreviousByte;
       
    223 					*pointerToPreviousByte='~';
       
    224 					++pointerToPreviousByte;
       
    225 					*pointerToPreviousByte='}';
       
    226 					numberOfBytesAvailable-=2;
       
    227 					pointerToLastUsedByte+=2;
       
    228 					__ASSERT_DEBUG(pointerToPreviousByte<=pointerToLastUsedByte, Panic(EPanicBadPointers3));
       
    229 					if (pointerToPreviousByte>=pointerToLastUsedByte)
       
    230 						{
       
    231 						break;
       
    232 						}
       
    233 					}
       
    234 				if (currentByte=='~')
       
    235 					{
       
    236 					MakeAvailable(1, aNumberOfUnicodeCharactersNotConverted, aIndicesOfUnconvertibleCharacters, aLengthOfUnicode, pointerToLastUsedByte, numberOfBytesAvailable, (pointerToLastUsedByte-pointerToPreviousByte)-1); // what's passed into the last parameter is not a typo - we do not want the "~" currently pointed to by (pointerToPreviousByte+1) to be made available
       
    237 					if (numberOfBytesAvailable<1) // 1 byte is required for the extra "~" character
       
    238 						{
       
    239 						break;
       
    240 						}
       
    241 					Mem::Copy(pointerToPreviousByte+2, pointerToPreviousByte+1, pointerToLastUsedByte-pointerToPreviousByte); // open a one-byte gap for the extra "~"
       
    242 					++pointerToPreviousByte;
       
    243 					*pointerToPreviousByte='~';
       
    244 					numberOfBytesAvailable-=1;
       
    245 					pointerToLastUsedByte+=1;
       
    246 					}
       
    247 				++pointerToPreviousByte;
       
    248 				}
       
    249 			__ASSERT_DEBUG(pointerToPreviousByte<=pointerToLastUsedByte, Panic(EPanicBadPointers4));
       
    250 			if (pointerToPreviousByte>=pointerToLastUsedByte)
       
    251 				{
       
    252 				if (isInGbBlock)
       
    253 					{
       
    254 					goto closeGbBlock; // this is to share the code for closing the GB-block
       
    255 					}
       
    256 				break;
       
    257 				}
       
    258 			}
       
    259 		__ASSERT_DEBUG(pointerToPreviousByte<=pointerToLastUsedByte, Panic(EPanicBadPointers5));
       
    260 		if (pointerToPreviousByte<pointerToLastUsedByte)
       
    261 			{
       
        			// the loop stopped before reaching the end of the data: the one or two bytes left unprocessed are dropped and counted as a single character that was not converted
       
    262 			__ASSERT_DEBUG((pointerToPreviousByte==pointerToLastUsedByte-1) || (pointerToPreviousByte==pointerToLastUsedByte-2), Panic(EPanicBadPointers6));
       
    263 			numberOfBytesAvailable+=(pointerToLastUsedByte-pointerToPreviousByte);
       
    264 			pointerToLastUsedByte=pointerToPreviousByte;
       
    265 			IncrementNumberOfUnicodeCharactersNotConverted(aLengthOfUnicode, aNumberOfUnicodeCharactersNotConverted, aIndicesOfUnconvertibleCharacters);
       
    266 			}
       
    267 		//if it gets out from FOREVER, isInGbBlock could not be ETrue ~~~ so wouldn't need the assert
       
    268 		//__ASSERT_DEBUG(!isInGbBlock, Panic(EPanicStillInGbBlock));
       
    269 		aDescriptor.SetLength(aDescriptor.MaxLength()-numberOfBytesAvailable); // the new length is whatever part of the capacity is no longer spare
       
    270 		__ASSERT_DEBUG(aDescriptor.Length()==pointerToLastUsedByte-(aDescriptor.Ptr()-1), Panic(EPanicBadDescriptorSubDivision2));
       
    271 		}
       
    272 	}
       
   273 
       
   274 TInt CHZConverterImpl::ConvertFromUnicode(
       
   275 		CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
       
   276 		const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, 
       
   277 		TDes8& aForeign, 
       
   278 		const TDesC16& aUnicode, 
       
   279 		CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters)
       
   280 	{
       
   281 	TInt returnValue=CCnvCharacterSetConverter::DoConvertFromUnicode(CnvGb2312::ConversionData(), aDefaultEndiannessOfForeignCharacters, aReplacementForUnconvertibleUnicodeCharacters, aForeign, aUnicode, aIndicesOfUnconvertibleCharacters);
       
   282 	if (returnValue<0)
       
   283 		{
       
   284 		return returnValue; // this is an error-code
       
   285 		}
       
   286 	ConvertFromGb2312ToHzInPlace(aForeign, returnValue, aIndicesOfUnconvertibleCharacters, aUnicode.Length());
       
   287 	return returnValue;
       
   288 	}
       
   289 
       
   290 LOCAL_C TInt ConvertFromHzToHomogeneousGb2312(TBuf8<KLengthOfIntermediateBuffer>& aGb2312, TPtrC8& aHzBeingConsumed, TPtrC8& aRemainderOfHz, TInt& aState, TUint& aOutputConversionFlags)
       
   291 	{
       
   292 	// this function panics if aRemainderOfHz is of length 0
       
   293 	TUint8* pointerToPreviousGb2312Byte=CONST_CAST(TUint8*, aGb2312.Ptr()-1);
       
   294 	const TUint8* pointerToCurrentHzByte=aRemainderOfHz.Ptr();
       
   295 	const TUint8* const pointerToLastHzByte=pointerToCurrentHzByte+(aRemainderOfHz.Length()-1);
       
   296 	const TUint8* const pointerToLastHzByteToConvertThisTime=Min(pointerToLastHzByte, pointerToCurrentHzByte+(KLengthOfIntermediateBuffer-1));
       
   297 	FOREVER
       
   298 		{
       
   299 		const TUint currentHzByte=*pointerToCurrentHzByte;
       
   300 		if (currentHzByte=='~')
       
   301 			{
       
   302 			__ASSERT_DEBUG(pointerToCurrentHzByte<=pointerToLastHzByte, Panic(EPanicBadPointers7));
       
   303 			if (pointerToCurrentHzByte>=pointerToLastHzByte)
       
   304 				{
       
   305 				aOutputConversionFlags|=CCnvCharacterSetConverter::EOutputConversionFlagInputIsTruncated;
       
   306 				--pointerToCurrentHzByte;
       
   307 				break;
       
   308 				}
       
   309 			++pointerToCurrentHzByte;
       
   310 			const TUint nextHzByte=*pointerToCurrentHzByte;
       
   311 			switch (nextHzByte)
       
   312 				{
       
   313 			case '{':
       
   314 				if (aState==KIsInGbBlock)
       
   315 					{
       
   316 					return CCnvCharacterSetConverter::EErrorIllFormedInput;
       
   317 					}
       
   318 				aState=KIsInGbBlock;
       
   319 				break;
       
   320 			case '}':
       
   321 				if (aState==CCnvCharacterSetConverter::KStateDefault)
       
   322 					{
       
   323 					return CCnvCharacterSetConverter::EErrorIllFormedInput;
       
   324 					}
       
   325 				aState=CCnvCharacterSetConverter::KStateDefault;
       
   326 				break;
       
   327 			case '~':
       
   328 				++pointerToPreviousGb2312Byte;
       
   329 				*pointerToPreviousGb2312Byte=STATIC_CAST(TUint8, currentHzByte);
       
   330 				break;
       
   331 			case 0x0a:
       
   332 				break;
       
   333 			default:
       
   334 				return CCnvCharacterSetConverter::EErrorIllFormedInput;
       
   335 				}
       
   336 			}
       
   337 		else
       
   338 			{
       
   339 			__ASSERT_DEBUG(pointerToCurrentHzByte<=pointerToLastHzByte, Panic(EPanicBadPointers8));
       
   340 			if (pointerToCurrentHzByte>pointerToLastHzByteToConvertThisTime)
       
   341 				{
       
   342 				--pointerToCurrentHzByte;
       
   343 				break;
       
   344 				}
       
   345 			if (aState==CCnvCharacterSetConverter::KStateDefault)
       
   346 				{
       
   347 				++pointerToPreviousGb2312Byte;
       
   348 				*pointerToPreviousGb2312Byte=STATIC_CAST(TUint8, currentHzByte);
       
   349 				}
       
   350 			else
       
   351 				{
       
   352 				__ASSERT_DEBUG(aState==KIsInGbBlock, Panic(EPanicBadState));
       
   353 				__ASSERT_DEBUG(pointerToCurrentHzByte<=pointerToLastHzByteToConvertThisTime, Panic(EPanicBadPointers9));
       
   354 				if (pointerToCurrentHzByte>=pointerToLastHzByteToConvertThisTime)
       
   355 					{
       
   356 					aOutputConversionFlags|=CCnvCharacterSetConverter::EOutputConversionFlagInputIsTruncated;
       
   357 					--pointerToCurrentHzByte;
       
   358 					break;
       
   359 					}
       
   360 				++pointerToCurrentHzByte;
       
   361 				++pointerToPreviousGb2312Byte;
       
   362 				*pointerToPreviousGb2312Byte=STATIC_CAST(TUint8, currentHzByte|0x80);
       
   363 				++pointerToPreviousGb2312Byte;
       
   364 				*pointerToPreviousGb2312Byte=STATIC_CAST(TUint8, *pointerToCurrentHzByte|0x80);
       
   365 				}
       
   366 			}
       
   367 		__ASSERT_DEBUG(pointerToCurrentHzByte<=pointerToLastHzByte, Panic(EPanicBadPointers10));
       
   368 		if (pointerToCurrentHzByte>=pointerToLastHzByte)
       
   369 			{
       
   370 			break;
       
   371 			}
       
   372 		++pointerToCurrentHzByte;
       
   373 		}
       
   374 	aGb2312.SetLength((pointerToPreviousGb2312Byte+1)-aGb2312.Ptr());
       
   375 	const TInt numberOfHzBytesBeingConsumed=(pointerToCurrentHzByte+1)-aRemainderOfHz.Ptr();
       
   376 	aHzBeingConsumed.Set(aRemainderOfHz.Left(numberOfHzBytesBeingConsumed));
       
   377 	aRemainderOfHz.Set(aRemainderOfHz.Mid(numberOfHzBytesBeingConsumed));
       
   378 #if defined(_DEBUG)
       
   379 	// AAA: check that if the split occurs on a boundary between some one-byte and some two-byte text, then aState corresponds to the state *after* the split (the code marked "BBB" relies on this)
       
   380 	if (aRemainderOfHz.Length()>=2)
       
   381 		{
       
   382 		__ASSERT_DEBUG(aRemainderOfHz.Left(2)!=_L8("~{"), Panic(EPanicSplitBoundaryIsNotAsLateAsPossible1));
       
   383 		__ASSERT_DEBUG(aRemainderOfHz.Left(2)!=_L8("~}"), Panic(EPanicSplitBoundaryIsNotAsLateAsPossible2));
       
   384 		}
       
   385 #endif
       
   386 	return 0;
       
   387 	}
       
   388 
       
   389 LOCAL_C TInt Gb2312IndexToHzIndex(const TDesC8& aHz, TInt aGb2312Index, TBool aReturnMaximalHzIndex)
       
   390 	{
       
   391 	// this function panics if aHz is of length 0
       
   392 	// aHz may start in either KIsInGbBlock or CCnvCharacterSetConverter::KStateDefault state, but it must *not* have any truncated sequences (i.e. "tilde <something>" sequence that is not complete, or part of a 2-byte character sequence) at either its start or its end
       
   393 	__ASSERT_DEBUG(aGb2312Index>=0, Panic(EPanicBadGb2312Index));
       
   394 	TInt hzIndex=0;
       
   395 	TInt offsetFromGb2312IndexToHzIndex=0;
       
   396 	const TUint8* const pointerToFirstHzByte=aHz.Ptr();
       
   397 	const TUint8* pointerToCurrentHzByte=pointerToFirstHzByte;
       
   398 	const TUint8* const pointerToLastHzByte=pointerToFirstHzByte+(aHz.Length()-1);
       
   399 	FOREVER
       
   400 		{
       
   401 		const TInt newHzIndex=pointerToCurrentHzByte-pointerToFirstHzByte;
       
   402 		const TInt candidateHzIndex=aGb2312Index+offsetFromGb2312IndexToHzIndex;
       
   403 		__ASSERT_DEBUG(hzIndex<=candidateHzIndex, Panic(EPanicBadHzIndex));
       
   404 		if (aReturnMaximalHzIndex? (newHzIndex>candidateHzIndex): (hzIndex>=candidateHzIndex))
       
   405 			{
       
   406 			break;
       
   407 			}
       
   408 		hzIndex=newHzIndex;
       
   409 		if (*pointerToCurrentHzByte=='~')
       
   410 			{
       
   411 			__ASSERT_DEBUG(pointerToCurrentHzByte<=pointerToLastHzByte, Panic(EPanicBadPointers11));
       
   412 			if (pointerToCurrentHzByte>=pointerToLastHzByte)
       
   413 				{
       
   414 				break;
       
   415 				}
       
   416 			++pointerToCurrentHzByte;
       
   417 			const TUint currentHzByte=*pointerToCurrentHzByte;
       
   418 			if (currentHzByte=='~')
       
   419 				{
       
   420 				++offsetFromGb2312IndexToHzIndex;
       
   421 				}
       
   422 			else
       
   423 				{
       
   424 				__ASSERT_DEBUG((currentHzByte=='{') || (currentHzByte=='}') || (currentHzByte==0x0a), Panic(EPanicBadTildeSequence));
       
   425 				offsetFromGb2312IndexToHzIndex+=2;
       
   426 				}
       
   427 			}
       
   428 		__ASSERT_DEBUG(pointerToCurrentHzByte<=pointerToLastHzByte, Panic(EPanicBadPointers12));
       
   429 		if (pointerToCurrentHzByte>=pointerToLastHzByte)
       
   430 			{
       
   431 			break;
       
   432 			}
       
   433 		++pointerToCurrentHzByte;
       
   434 		}
       
   435 	return hzIndex;
       
   436 	}
       
   437 
       
   438 TInt CHZConverterImpl::ConvertToUnicode(
       
   439 		CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
       
   440 		TDes16& aUnicode, 
       
   441 		const TDesC8& aForeign, 
       
   442 		TInt& aState, 
       
   443 		TInt& aNumberOfUnconvertibleCharacters, 
       
   444 		TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter)
       
   445 	{
       
   446 	aUnicode.SetLength(0);
       
   447 	TPtrC8 remainderOfHz(aForeign);
       
   448 	TInt numberOfHzBytesConsumed=0;
       
   449 	TUint outputConversionFlags=0;
       
   450 	TUint inputConversionFlags=CCnvCharacterSetConverter::EInputConversionFlagAppend;
       
   451 	const SCnvConversionData& gb2312ConversionData=CnvGb2312::ConversionData();
       
   452 	FOREVER
       
   453 		{
       
   454 		__ASSERT_DEBUG(numberOfHzBytesConsumed+remainderOfHz.Length()==aForeign.Length(), Panic(EPanicBadDescriptorSubDivision3));
       
   455 #if defined(_DEBUG)
       
   456 		const TInt oldLengthOfRemainderOfHz=remainderOfHz.Length();
       
   457 #endif
       
   458 		TBuf8<KLengthOfIntermediateBuffer> gb2312;
       
   459 		TPtrC8 hzBeingConsumed;
       
   460 		const TInt returnValue1=ConvertFromHzToHomogeneousGb2312(gb2312, hzBeingConsumed, remainderOfHz, aState, outputConversionFlags);
       
   461 		if (returnValue1<0)
       
   462 			{
       
   463 			return returnValue1; // this is an error-code
       
   464 			}
       
   465 		__ASSERT_DEBUG(returnValue1==0, Panic(EPanicBadReturnValue1));
       
   466 		__ASSERT_DEBUG(hzBeingConsumed.Length()+remainderOfHz.Length()==oldLengthOfRemainderOfHz, Panic(EPanicRemainderOfHzHasGotLonger));
       
   467 		if (hzBeingConsumed.Length()==0)
       
   468 			{
       
   469 			break;
       
   470 			}
       
   471 		TInt numberOfUnconvertibleCharacters;
       
   472 		TInt indexOfFirstByteOfFirstUnconvertibleCharacter;
       
   473 		const TInt returnValue2=CCnvCharacterSetConverter::DoConvertToUnicode(gb2312ConversionData, aDefaultEndiannessOfForeignCharacters, aUnicode, gb2312, numberOfUnconvertibleCharacters, indexOfFirstByteOfFirstUnconvertibleCharacter, outputConversionFlags, inputConversionFlags);
       
   474 		if (returnValue2<0)
       
   475 			{
       
   476 			return returnValue2; // this is an error-code
       
   477 			}
       
   478 		if (numberOfUnconvertibleCharacters>0)
       
   479 			{
       
   480 			if (aNumberOfUnconvertibleCharacters==0)
       
   481 				{
       
   482 				aIndexOfFirstByteOfFirstUnconvertibleCharacter=numberOfHzBytesConsumed+Gb2312IndexToHzIndex(hzBeingConsumed, indexOfFirstByteOfFirstUnconvertibleCharacter, EFalse);
       
   483 				}
       
   484 			aNumberOfUnconvertibleCharacters+=numberOfUnconvertibleCharacters;
       
   485 			}
       
   486 		if (returnValue2>0)
       
   487 			{
       
   488 			const TInt numberOfGb2312BytesConverted=gb2312.Length()-returnValue2;
       
   489 			__ASSERT_DEBUG(numberOfGb2312BytesConverted>=0, Panic(EPanicBadReturnValue2));
       
   490 			// don't call gb2312.SetLength(numberOfGb2312BytesConverted) as we want to access gb2312[numberOfGb2312BytesConverted] - in any case, gb2312's length is never going to be used again
       
   491 			// don't bother re-setting remainderOfHz as it won't be used again
       
   492 			numberOfHzBytesConsumed+=Gb2312IndexToHzIndex(hzBeingConsumed, numberOfGb2312BytesConverted, ETrue);
       
   493 			aState=(gb2312[numberOfGb2312BytesConverted]&0x80)? KIsInGbBlock: CCnvCharacterSetConverter::KStateDefault; // BBB: if the split (between the text that was converted and the text that wasn't converted) occurs on a boundary between some one-byte and some two-byte text, then aState corresponds to the state *after* the split (the code marked "AAA" checks this) - this means that we set aState according to gb2312[numberOfGb2312BytesConverted] rather than gb2312[numberOfGb2312BytesConverted-1]
       
   494 			break;
       
   495 			}
       
   496 		numberOfHzBytesConsumed+=hzBeingConsumed.Length();
       
   497 		remainderOfHz.Set(aForeign.Mid(numberOfHzBytesConsumed));
       
   498 		__ASSERT_DEBUG(numberOfHzBytesConsumed+remainderOfHz.Length()==aForeign.Length(), Panic(EPanicBadDescriptorSubDivision4));
       
   499 		if (remainderOfHz.Length()==0)
       
   500 			{
       
   501 			break;
       
   502 			}
       
   503 		if (numberOfHzBytesConsumed>0)
       
   504 			{
       
   505 			inputConversionFlags|=CCnvCharacterSetConverter::EInputConversionFlagAllowTruncatedInputNotEvenPartlyConsumable;
       
   506 			}
       
   507 		}
       
   508 	// N.B. remainderOfHz is in an undefined state by this point
       
   509 	if ((numberOfHzBytesConsumed==0) && (outputConversionFlags&CCnvCharacterSetConverter::EOutputConversionFlagInputIsTruncated))
       
   510 		{
       
   511 		return CCnvCharacterSetConverter::EErrorIllFormedInput;
       
   512 		}
       
   513 	return aForeign.Length()-numberOfHzBytesConsumed;
       
   514 	}
       
   515 
       
   516 TBool CHZConverterImpl::IsInThisCharacterSetL(
       
   517 		TBool& aSetToTrue, 
       
   518 		TInt& aConfidenceLevel, 
       
   519 		const TDesC8& aSample)
       
   520 	{
       
   521 	aSetToTrue=ETrue;
       
   522 	TInt sampleLength = aSample.Length();
       
   523 	TInt pairOfTilde=0;
       
   524 	TInt occrenceOfNonHz=0;
       
   525 	aConfidenceLevel = 50;
       
   526 	// Hz encoding uses escape sequences...
       
   527 	for (TInt i = 0; i < sampleLength; ++i)
       
   528 		{
       
   529 		if (aSample[i]>0x7e)
       
   530 			occrenceOfNonHz++;
       
   531 		if (aSample[i]==0x7e)
       
   532 			{
       
   533 			TInt increment1 = i+1;
       
   534 			if (increment1 >= sampleLength)
       
   535 				break;
       
   536 			if ((aSample[increment1] == 0x7b)||(aSample[increment1] == 0x7d)||(aSample[increment1] == 0x7e))
       
   537 				{
       
   538 				pairOfTilde++;
       
   539 				i++;
       
   540 				}
       
   541 			}
       
   542 		}//for
       
   543 	if (sampleLength)
       
   544 		{
       
   545 		TInt occurrenceOftilde =2*pairOfTilde*100/sampleLength;
       
   546 		aConfidenceLevel=aConfidenceLevel-Max(0,(4-occurrenceOftilde));
       
   547 		aConfidenceLevel += occurrenceOftilde;
       
   548 		aConfidenceLevel -= ((occrenceOfNonHz*100)/sampleLength);
       
   549 		}
       
   550 	return ETrue;
       
   551 	}
       
   552 
       
   553 CHZConverterImpl* CHZConverterImpl::NewL()
       
   554 	{
       
   555 	CHZConverterImpl* self = new(ELeave) CHZConverterImpl();
       
   556 	return self;
       
   557 	}
       
   558 
       
   559 CHZConverterImpl::~CHZConverterImpl()
       
   560 	{
       
   561 	}
       
   562 
       
   563 CHZConverterImpl::CHZConverterImpl()
       
   564 	{
       
   565 	}
       
   566 
       
    567 const TImplementationProxy ImplementationTable[] = // table handed out by ImplementationGroupProxy(): pairs implementation UID 0x10006065 with the factory function CHZConverterImpl::NewL
       
    568 	{
       
    569 		IMPLEMENTATION_PROXY_ENTRY(0x10006065,	CHZConverterImpl::NewL)
       
    570 	};
       
   571 
       
   572 EXPORT_C const TImplementationProxy* ImplementationGroupProxy(TInt& aTableCount)
       
   573 	{
       
   574 	aTableCount = sizeof(ImplementationTable) / sizeof(TImplementationProxy);
       
   575 
       
   576 	return ImplementationTable;
       
   577 	}