charconvfw/charconvplugins/src/plugins/gb18030.cpp
changeset 0 1fb32624e06b
equal deleted inserted replaced
-1:000000000000 0:1fb32624e06b
       
     1 /*
       
     2 * Copyright (c) 2009 Nokia Corporation and/or its subsidiary(-ies).
       
     3 * All rights reserved.
       
     4 * This component and the accompanying materials are made available
       
     5 * under the terms of "Eclipse Public License v1.0"
       
     6 * which accompanies this distribution, and is available
       
     7 * at the URL "http://www.eclipse.org/legal/epl-v10.html".
       
     8 *
       
     9 * Initial Contributors:
       
    10 * Nokia Corporation - initial contribution.
       
    11 *
       
    12 * Contributors:
       
    13 *
       
    14 * Description:  GB18030 converter implementation
       
    15 *
       
    16 */
       
    17 
       
    18 
       
    19 #include <e32std.h>
       
    20 #include <charconv.h>
       
    21 #include <convutils.h>
       
    22 #include <convdata.h>
       
    23 #include "gb2312.h"
       
    24 #include "gbk.h"
       
    25 #include "gb18030_4byte.h"
       
    26 #include "gb18030_diff_gbk.h"
       
    27 #include <ecom/implementationproxy.h>
       
    28 #include <charactersetconverter.h>
       
    29 
       
    30 class CGB18030ConverterImpl : public CCharacterSetConverterPluginInterface
       
    31 	{
       
    32 
       
    33 public:
       
    34 	virtual const TDesC8& ReplacementForUnconvertibleUnicodeCharacters();
       
    35 
       
    36 	virtual TInt ConvertFromUnicode(
       
    37 		CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
       
    38 		const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, 
       
    39 		TDes8& aForeign, 
       
    40 		const TDesC16& aUnicode, 
       
    41 		CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters);
       
    42 
       
    43 	virtual TInt ConvertToUnicode(
       
    44 		CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
       
    45 		TDes16& aUnicode, 
       
    46 		const TDesC8& aForeign, 
       
    47 		TInt& aState, 
       
    48 		TInt& aNumberOfUnconvertibleCharacters, 
       
    49 		TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter);
       
    50 
       
    51 	virtual TBool IsInThisCharacterSetL(
       
    52 		TBool& aSetToTrue, 
       
    53 		TInt& aConfidenceLevel, 
       
    54 		const TDesC8& aSample);
       
    55 
       
    56 	static CGB18030ConverterImpl* NewL();
       
    57 	virtual ~CGB18030ConverterImpl();
       
    58 
       
    59 private:
       
    60 	CGB18030ConverterImpl();
       
    61 	TInt ConstructL();
       
    62 	SCnvConversionData * completeGb18030_2byteConversionData; // a merged conversion data including Gb18030-diff-g2312, GB2312, Gb18030-diff-gbk and Gbk
       
    63 	TUint8 * workingMemory;
       
    64 	};
       
    65 
       
    66 // Implement gb18030 plug-in using cnvutils framework in which gb2312 and gbk conversion data is re-used for memory saving 
       
    67 // 1) foreign->unicode:
       
    68 //    1.1) 1 byte->unicode bmp: use gb2312 mapping table;
       
    69 //
       
    70 //    1.2) 2 byte->unicode bmp: use gb18030-2byte mapping table (gb18030_diff_gb2312ConversionData + gb2312ConversionData + gb18030_diff_gbkConversionData + gbkConversionData);
       
    71 //
       
    72 //    1.3) 4 byte->unicode bmp: use gb18030-4byte mapping table;
       
    73 //
       
    74 //    1.4) 4 byte->unicode non-bmp: calculate with formula.
       
    75 //
       
    76 // 2) unicode->foreign:
       
    77 //    2.1) firstly check gb18030-2byte mapping table (gb18030_diff_gb2312ConversionData + gb2312ConversionData + gb18030_diff_gbkConversionData + gbkConversionData);
       
    78 //
       
    79 //    2.2) if not found in 2.1), check gb18030-4byte mapping table;
       
    80 //
       
    81 //    2.3) if not found in 2.2), calculate with formula (gb18030-4byte non BMP);
       
    82 //
       
    83 
       
    84 
       
    85 // GB18030-diff-gb2312 defines 1 foreign-to-Unicode range and 2 unicode-to-Foreign range
       
    86 // GB2312.CTL defines 21 foreign-to-Unicode ranges and 21 Unicode-to-foreign ranges
       
    87 // GB18030-diff-gbk defines 1 foreign-to-Unicode ranges and 2 Unicode-to-foreign range
       
    88 // GBK.CTL defines 2 foreign-to-Unicode ranges and 2 Unicode-to-foreign range
       
    89 const TInt KNumberOfBytesOfWorkingMemory=(1+2+21+21+1+2+2+2)*sizeof(SCnvConversionData::SOneDirectionData::SRange);  //totally 1040 bytes
       
    90 
       
    91 const TDesC8& CGB18030ConverterImpl::ReplacementForUnconvertibleUnicodeCharacters()
       
    92 	{
       
    93 	return CnvGb18030_diff_gbk::ReplacementForUnconvertibleUnicodeCharacters();
       
    94 	}
       
    95 
       
     // Panic category passed to User::Panic() by this plug-in's Panic() helper.
     _LIT(KLitPanicText, "GB18030");
       
    97 enum TPanic
       
    98 	{
       
    99 	EPanicNothingToConvert1=1,
       
   100 	EPanicNothingToConvert2,
       
   101 	EPanicNothingToConvert3,
       
   102 	EPanicNothingToConvert4,
       
   103 	EPanicNothingToConvert5,
       
   104 	EPanicNothingToConvert6,
       
   105 	EPanicOddNumberOfBytes1,
       
   106 	EPanicOddNumberOfBytes2,
       
   107 	EPanicOddNumberOfBytes3,
       
   108 	EPanicOddNumberOfBytes4,
       
   109 	EPanicOddNumberOfBytes5,
       
   110 	EPanicOddNumberOfBytes6,
       
   111 	EPanicBadHighBit1,
       
   112 	EPanicBadHighBit2,
       
   113 	EPanicBadHighBit3,
       
   114 	EPanicBadHighBit4,
       
   115 	EPanicBadHighBit5,
       
   116 	EPanicBadHighBit6,
       
   117 	EPanicBadHighBit7,
       
   118 	EPanicBadPointers1,
       
   119 	EPanicBadPointers2,
       
   120 	EPanicBadPointers3,
       
   121 	EPanicBadPointers4,
       
   122 	EPanicBadPointers5,
       
   123 	EPanicBadPointers6,
       
   124 	EPanicBadPointers7,
       
   125 	EPanicBadPointers8,
       
   126 	EPanicBadPointers9,
       
   127 	EPanicBadPointers10,
       
   128 	EPanicBadPointers11,
       
   129 	EPanicBadPointers12,
       
   130 	EPanicBadPointers13,
       
   131 	EPanicBadPointers14,
       
   132 	EPanicBadPointers15,
       
   133 	EPanicBadPointers16,
       
   134 	EPanicBadPointers17,
       
   135 	EPanicBadPointers18,
       
   136 	EPanicBadPointers19,
       
   137 	EPanicBadPointers20,
       
   138 	EPanicBadPointers21,
       
   139 	EPanicBadPointers22,
       
   140 	EPanicBadPointers23,
       
   141 	EPanicBadPointers24,
       
   142 	EPanicBadPointers25,
       
   143 	EPanicBadPointers26,
       
   144 	EPanicBadPointers27,
       
   145 	EPanicBadPointers28,
       
   146 	EPanicBadPointers29,
       
   147 	EPanicBadPointers30,
       
   148 	EPanicBadPointers31,
       
   149 	EPanicBadPointers32,
       
   150 	EPanicBadPointers33,
       
   151 	EPanicBadPointers34,
       
   152 	EPanicBadPointers35,
       
   153 	EPanicBadPointers36,
       
   154 	EPanicBadCalculation1,
       
   155 	EPanicBadCalculation2,
       
   156 	EPanicNumberOfBytesIsNotMultipleOfThree1,
       
   157 	EPanicNumberOfBytesIsNotMultipleOfThree2,
       
   158 	EPanicSingleShift2Expected,
       
   159 	EPanicSingleShift3Expected,
       
   160 	EPanicTooManyBytesOfWorkingMemoryUsed1,
       
   161 	EPanicTooManyBytesOfWorkingMemoryUsed2
       
   162 	};
       
   163 
       
   164 LOCAL_C void Panic(TPanic aPanic)
       
   165 	{
       
   166 	User::Panic(KLitPanicText, aPanic);
       
   167 	}
       
   168 
       
   169 #define ARRAY_LENGTH(aArray) (sizeof(aArray)/sizeof((aArray)[0]))
       
   170 
       
   171 LOCAL_C void Step12DummyConvertFromIntermediateBufferInPlace(TInt, TDes8&, TInt& aNumberOfCharactersThatDroppedOut)
       
   172 	{
       
   173 	aNumberOfCharactersThatDroppedOut=0;
       
   174 	}
       
   175 
       
   176 // Perform the actual conversion (unicode -> gb18030 4byte non-BMP) using formula in this function
       
   177 LOCAL_C void Step3ConvertFromIntermediateBufferInPlace(TInt aStartPositionInDescriptor, TDes8& aDescriptor, TInt& aNumberOfCharactersThatDroppedOut)
       
   178 	{
       
   179 	aNumberOfCharactersThatDroppedOut = 0;		// no drop out, because all GB18030 outside BMP are exactly 4-bytes
       
   180 	
       
   181 	const TInt descriptorLength=aDescriptor.Length();
       
   182 	TUint8* pVeryFrom = CONST_CAST(TUint8*, aDescriptor.Ptr());
       
   183 	const TUint8* pEnd = pVeryFrom + descriptorLength;
       
   184 	TUint8* pFrom = pVeryFrom + aStartPositionInDescriptor;
       
   185 	FOREVER
       
   186 		{
       
   187 		if (pFrom + 4 > pEnd)
       
   188 			{
       
   189 			__ASSERT_DEBUG(pFrom==pEnd, Panic(EPanicBadPointers25));
       
   190 			break;
       
   191 			}
       
   192 		TUint characterCode = 0;
       
   193 		for (TInt i=0; i<4; i++)
       
   194 			{
       
   195 			characterCode <<= 8;
       
   196 			characterCode += pFrom[i];
       
   197 			}
       
   198 		
       
   199 		// to gb18030
       
   200 		characterCode -= 0x10000;
       
   201 		TUint b4 = characterCode % 10 + 0x30;
       
   202 		characterCode /= 10;
       
   203 		TUint b3 = characterCode % 126 + 0x81;
       
   204 		characterCode /= 126;
       
   205 		TUint b2 = characterCode % 10 + 0x30;
       
   206 		TUint b1 = characterCode / 10 + 0x90;
       
   207 		
       
   208 		*pFrom++ = b1;
       
   209 		*pFrom++ = b2;
       
   210 		*pFrom++ = b3;
       
   211 		*pFrom++ = b4;
       
   212 		}
       
   213 	aDescriptor.SetLength(pFrom-pVeryFrom);
       
   214 	}
       
   215 
       
   216 // gb2312-1byte ->unicode (0x00 - 0x7F)
       
   217 LOCAL_C TInt Step0NumberOfBytesAbleToConvertToUnicode(const TDesC8& aDescriptor)
       
   218 	{
       
   219 	const TInt descriptorLength=aDescriptor.Length();
       
   220 	const TUint8* pointerToPreviousByte=aDescriptor.Ptr()-1;
       
   221 	const TUint8* const pointerToLastByte=pointerToPreviousByte+aDescriptor.Length();
       
   222 	
       
   223 	TInt numOfBytes = 0;
       
   224 	FOREVER
       
   225 		{
       
   226 		if (pointerToPreviousByte>=pointerToLastByte)
       
   227 			{
       
   228 			break;
       
   229 			}
       
   230 		// byte 1
       
   231 		TUint b1 = pointerToPreviousByte[1];
       
   232 		if (b1 <= 0x7F)
       
   233 			{
       
   234 			pointerToPreviousByte++;
       
   235 			numOfBytes++;
       
   236 			}
       
   237 		else
       
   238 			break;
       
   239 		}
       
   240 		return numOfBytes;
       
   241 	}
       
   242 
       
   243 // gb18030-2byte --> unicode (0x8140 - 0xFE7E, 0x8180 - 0xFEFE)
       
   244 LOCAL_C TInt Step1NumberOfBytesAbleToConvertToUnicode(const TDesC8& aDescriptor)
       
   245 	{
       
   246 	const TInt descriptorLength=aDescriptor.Length();
       
   247 	const TUint8* pointerToPreviousByte=aDescriptor.Ptr()-1;
       
   248 	const TUint8* const pointerToLastByte=pointerToPreviousByte+aDescriptor.Length();
       
   249 
       
   250 	TInt numOfBytes = 0;
       
   251 	FOREVER
       
   252 		{
       
   253 		if (pointerToPreviousByte>=pointerToLastByte)
       
   254 			{
       
   255 			break;
       
   256 			}
       
   257 		// byte 1
       
   258 		TUint b1 = pointerToPreviousByte[1];
       
   259 		if (b1 <= 0x80 || b1 > 0xFE)
       
   260 			break;
       
   261 
       
   262 		// byte 2
       
   263 		if (pointerToPreviousByte+1 >= pointerToLastByte)
       
   264 			break;
       
   265 		TUint b2 = pointerToPreviousByte[2];
       
   266 		if (b2 >= 0x40 && b2 <= 0xFE && b2 != 0x7F)		// all gb18030 2-byte code
       
   267 			{
       
   268 			pointerToPreviousByte = pointerToPreviousByte + 2;
       
   269 			numOfBytes = numOfBytes + 2;
       
   270 			}
       
   271 		else if (b2 < 0x30 || b2 > 0x39)
       
   272 			{
       
   273 			if (numOfBytes <= 0)
       
   274 				return CCnvCharacterSetConverter::EErrorIllFormedInput;
       
   275 			else
       
   276 				break;
       
   277 			}
       
   278 		else
       
   279 			break;
       
   280 		}
       
   281 		return numOfBytes;
       
   282 	}
       
   283 
       
   284 
       
   285 // gb18030 4-bytes bmp --> unicode (0x81308130 - 0x8439FE39)
       
   286 LOCAL_C TInt Step2NumberOfBytesAbleToConvertToUnicode(const TDesC8& aDescriptor)
       
   287 	{
       
   288 	const TUint8* pointerToPreviousByte=aDescriptor.Ptr()-1;
       
   289 	const TUint8* const pointerToLastByte=pointerToPreviousByte+aDescriptor.Length();
       
   290 	__ASSERT_DEBUG(pointerToPreviousByte<=pointerToLastByte, Panic(EPanicBadPointers25));
       
   291 	
       
   292 	TInt numOfBytes = 0;
       
   293 	FOREVER
       
   294 		{
       
   295 		if (pointerToPreviousByte>=pointerToLastByte)
       
   296 			{
       
   297 			break;
       
   298 			}
       
   299 	
       
   300 		// byte 1
       
   301 		TUint b1 = pointerToPreviousByte[1];
       
   302 		if ((b1 < 0x81) || (b1 > 0x84)){
       
   303 			break;
       
   304 		}
       
   305 			
       
   306 		// byte 2
       
   307 		if (pointerToPreviousByte+1 >= pointerToLastByte)
       
   308 			break;
       
   309 		TUint b2 = pointerToPreviousByte[2];
       
   310 		if (b2 >= 0x40 && b2 <= 0xFE && b2 != 0x7F)		// all gb18030 2-byte code
       
   311 			break;
       
   312 		else if (b2 < 0x30 || b2 > 0x39)
       
   313 			{
       
   314 			if (numOfBytes == 0)
       
   315 				return CCnvCharacterSetConverter::EErrorIllFormedInput;
       
   316 			else 
       
   317 				break;
       
   318 			}
       
   319 
       
   320 		
       
   321 		// byte 3
       
   322 		if (pointerToPreviousByte+2 >= pointerToLastByte)
       
   323 			break;
       
   324 		TUint b3 = pointerToPreviousByte[3];
       
   325 		if (b3 < 0x81 || b3 > 0xFE)
       
   326 			{
       
   327 			if (numOfBytes == 0)
       
   328 				return CCnvCharacterSetConverter::EErrorIllFormedInput;
       
   329 			else 
       
   330 				break;
       
   331 			}
       
   332 		
       
   333 		// byte 4
       
   334 		if (pointerToPreviousByte+3 >= pointerToLastByte)
       
   335 			break;
       
   336 		TUint b4 = pointerToPreviousByte[4];
       
   337 		if (b4 < 0x30 || b4 > 0x39)
       
   338 			{
       
   339 			if (numOfBytes == 0)
       
   340 				return CCnvCharacterSetConverter::EErrorIllFormedInput;
       
   341 			else 
       
   342 				break;
       
   343 			}
       
   344 		else
       
   345 			{
       
   346 				numOfBytes = numOfBytes + 4;
       
   347 				pointerToPreviousByte = pointerToPreviousByte+4;
       
   348 			}
       
   349 		}
       
   350 		
       
   351 		return numOfBytes;
       
   352 	}
       
   353 
       
   354 // gb18030 4-bytes non-bmp --> unicode (0x90308130~0xE339FE39)
       
   355 LOCAL_C TInt Step3NumberOfBytesAbleToConvertToUnicode(const TDesC8& aDescriptor)
       
   356 	{
       
   357 	const TUint8* pointerToPreviousByte=aDescriptor.Ptr()-1;
       
   358 	const TUint8* const pointerToLastByte=pointerToPreviousByte+aDescriptor.Length();
       
   359 	__ASSERT_DEBUG(pointerToPreviousByte<=pointerToLastByte, Panic(EPanicBadPointers25));
       
   360 
       
   361 	TInt numOfBytes = 0;
       
   362 	FOREVER
       
   363 		{
       
   364 		if (pointerToPreviousByte>=pointerToLastByte)
       
   365 			{
       
   366 			break;
       
   367 			}
       
   368 		
       
   369 		// byte 1
       
   370 		TUint b1 = pointerToPreviousByte[1];
       
   371 		if (b1 < 0x90 || b1 > 0xE3)
       
   372 			break;
       
   373 		
       
   374 		// byte 2
       
   375 		if (pointerToPreviousByte+1 >= pointerToLastByte)
       
   376 			break;
       
   377 		TUint b2 = pointerToPreviousByte[2];
       
   378 		if (b2 >= 0x40 && b2 <= 0xFE && b2 != 0x7F)
       
   379 			break;
       
   380 		else if (b2 < 0x30 || b2 > 0x39)
       
   381 			{
       
   382 			if (numOfBytes == 0)
       
   383 				return CCnvCharacterSetConverter::EErrorIllFormedInput;
       
   384 			else 
       
   385 				break;
       
   386 			}
       
   387 			
       
   388 		// byte 3
       
   389 		if (pointerToPreviousByte+2 >= pointerToLastByte)
       
   390 			break;
       
   391 		TUint b3 = pointerToPreviousByte[3];
       
   392 		if (b3 < 0x81 || b3 > 0xFE)
       
   393 			{
       
   394 			if (numOfBytes == 0)
       
   395 				return CCnvCharacterSetConverter::EErrorIllFormedInput;
       
   396 			else 
       
   397 				break;
       
   398 			}
       
   399 		
       
   400 		// byte 4
       
   401 		if (pointerToPreviousByte+3 >= pointerToLastByte)
       
   402 			break;
       
   403 		TUint b4 = pointerToPreviousByte[4];
       
   404 		if (b4 < 0x30 || b4 > 0x39)
       
   405 			{
       
   406 			if (numOfBytes == 0)
       
   407 				return CCnvCharacterSetConverter::EErrorIllFormedInput;
       
   408 			else 
       
   409 				break;
       
   410 			}
       
   411 		else 
       
   412 			{
       
   413 			numOfBytes = numOfBytes + 4;
       
   414 			pointerToPreviousByte = pointerToPreviousByte + 4;
       
   415 			}
       
   416 		}
       
   417 		return numOfBytes;
       
   418 	}
       
   419 	
       
   420 void Step012DummyConvertToIntermediateBufferInPlace(TDes8&)
       
   421 	{
       
   422 	}
       
   423 
       
   424 // Perform the actual conversion (gb18030 4byte non-BMP -> unicode) using formula in this function
       
   425 LOCAL_C void Step3ConvertToIntermediateBufferInPlace(TDes8& aDescriptor)
       
   426 	{
       
   427 	const TInt descriptorLength=aDescriptor.Length();
       
   428 	__ASSERT_DEBUG(descriptorLength%4 == 0, Panic(EPanicNothingToConvert5));
       
   429 	TUint8* pointerToTargetByte=CONST_CAST(TUint8*, aDescriptor.Ptr());
       
   430 	const TUint8* pointerToSourceByte=pointerToTargetByte;
       
   431 	const TUint8* const pointerToLastByte=pointerToSourceByte+descriptorLength;
       
   432 	
       
   433 	FOREVER
       
   434 		{
       
   435 		if (pointerToLastByte - pointerToSourceByte < 4)
       
   436 			break;
       
   437 		
       
   438 		// conversion
       
   439 		TUint8 b1 = pointerToSourceByte[0];
       
   440 		TUint8 b2 = pointerToSourceByte[1];
       
   441 		TUint8 b3 = pointerToSourceByte[2];
       
   442 		TUint8 b4 = pointerToSourceByte[3];
       
   443 		
       
   444 		TUint characterCode = 0x10000 + (b1 - 0x90) * 12600 +
       
   445 										(b2 - 0x30) * 1260 +
       
   446 										(b3 - 0x81) * 10 +
       
   447 										(b4 - 0x30);
       
   448 		
       
   449 		pointerToTargetByte[0] = ((characterCode >> 24) & 0xFF);
       
   450 		pointerToTargetByte[1] = ((characterCode >> 16) & 0xFF);
       
   451 		pointerToTargetByte[2] = ((characterCode >> 8) & 0xFF);
       
   452 		pointerToTargetByte[3] = (characterCode & 0xFF);
       
   453 		
       
   454 		pointerToSourceByte = pointerToSourceByte + 4;
       
   455 		pointerToTargetByte = pointerToTargetByte + 4;
       
   456 		}
       
   457 	
       
   458 		aDescriptor.SetLength(descriptorLength);
       
   459 	}
       
   460 
       
   461 
       
   462 // A dummy "direct" mapping table for non-Bmp chars in step 3
       
   463 // Use 32-bit Unicode value as intermediate coding
       
   464 LOCAL_D const SCnvConversionData::SVariableByteData::SRange step3ForeignVariableByteDataRanges[]=
       
   465 	{
       
   466 		{
       
   467 		0x00,		// from 0x10000
       
   468 		0x00,		// to  0x10FFFF
       
   469 		3,			// total 4 bytes
       
   470 		0
       
   471 		},		
       
   472 	};
       
   473 LOCAL_D const SCnvConversionData::SOneDirectionData::SRange step3ForeignToUnicodeDataRanges[]=
       
   474 	{
       
   475 		{
       
   476 		0x10000,	// from 0x10000
       
   477 		0x10ffff,	// to  0x10FFFF
       
   478 		SCnvConversionData::SOneDirectionData::SRange::EDirect,
       
   479 		0,
       
   480 		0,
       
   481 			{
       
   482 			0		// map from intermediate to unicode with offset = 0
       
   483 			}
       
   484 		},
       
   485 	};
       
   486 LOCAL_D const SCnvConversionData::SOneDirectionData::SRange step3UnicodeToForeignDataRanges[]=
       
   487 	{
       
   488 		{
       
   489 		0x10000,	//from 0x10000
       
   490 		0x10FFFF, //to 0x10FFFF
       
   491 		SCnvConversionData::SOneDirectionData::SRange::EDirect,
       
   492 		4,			// output byte count = 4
       
   493 		0,
       
   494 			{
       
   495 			0		// offset = 0
       
   496 			}
       
   497 		},
       
   498 	};
       
   499 GLDEF_D const SCnvConversionData step3ConversionData=
       
   500 	{
       
   501 	SCnvConversionData::EFixedBigEndian,
       
   502 		{
       
   503 		ARRAY_LENGTH(step3ForeignVariableByteDataRanges),
       
   504 		step3ForeignVariableByteDataRanges
       
   505 		},
       
   506 		{
       
   507 		ARRAY_LENGTH(step3ForeignToUnicodeDataRanges),
       
   508 		step3ForeignToUnicodeDataRanges
       
   509 		},
       
   510 		{
       
   511 		ARRAY_LENGTH(step3UnicodeToForeignDataRanges),
       
   512 		step3UnicodeToForeignDataRanges
       
   513 		},
       
   514 	NULL,
       
   515 	NULL
       
   516 	};
       
   517 
       
   518 
       
   519 // An internal mapping table to reslove the conflict introduced in symbian GB2312-80 plug-in.
       
   520 // It will be merged into the gb18030-2byte Conversion Data.
       
   521 // It includes mapping: (0xA1A4 -> 0x00B7, 0xA1AA -> 0x2014, 0xA844 <- 0x2015, 0x8139A739 <- 0x30FB)
       
   522 LOCAL_D const SCnvConversionData::SVariableByteData::SRange gb18030_diff_gb2312ForeignVariableByteDataRanges[]=
       
   523 	{
       
   524 		{
       
   525 		0xA1, //from 0xA1A4
       
   526 		0xA1, //to 0xA1AA
       
   527 		1,			
       
   528 		0
       
   529 		},		
       
   530 	};
       
   531 LOCAL_D const SCnvConversionData::SOneDirectionData::SRange::UData::SKeyedTable1616::SEntry keyedTable1616_foreignToUnicode_1[]=
       
   532 	{
       
   533 			{
       
   534 			0xA1A4,
       
   535 			0x00B7
       
   536 			},
       
   537 			{
       
   538 			0xA1AA,
       
   539 			0x2014
       
   540 			}
       
   541 	};
       
   542 LOCAL_D const SCnvConversionData::SOneDirectionData::SRange gb18030_diff_gb2312ForeignToUnicodeDataRanges[]=
       
   543 	{
       
   544 		{
       
   545 		0xA1A4,
       
   546 		0xA1AA,
       
   547 		SCnvConversionData::SOneDirectionData::SRange::EKeyedTable1616,
       
   548 		0,
       
   549 		0,
       
   550 			{
       
   551 			UData_SKeyedTable1616(keyedTable1616_foreignToUnicode_1)
       
   552 			}
       
   553 		},
       
   554 	};
       
   555 LOCAL_D const SCnvConversionData::SOneDirectionData::SRange::UData::SKeyedTable1616::SEntry keyedTable1616_unicodeToForeign_1[]=
       
   556 	{
       
   557 			{
       
   558 			0x2015,
       
   559 			0xA844
       
   560 			}
       
   561 	};
       
   562 LOCAL_D const SCnvConversionData::SOneDirectionData::SRange::UData::SKeyedTable3232::SEntry keyedTable3232_unicodeToForeign_1[]=
       
   563 	{
       
   564 			{
       
   565 			0x30FB,
       
   566 			0x8139A739
       
   567 			}
       
   568 	};
       
   569 
       
   570 LOCAL_D const SCnvConversionData::SOneDirectionData::SRange gb18030_diff_gb2312UnicodeToForeignDataRanges[]=
       
   571 	{
       
   572 		{
       
   573 		0x2015,
       
   574 		0x2015,
       
   575 		SCnvConversionData::SOneDirectionData::SRange::EKeyedTable1616,
       
   576 		2,			// output byte count = 2
       
   577 		0,
       
   578 			{
       
   579 			UData_SKeyedTable1616(keyedTable1616_unicodeToForeign_1)
       
   580 			}
       
   581 		},
       
   582 		{
       
   583 		0x30FB,
       
   584 		0x30FB,
       
   585 		SCnvConversionData::SOneDirectionData::SRange::EKeyedTable3232,
       
   586 		4,			// output byte count = 4
       
   587 		0,
       
   588 			{
       
   589 			UData_SKeyedTable3232(keyedTable3232_unicodeToForeign_1)
       
   590 			}
       
   591 		},
       
   592 	};
       
   593 GLDEF_D const SCnvConversionData gb18030_diff_gb2312ConversionData=
       
   594 	{
       
   595 	SCnvConversionData::EFixedBigEndian,
       
   596 		{
       
   597 		ARRAY_LENGTH(gb18030_diff_gb2312ForeignVariableByteDataRanges),
       
   598 		gb18030_diff_gb2312ForeignVariableByteDataRanges
       
   599 		},
       
   600 		{
       
   601 		ARRAY_LENGTH(gb18030_diff_gb2312ForeignToUnicodeDataRanges),
       
   602 		gb18030_diff_gb2312ForeignToUnicodeDataRanges
       
   603 		},
       
   604 		{
       
   605 		ARRAY_LENGTH(gb18030_diff_gb2312UnicodeToForeignDataRanges),
       
   606 		gb18030_diff_gb2312UnicodeToForeignDataRanges
       
   607 		},
       
   608 	NULL,
       
   609 	NULL
       
   610 	};
       
   611 
       
   612 LOCAL_D const SCnvConversionData::SVariableByteData::SRange foreignVariableByteDataRanges[]=
       
   613 	{
       
   614 		{
       
   615 		0x00,
       
   616 		0x7f,
       
   617 		0,
       
   618 		0
       
   619 		},
       
   620 		{
       
   621 		0x80,
       
   622 		0xff,
       
   623 		1,
       
   624 		0
       
   625 		}
       
   626 	};
       
   627 
       
   628 LOCAL_C void SetUpCompleteGb18030_2byteConversionData(SCnvConversionData& aCompleteGb18030_2byteConversionData, TUint8* aWorkingMemory)
       
   629 	{
       
   630 	const SCnvConversionData& gb2312ConversionData=CnvGb2312::ConversionData();
       
   631 	const SCnvConversionData& gb18030_diff_gbkConversionData=CnvGb18030_diff_gbk::ConversionData();
       
   632 	const SCnvConversionData& gbkConversionData=CnvGbk::ConversionData();
       
   633 	// create a SCnvConversionData that is the combination of gb18030_diff_gb2312ConversionData, gb2312ConversionData, gb18030_diff_gbkConversionData and gbkConversionData;
       
   634 	aCompleteGb18030_2byteConversionData.iEndiannessOfForeignCharacters=SCnvConversionData::EFixedBigEndian;
       
   635 	aCompleteGb18030_2byteConversionData.iForeignVariableByteData.iNumberOfRanges=ARRAY_LENGTH(foreignVariableByteDataRanges);
       
   636 	aCompleteGb18030_2byteConversionData.iForeignVariableByteData.iRangeArray=foreignVariableByteDataRanges;
       
   637 	TInt numberOfBytesOfWorkingMemoryUsed=0;
       
   638 	
       
   639 	// set up the foreign-to-Unicode data
       
   640 	const TInt numberOfForeignToUnicodeDataRanges=gb18030_diff_gb2312ConversionData.iForeignToUnicodeData.iNumberOfRanges + gb2312ConversionData.iForeignToUnicodeData.iNumberOfRanges + gb18030_diff_gbkConversionData.iForeignToUnicodeData.iNumberOfRanges + gbkConversionData.iForeignToUnicodeData.iNumberOfRanges;
       
   641 	aCompleteGb18030_2byteConversionData.iForeignToUnicodeData.iNumberOfRanges=numberOfForeignToUnicodeDataRanges;
       
   642 	SCnvConversionData::SOneDirectionData::SRange* foreignToUnicodeDataRangeArray=REINTERPRET_CAST(SCnvConversionData::SOneDirectionData::SRange*, aWorkingMemory+numberOfBytesOfWorkingMemoryUsed);
       
   643 	numberOfBytesOfWorkingMemoryUsed+=(numberOfForeignToUnicodeDataRanges*sizeof(SCnvConversionData::SOneDirectionData::SRange));
       
   644 	__ASSERT_ALWAYS(numberOfBytesOfWorkingMemoryUsed<=KNumberOfBytesOfWorkingMemory, Panic(EPanicTooManyBytesOfWorkingMemoryUsed1));
       
   645 	aCompleteGb18030_2byteConversionData.iForeignToUnicodeData.iRangeArray=foreignToUnicodeDataRangeArray;
       
   646 	Mem::Copy(foreignToUnicodeDataRangeArray, gb18030_diff_gb2312ConversionData.iForeignToUnicodeData.iRangeArray, gb18030_diff_gb2312ConversionData.iForeignToUnicodeData.iNumberOfRanges*sizeof(SCnvConversionData::SOneDirectionData::SRange));
       
   647 	Mem::Copy(foreignToUnicodeDataRangeArray + gb18030_diff_gb2312ConversionData.iForeignToUnicodeData.iNumberOfRanges, gb2312ConversionData.iForeignToUnicodeData.iRangeArray, gb2312ConversionData.iForeignToUnicodeData.iNumberOfRanges*sizeof(SCnvConversionData::SOneDirectionData::SRange));
       
   648 	Mem::Copy(foreignToUnicodeDataRangeArray + gb18030_diff_gb2312ConversionData.iForeignToUnicodeData.iNumberOfRanges + gb2312ConversionData.iForeignToUnicodeData.iNumberOfRanges, gb18030_diff_gbkConversionData.iForeignToUnicodeData.iRangeArray, gb18030_diff_gbkConversionData.iForeignToUnicodeData.iNumberOfRanges*sizeof(SCnvConversionData::SOneDirectionData::SRange));
       
   649 	Mem::Copy(foreignToUnicodeDataRangeArray + gb18030_diff_gb2312ConversionData.iForeignToUnicodeData.iNumberOfRanges + gb2312ConversionData.iForeignToUnicodeData.iNumberOfRanges + gb18030_diff_gbkConversionData.iForeignToUnicodeData.iNumberOfRanges, gbkConversionData.iForeignToUnicodeData.iRangeArray, gbkConversionData.iForeignToUnicodeData.iNumberOfRanges*sizeof(SCnvConversionData::SOneDirectionData::SRange));
       
   650 
       
   651 	// set up the Unicode-to-foreign data
       
   652 	const TInt numberOfUnicodeToForeignDataRanges=gb18030_diff_gb2312ConversionData.iUnicodeToForeignData.iNumberOfRanges + gb2312ConversionData.iUnicodeToForeignData.iNumberOfRanges + gb18030_diff_gbkConversionData.iUnicodeToForeignData.iNumberOfRanges + gbkConversionData.iUnicodeToForeignData.iNumberOfRanges;
       
   653 	aCompleteGb18030_2byteConversionData.iUnicodeToForeignData.iNumberOfRanges=numberOfUnicodeToForeignDataRanges;
       
   654 	SCnvConversionData::SOneDirectionData::SRange* unicodeToForeignDataRangeArray=REINTERPRET_CAST(SCnvConversionData::SOneDirectionData::SRange*, aWorkingMemory+numberOfBytesOfWorkingMemoryUsed);
       
   655 	numberOfBytesOfWorkingMemoryUsed+=(numberOfUnicodeToForeignDataRanges*sizeof(SCnvConversionData::SOneDirectionData::SRange));
       
   656 	__ASSERT_ALWAYS(numberOfBytesOfWorkingMemoryUsed<=KNumberOfBytesOfWorkingMemory, Panic(EPanicTooManyBytesOfWorkingMemoryUsed2));
       
   657 	aCompleteGb18030_2byteConversionData.iUnicodeToForeignData.iRangeArray=unicodeToForeignDataRangeArray;
       
   658 	Mem::Copy(unicodeToForeignDataRangeArray, gb18030_diff_gb2312ConversionData.iUnicodeToForeignData.iRangeArray, gb18030_diff_gb2312ConversionData.iUnicodeToForeignData.iNumberOfRanges*sizeof(SCnvConversionData::SOneDirectionData::SRange));
       
   659 	Mem::Copy(unicodeToForeignDataRangeArray + gb18030_diff_gb2312ConversionData.iUnicodeToForeignData.iNumberOfRanges, gb2312ConversionData.iUnicodeToForeignData.iRangeArray, gb2312ConversionData.iUnicodeToForeignData.iNumberOfRanges*sizeof(SCnvConversionData::SOneDirectionData::SRange));
       
   660 	Mem::Copy(unicodeToForeignDataRangeArray + gb18030_diff_gb2312ConversionData.iUnicodeToForeignData.iNumberOfRanges + gb2312ConversionData.iUnicodeToForeignData.iNumberOfRanges, gb18030_diff_gbkConversionData.iUnicodeToForeignData.iRangeArray, gb18030_diff_gbkConversionData.iUnicodeToForeignData.iNumberOfRanges*sizeof(SCnvConversionData::SOneDirectionData::SRange));
       
   661 	Mem::Copy(unicodeToForeignDataRangeArray + gb18030_diff_gb2312ConversionData.iUnicodeToForeignData.iNumberOfRanges + gb2312ConversionData.iUnicodeToForeignData.iNumberOfRanges + gb18030_diff_gbkConversionData.iUnicodeToForeignData.iNumberOfRanges, gbkConversionData.iUnicodeToForeignData.iRangeArray, gbkConversionData.iUnicodeToForeignData.iNumberOfRanges*sizeof(SCnvConversionData::SOneDirectionData::SRange));
       
   662 	}
       
   663 
       
   664 
       
   665 TInt CGB18030ConverterImpl::ConvertFromUnicode(
       
   666 		CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
       
   667 		const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, 
       
   668 		TDes8& aForeign, 
       
   669 		const TDesC16& aUnicode, 
       
   670 		CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters)
       
   671 	{
       
   672 	TFixedArray<CnvUtilities::SCharacterSet, 3> characterSets;
       
   673 	
       
   674 	// step 1) gb18030-2byte
       
   675 	characterSets[0].iConversionData						= completeGb18030_2byteConversionData;
       
   676 	characterSets[0].iConvertFromIntermediateBufferInPlace	= Step12DummyConvertFromIntermediateBufferInPlace;
       
   677 	characterSets[0].iEscapeSequence						= &KNullDesC8;
       
   678 	
       
   679 	// step 2) gb18030-4byte BMP
       
   680 	characterSets[1].iConversionData						= &CnvGb18030_4byte::ConversionData();
       
   681 	characterSets[1].iConvertFromIntermediateBufferInPlace	= Step12DummyConvertFromIntermediateBufferInPlace;
       
   682 	characterSets[1].iEscapeSequence						= &KNullDesC8;
       
   683 	
       
   684 	// step 3) gb18030-4byte non-BMP
       
   685 	characterSets[2].iConversionData						= &step3ConversionData;
       
   686 	characterSets[2].iConvertFromIntermediateBufferInPlace	= Step3ConvertFromIntermediateBufferInPlace;
       
   687 	characterSets[2].iEscapeSequence						= &KNullDesC8;
       
   688 	
       
   689 	return CnvUtilities::ConvertFromUnicode(aDefaultEndiannessOfForeignCharacters, aReplacementForUnconvertibleUnicodeCharacters, aForeign, aUnicode, aIndicesOfUnconvertibleCharacters, characterSets.Array());
       
   690 	}
       
   691 
       
   692 
       
   693 TInt CGB18030ConverterImpl::ConvertToUnicode(
       
   694 		CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
       
   695 		TDes16& aUnicode, 
       
   696 		const TDesC8& aForeign, 
       
   697 		TInt& /*aState*/, 
       
   698 		TInt& aNumberOfUnconvertibleCharacters, 
       
   699 		TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter)
       
   700 	{
       
   701 	TFixedArray<CnvUtilities::SMethod, 4> methods;
       
   702 	// step 0) gb2312-1byte
       
   703 	methods[0].iNumberOfBytesAbleToConvert			= Step0NumberOfBytesAbleToConvertToUnicode;
       
   704 	methods[0].iConvertToIntermediateBufferInPlace	= Step012DummyConvertToIntermediateBufferInPlace;
       
   705 	methods[0].iConversionData						= &CnvGb2312::ConversionData(); //only use one byte part
       
   706 	methods[0].iNumberOfBytesPerCharacter			= 1;
       
   707 	methods[0].iNumberOfCoreBytesPerCharacter		= 1;
       
   708 	
       
   709 	// step 1) gb18030-2byte
       
   710 	methods[1].iNumberOfBytesAbleToConvert			= Step1NumberOfBytesAbleToConvertToUnicode;
       
   711 	methods[1].iConvertToIntermediateBufferInPlace	= Step012DummyConvertToIntermediateBufferInPlace;
       
   712 	methods[1].iConversionData						= completeGb18030_2byteConversionData;
       
   713 	methods[1].iNumberOfBytesPerCharacter			= 2;
       
   714 	methods[1].iNumberOfCoreBytesPerCharacter		= 2;
       
   715 	
       
   716 	// step 2) gb18030 4-byte BMP
       
   717 	methods[2].iNumberOfBytesAbleToConvert			= Step2NumberOfBytesAbleToConvertToUnicode;
       
   718 	methods[2].iConvertToIntermediateBufferInPlace	= Step012DummyConvertToIntermediateBufferInPlace;
       
   719 	methods[2].iConversionData						= &CnvGb18030_4byte::ConversionData();
       
   720 	methods[2].iNumberOfBytesPerCharacter			= 4;
       
   721 	methods[2].iNumberOfCoreBytesPerCharacter		= 4;
       
   722 
       
   723 	// step 3) gb18030 4-byte non-BMP
       
   724 	methods[3].iNumberOfBytesAbleToConvert			= Step3NumberOfBytesAbleToConvertToUnicode;
       
   725 	methods[3].iConvertToIntermediateBufferInPlace	= Step3ConvertToIntermediateBufferInPlace;
       
   726 	methods[3].iConversionData						= &step3ConversionData;
       
   727 	methods[3].iNumberOfBytesPerCharacter			= 4;
       
   728 	methods[3].iNumberOfCoreBytesPerCharacter		= 4;
       
   729 	
       
   730 	return CnvUtilities::ConvertToUnicodeFromHeterogeneousForeign(aDefaultEndiannessOfForeignCharacters, aUnicode, aForeign, aNumberOfUnconvertibleCharacters, aIndexOfFirstByteOfFirstUnconvertibleCharacter, methods.Array());
       
   731 	}
       
   732 
       
   733 TBool CGB18030ConverterImpl::IsInThisCharacterSetL(
       
   734 		TBool& aSetToTrue, 
       
   735 		TInt& aConfidenceLevel, 
       
   736 		const TDesC8& aSample)
       
   737 	{
       
   738 	aSetToTrue = ETrue;
       
   739 	return CnvGb2312::IsCharGBBased(aConfidenceLevel, aSample);
       
   740 	}
       
   741 
       
   742 CGB18030ConverterImpl* CGB18030ConverterImpl::NewL()
       
   743 	{
       
   744 	CGB18030ConverterImpl* self = new(ELeave) CGB18030ConverterImpl();
       
   745 	CleanupStack::PushL(self);
       
   746 	self->ConstructL();
       
   747 	CleanupStack::Pop(); // self
       
   748 	return self;
       
   749 	}
       
   750 
       
   751 CGB18030ConverterImpl::~CGB18030ConverterImpl()
       
   752 	{
       
   753 	if (workingMemory)
       
   754 		delete[] workingMemory;
       
   755 	if (completeGb18030_2byteConversionData)
       
   756 		delete completeGb18030_2byteConversionData;
       
   757 	}
       
   758 
       
   759 CGB18030ConverterImpl::CGB18030ConverterImpl()
       
   760 	{
       
   761 	}
       
   762 
       
   763 TInt CGB18030ConverterImpl::ConstructL()
       
   764 	{
       
   765 	completeGb18030_2byteConversionData = new (ELeave)SCnvConversionData;
       
   766 	CleanupStack::PushL(completeGb18030_2byteConversionData);
       
   767 	workingMemory = new (ELeave) TUint8[KNumberOfBytesOfWorkingMemory]; //1040 bytes
       
   768 	CleanupStack::Pop(); // completeGb18030_2byteConversionData
       
   769 	SetUpCompleteGb18030_2byteConversionData(*completeGb18030_2byteConversionData, workingMemory);
       
   770 	return 1;
       
   771 	}
       
   772 
       
   773 const TImplementationProxy ImplementationTable[] = 
       
   774 	{
       
   775 		IMPLEMENTATION_PROXY_ENTRY(0x10287038,CGB18030ConverterImpl::NewL)
       
   776 	};
       
   777 
       
   778 EXPORT_C const TImplementationProxy* ImplementationGroupProxy(TInt& aTableCount)
       
   779 	{
       
   780 	aTableCount = sizeof(ImplementationTable) / sizeof(TImplementationProxy);
       
   781 
       
   782 	return ImplementationTable;
       
   783 	}
       
   784