charconvfw/Charconv/ongoing/Source/foreign/plugins/BIG5.CPP
changeset 0 1fb32624e06b
child 16 56cd22a7a1cb
equal deleted inserted replaced
-1:000000000000 0:1fb32624e06b
       
     1 /*
       
     2 * Copyright (c) 1997-2004 Nokia Corporation and/or its subsidiary(-ies). 
       
     3 * All rights reserved.
       
     4 * This component and the accompanying materials are made available
       
     5 * under the terms of "Eclipse Public License v1.0"
       
     6 * which accompanies this distribution, and is available
       
     7 * at the URL "http://www.eclipse.org/legal/epl-v10.html".
       
     8 *
       
     9 * Initial Contributors:
       
    10 * Nokia Corporation - initial contribution.
       
    11 *
       
    12 * Contributors:
       
    13 *
       
    14 * Description:      
       
    15 *
       
    16 */
       
    17 
       
    18 
       
    19 
       
    20 
       
    21 
       
    22 
       
    23 
       
    24 
       
    25 #include <e32std.h>
       
    26 #include <charconv.h>
       
    27 #include <big5.h>
       
    28 #include <ecom/implementationproxy.h>
       
    29 #include "charactersetconverter.h"
       
    30 
       
    31 class CBIG5ConverterImpl : public CCharacterSetConverterPluginInterface
       
    32 	{
       
    33 
       
    34 public:
       
    35 	virtual const TDesC8& ReplacementForUnconvertibleUnicodeCharacters();
       
    36 
       
    37 	virtual TInt ConvertFromUnicode(
       
    38 		CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
       
    39 		const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, 
       
    40 		TDes8& aForeign, 
       
    41 		const TDesC16& aUnicode, 
       
    42 		CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters);
       
    43 
       
    44 	virtual TInt ConvertToUnicode(
       
    45 		CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
       
    46 		TDes16& aUnicode, 
       
    47 		const TDesC8& aForeign, 
       
    48 		TInt& aState, 
       
    49 		TInt& aNumberOfUnconvertibleCharacters, 
       
    50 		TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter);
       
    51 
       
    52 	virtual TBool IsInThisCharacterSetL(
       
    53 		TBool& aSetToTrue, 
       
    54 		TInt& aConfidenceLevel, 
       
    55 		const TDesC8& aSample);
       
    56 
       
    57 	static CBIG5ConverterImpl* NewL();
       
    58 	virtual ~CBIG5ConverterImpl();
       
    59 
       
    60 private:
       
    61 	CBIG5ConverterImpl();
       
    62 	void ConstructL();
       
    63 
       
    64 	};
       
    65 
       
    66 
       
    67 const TDesC8& CBIG5ConverterImpl::ReplacementForUnconvertibleUnicodeCharacters()
       
    68 	{
       
    69 	return CnvBig5::ReplacementForUnconvertibleUnicodeCharacters();
       
    70 	}
       
    71 
       
    72 TInt CBIG5ConverterImpl::ConvertFromUnicode(
       
    73 		CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
       
    74 		const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, 
       
    75 		TDes8& aForeign, 
       
    76 		const TDesC16& aUnicode, 
       
    77 		CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters)
       
    78 	{
       
    79 	return CCnvCharacterSetConverter::DoConvertFromUnicode(CnvBig5::ConversionData(), aDefaultEndiannessOfForeignCharacters, aReplacementForUnconvertibleUnicodeCharacters, aForeign, aUnicode, aIndicesOfUnconvertibleCharacters);
       
    80 	}
       
    81 
       
    82 TInt CBIG5ConverterImpl::ConvertToUnicode(
       
    83 		CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
       
    84 		TDes16& aUnicode, 
       
    85 		const TDesC8& aForeign, 
       
    86 		TInt& /*aState*/, 
       
    87 		TInt& aNumberOfUnconvertibleCharacters, 
       
    88 		TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter)
       
    89 	{
       
    90 	return CCnvCharacterSetConverter::DoConvertToUnicode(CnvBig5::ConversionData(), aDefaultEndiannessOfForeignCharacters, aUnicode, aForeign, aNumberOfUnconvertibleCharacters, aIndexOfFirstByteOfFirstUnconvertibleCharacter);
       
    91 	}
       
    92 
       
    93 TBool CBIG5ConverterImpl::IsInThisCharacterSetL(
       
    94 		TBool& aSetToTrue, 
       
    95 		TInt& aConfidenceLevel, 
       
    96 		const TDesC8& aSample)
       
    97 	{
       
    98 	aSetToTrue=ETrue;
       
    99 	TInt sampleLength = aSample.Length();
       
   100 	aConfidenceLevel = 0;
       
   101 	//WBB the following is for distiguish between big5 and GBK
       
   102 	TInt totalWeight=0;		//sum of the weights of 20 most frequent chars
       
   103 	TInt sumOfGoodChar=0;		//the number of chars whose first byte and second are both in the range
       
   104 	TInt sumOfWeight=0;		//sum of the weights of the chars which are included in the sample
       
   105 	TInt sumOutChar=0;		//the number of chars which are not common
       
   106 	TInt sumOfBadSecondByte=0;//the number of chars whose first byte is in the range but not the second
       
   107 	struct referenceChar
       
   108 		{
       
   109 		TUint charBig5;
       
   110 		TInt weight;
       
   111 		};
       
   112 
       
   113 	referenceChar refBig5[20];
       
   114 	const TInt iniWeight[20]=
       
   115 		{
       
   116 		//occurence per 1000 chars
       
   117 		30,20,20,10,10,10,10,10,5,5,
       
   118 		5,5,5,5,5,5,5,5,5,5
       
   119 		};
       
   120 
       
   121 	const TUint iniChar[20]=
       
   122 		{
       
   123 		0xa141,0xaaba,0xa446,0xadd3,0xa4a3,0xa7e2,0xa440,0xac4f,0xad6e,0xa45d,
       
   124 		0xa4d1,0xa457,0xa457,0xa94d,0xa4a4,0xa569,0xa662,0xa470,0xa448,0xa455
       
   125 		};
       
   126 
       
   127 	for (TInt k=0; k<20; k++)
       
   128 		{
       
   129 		refBig5[k].charBig5=iniChar[k];
       
   130 		refBig5[k].weight=iniWeight[k];
       
   131 		totalWeight=totalWeight+iniWeight[k];
       
   132 		}
       
   133 	//WBB
       
   134 	for (TInt i = 0; i < sampleLength; ++i)
       
   135 		{
       
   136 		// Big 5 encoding first byte range 0xA1-0xFE 
       
   137 		//                second byte range 0x40-0x7E  0xA1-0xFE
       
   138 		if((aSample[i] >= 0xa1) && (aSample[i] <= 0xfe))
       
   139 			{
       
   140 			TInt increment1 = i+1;
       
   141 			if (increment1 >= sampleLength)
       
   142 				break;
       
   143 			if(((aSample[increment1] >= 0x40) && (aSample[increment1] <= 0x7e)) ||
       
   144 				((aSample[increment1] >= 0xa1) && (aSample[increment1] <= 0xfe)))
       
   145 				{
       
   146 				TUint charBig5=(aSample[i]<<8)|(aSample[increment1]);
       
   147 				if (charBig5>=0xc6a1)//Kanas start and rare chars follow after 
       
   148 					sumOutChar++;
       
   149 				TInt j;
       
   150 				for (j=0; j<20; j++)
       
   151 					{
       
   152 					if (charBig5==refBig5[j].charBig5)
       
   153 						{
       
   154 						sumOfWeight=sumOfWeight+refBig5[j].weight;
       
   155 						break;
       
   156 						}
       
   157 					}
       
   158 				sumOfGoodChar++;
       
   159 				i++;
       
   160 				}
       
   161 			else
       
   162 				{
       
   163 				sumOfBadSecondByte++;
       
   164 				}
       
   165 			}
       
   166 		} // for 
       
   167 
       
   168 	if (sumOfGoodChar)
       
   169 		{
       
   170 		aConfidenceLevel=sumOfGoodChar*100/(sumOfBadSecondByte+sumOfGoodChar);
       
   171 		aConfidenceLevel=aConfidenceLevel-Max(0,((totalWeight-sumOfWeight)*sumOfGoodChar/1000));//against frequent chars 
       
   172 		aConfidenceLevel=aConfidenceLevel-sumOutChar*100/sumOfGoodChar;//against gap
       
   173 		aConfidenceLevel=(aConfidenceLevel < 0)?0:aConfidenceLevel;
       
   174 		}
       
   175 	else
       
   176 		aConfidenceLevel=0;
       
   177 	return ETrue;
       
   178 	}
       
   179 
       
   180 CBIG5ConverterImpl* CBIG5ConverterImpl::NewL()
       
   181 	{
       
   182 	CBIG5ConverterImpl* self = new(ELeave) CBIG5ConverterImpl();
       
   183 	CleanupStack::PushL(self);
       
   184 	self->ConstructL();
       
   185 	CleanupStack::Pop(self);
       
   186 	return self;
       
   187 	}
       
   188 
       
   189 CBIG5ConverterImpl::~CBIG5ConverterImpl()
       
   190 	{
       
   191 	}
       
   192 
       
   193 CBIG5ConverterImpl::CBIG5ConverterImpl()
       
   194 	{
       
   195 	}
       
   196 
       
   197 void CBIG5ConverterImpl::ConstructL()
       
   198 	{
       
   199 	}
       
   200 
       
   201 const TImplementationProxy ImplementationTable[] = 
       
   202 	{
       
   203 		IMPLEMENTATION_PROXY_ENTRY(0x10000FBF,CBIG5ConverterImpl::NewL)
       
   204 	};
       
   205 
       
   206 EXPORT_C const TImplementationProxy* ImplementationGroupProxy(TInt& aTableCount)
       
   207 	{
       
   208 	aTableCount = sizeof(ImplementationTable) / sizeof(TImplementationProxy);
       
   209 
       
   210 	return ImplementationTable;
       
   211 	}
       
   212