diff -r 000000000000 -r 1fb32624e06b charconvfw/charconvplugins/src/plugins/big5.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/charconvfw/charconvplugins/src/plugins/big5.cpp Tue Feb 02 02:02:46 2010 +0200 @@ -0,0 +1,198 @@ +/* +* Copyright (c) 1997-2009 Nokia Corporation and/or its subsidiary(-ies). +* All rights reserved. +* This component and the accompanying materials are made available +* under the terms of "Eclipse Public License v1.0" +* which accompanies this distribution, and is available +* at the URL "http://www.eclipse.org/legal/epl-v10.html". +* +* Initial Contributors: +* Nokia Corporation - initial contribution. +* +* Contributors: +* +* Description: +* +*/ + + +#include +#include +#include "big5.h" +#include +#include + +class CBIG5ConverterImpl : public CCharacterSetConverterPluginInterface + { + +public: + virtual const TDesC8& ReplacementForUnconvertibleUnicodeCharacters(); + + virtual TInt ConvertFromUnicode( + CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, + const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, + TDes8& aForeign, + const TDesC16& aUnicode, + CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters); + + virtual TInt ConvertToUnicode( + CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, + TDes16& aUnicode, + const TDesC8& aForeign, + TInt& aState, + TInt& aNumberOfUnconvertibleCharacters, + TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter); + + virtual TBool IsInThisCharacterSetL( + TBool& aSetToTrue, + TInt& aConfidenceLevel, + const TDesC8& aSample); + + static CBIG5ConverterImpl* NewL(); + virtual ~CBIG5ConverterImpl(); + +private: + CBIG5ConverterImpl(); + + }; + + +const TDesC8& CBIG5ConverterImpl::ReplacementForUnconvertibleUnicodeCharacters() + { + return CnvBig5::ReplacementForUnconvertibleUnicodeCharacters(); + } + +TInt CBIG5ConverterImpl::ConvertFromUnicode( + CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, + const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, + TDes8& aForeign, + const TDesC16& aUnicode, + CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters) + { + return CCnvCharacterSetConverter::DoConvertFromUnicode(CnvBig5::ConversionData(), aDefaultEndiannessOfForeignCharacters, aReplacementForUnconvertibleUnicodeCharacters, aForeign, aUnicode, aIndicesOfUnconvertibleCharacters); + } + +TInt CBIG5ConverterImpl::ConvertToUnicode( + CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, + TDes16& aUnicode, + const TDesC8& aForeign, + TInt& /*aState*/, + TInt& aNumberOfUnconvertibleCharacters, + TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter) + { + return CCnvCharacterSetConverter::DoConvertToUnicode(CnvBig5::ConversionData(), aDefaultEndiannessOfForeignCharacters, aUnicode, aForeign, aNumberOfUnconvertibleCharacters, aIndexOfFirstByteOfFirstUnconvertibleCharacter); + } + +TBool CBIG5ConverterImpl::IsInThisCharacterSetL( + TBool& aSetToTrue, + TInt& aConfidenceLevel, + const TDesC8& aSample) + { + aSetToTrue=ETrue; + TInt sampleLength = aSample.Length(); + aConfidenceLevel = 0; + //WBB the following is for distiguish between big5 and GBK + TInt totalWeight=0; //sum of the weights of 20 most frequent chars + TInt sumOfGoodChar=0; //the number of chars whose first byte and second are both in the range + TInt sumOfWeight=0; //sum of the weights of the chars which are included in the sample + TInt sumOutChar=0; //the number of chars which are not common + TInt sumOfBadSecondByte=0;//the number of chars whose first byte is in the range but not the second + struct referenceChar + { + TUint charBig5; + TInt weight; + }; + + referenceChar refBig5[20]; + static const TInt iniWeight[20]= + { + //occurence per 1000 chars + 30,20,20,10,10,10,10,10,5,5, + 5,5,5,5,5,5,5,5,5,5 + }; + + static const TUint iniChar[20]= + { + 0xa141,0xaaba,0xa446,0xadd3,0xa4a3,0xa7e2,0xa440,0xac4f,0xad6e,0xa45d, + 0xa4d1,0xa457,0xa457,0xa94d,0xa4a4,0xa569,0xa662,0xa470,0xa448,0xa455 + }; + + for (TInt k=0; k<20; k++) + { + refBig5[k].charBig5=iniChar[k]; + refBig5[k].weight=iniWeight[k]; + totalWeight=totalWeight+iniWeight[k]; + } + //WBB + for (TInt i = 0; i < sampleLength; ++i) + { + // Big 5 encoding first byte range 0xA1-0xFE + // second byte range 0x40-0x7E 0xA1-0xFE + if((aSample[i] >= 0xa1) && (aSample[i] <= 0xfe)) + { + TInt increment1 = i+1; + if (increment1 >= sampleLength) + break; + if(((aSample[increment1] >= 0x40) && (aSample[increment1] <= 0x7e)) || + ((aSample[increment1] >= 0xa1) && (aSample[increment1] <= 0xfe))) + { + TUint charBig5=(aSample[i]<<8)|(aSample[increment1]); + if (charBig5>=0xc6a1)//Kanas start and rare chars follow after + sumOutChar++; + TInt j; + for (j=0; j<20; j++) + { + if (charBig5==refBig5[j].charBig5) + { + sumOfWeight=sumOfWeight+refBig5[j].weight; + break; + } + } + sumOfGoodChar++; + i++; + } + else + { + sumOfBadSecondByte++; + } + } + } // for + + if (sumOfGoodChar) + { + aConfidenceLevel=sumOfGoodChar*100/(sumOfBadSecondByte+sumOfGoodChar); + aConfidenceLevel=aConfidenceLevel-Max(0,((totalWeight-sumOfWeight)*sumOfGoodChar/1000));//against frequent chars + aConfidenceLevel=aConfidenceLevel-sumOutChar*100/sumOfGoodChar;//against gap + aConfidenceLevel=(aConfidenceLevel < 0)?0:aConfidenceLevel; + } + else + aConfidenceLevel=0; + return ETrue; + } + +CBIG5ConverterImpl* CBIG5ConverterImpl::NewL() + { + CBIG5ConverterImpl* self = new(ELeave) CBIG5ConverterImpl(); + return self; + } + +CBIG5ConverterImpl::~CBIG5ConverterImpl() + { + } + +CBIG5ConverterImpl::CBIG5ConverterImpl() + { + } + +const TImplementationProxy ImplementationTable[] = + { + IMPLEMENTATION_PROXY_ENTRY(0x10000FBF,CBIG5ConverterImpl::NewL) + }; + +EXPORT_C const TImplementationProxy* ImplementationGroupProxy(TInt& aTableCount) + { + aTableCount = sizeof(ImplementationTable) / sizeof(TImplementationProxy); + + return ImplementationTable; + } +