diff -r 000000000000 -r 1fb32624e06b charconvfw/charconvplugins/src/shared/gb2312_shared.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/charconvfw/charconvplugins/src/shared/gb2312_shared.cpp Tue Feb 02 02:02:46 2010 +0200 @@ -0,0 +1,122 @@ +/* +* Copyright (c) 2000-2009 Nokia Corporation and/or its subsidiary(-ies). +* All rights reserved. +* This component and the accompanying materials are made available +* under the terms of "Eclipse Public License v1.0" +* which accompanies this distribution, and is available +* at the URL "http://www.eclipse.org/legal/epl-v10.html". +* +* Initial Contributors: +* Nokia Corporation - initial contribution. +* +* Contributors: +* +* Description: +* +*/ + + +#include +#include +#include "gb2312.h" + +struct SCnvConversionData; + +EXPORT_C const TDesC8& CnvGb2312::ReplacementForUnconvertibleUnicodeCharacters() + { + return ReplacementForUnconvertibleUnicodeCharacters_internal(); + } + +EXPORT_C const SCnvConversionData& CnvGb2312::ConversionData() + { + return conversionData; + } + +EXPORT_C TBool CnvGb2312::IsCharGBBased(TInt& aConfidenceLevel, const TDesC8& aSample) + { + TInt sampleLength = aSample.Length(); + aConfidenceLevel = 0; + //WBB the following is for distiguish between big5 and GBK + TInt totalWeight=0; //sum of the weights of 20 most frequent chars + TInt sumOfGoodChar=0; //the number of chars whose first byte and second are both in the range + TInt sumOfWeight=0; //sum of the weights of the chars which are included in the sample + TInt sumOutChar=0; //the number of chars which are not common + TInt sumOfBadSecondByte=0;//the number of chars whose first byte is in the range but not the second + struct referenceChar + { + TUint charGBK; + TInt weight; + }; + + referenceChar refGbk[20]; + static const TInt iniWeight[20]= + { + //occurence per 1000 chars + 30,20,20,10,10,10,10,10,5,5, + 5,5,5,5,5,5,5,5,5,5 + }; + + static const TUint iniChar[20]= + { + 0xa3ac,0xb5c4,0xc1cb,0xb8f6,0xb2bb,0xb0d1,0xd2bb,0xcac7,0xd2aa,0xbecd, + 0xd2b2,0xccec,0xc9cf,0xbacd,0xd6d0,0xd4da,0xd0a1,0xc8cb,0xcfc2,0xd6d0, + }; + + for (TInt k=0; k<20; k++) + { + refGbk[k].charGBK=iniChar[k]; + refGbk[k].weight=iniWeight[k]; + totalWeight=totalWeight+iniWeight[k]; + } + + + //WBB + for (TInt i = 0; i < sampleLength; ++i) + { + //GBK encoding first byte range 0x81-0xfe + // second byte range 0x40-0x7e, 0x80-0xfe + if((aSample[i] >= 0x81) && (aSample[i] <= 0xfe)) + { + TInt increment1 = i+1; + if (increment1 >= sampleLength) + break; + if (((aSample[increment1] >=0x40) && (aSample[increment1] <= 0x7e)) || + ((aSample[increment1] >=0x80) && (aSample[increment1] <= 0xfe))) + { + //WBB + TUint charGbk=(aSample[i]<<8)|(aSample[increment1]); + TInt j; + for (j=0; j<20; j++) + { + if (charGbk==refGbk[j].charGBK) + { + sumOfWeight=sumOfWeight+refGbk[j].weight; + break; + } + } + if ((aSample[i]>=0xa4)&&(aSample[i]<=0xaf)) + sumOutChar++; + sumOfGoodChar++; + i++; + //WBB + } + else + { + sumOfBadSecondByte++; + } + } + } // for + + TInt limit; + limit = (10*sampleLength)/100; + if (sumOfGoodChar > limit) + { + aConfidenceLevel=sumOfGoodChar*100/(sumOfBadSecondByte+sumOfGoodChar); + aConfidenceLevel=aConfidenceLevel-Max(0,((totalWeight-sumOfWeight)*sumOfGoodChar/1000));//against frequent chars + aConfidenceLevel=aConfidenceLevel-(sumOutChar*100/sumOfGoodChar);//against gap + aConfidenceLevel=(aConfidenceLevel < 0)?0:aConfidenceLevel; + } + else + aConfidenceLevel=0; + return ETrue; + }