charconvfw/charconvplugins/src/shared/gb2312_shared.cpp
changeset 0 1fb32624e06b
child 28 26914f8d1faf
equal deleted inserted replaced
-1:000000000000 0:1fb32624e06b
       
     1 /*
       
     2 * Copyright (c) 2000-2009 Nokia Corporation and/or its subsidiary(-ies).
       
     3 * All rights reserved.
       
     4 * This component and the accompanying materials are made available
       
     5 * under the terms of "Eclipse Public License v1.0"
       
     6 * which accompanies this distribution, and is available
       
     7 * at the URL "http://www.eclipse.org/legal/epl-v10.html".
       
     8 *
       
     9 * Initial Contributors:
       
    10 * Nokia Corporation - initial contribution.
       
    11 *
       
    12 * Contributors:
       
    13 *
       
    14 * Description: 
       
    15 *
       
    16 */
       
    17 
       
    18 
       
    19 #include <e32std.h>
       
    20 #include <convgeneratedcpp.h>
       
    21 #include "gb2312.h"
       
    22 
       
    23 struct SCnvConversionData;
       
    24 
       
    25 EXPORT_C const TDesC8& CnvGb2312::ReplacementForUnconvertibleUnicodeCharacters()
       
    26 	{
       
    27 	return ReplacementForUnconvertibleUnicodeCharacters_internal();
       
    28 	}
       
    29 
       
    30 EXPORT_C const SCnvConversionData& CnvGb2312::ConversionData()
       
    31 	{
       
    32 	return conversionData;
       
    33 	}
       
    34 
       
    35 EXPORT_C TBool CnvGb2312::IsCharGBBased(TInt& aConfidenceLevel, const TDesC8& aSample)
       
    36 	{
       
    37 	TInt sampleLength = aSample.Length();
       
    38 	aConfidenceLevel = 0;
       
    39 	//WBB the following is for distiguish between big5 and GBK
       
    40 	TInt totalWeight=0;		//sum of the weights of 20 most frequent chars
       
    41 	TInt sumOfGoodChar=0;		//the number of chars whose first byte and second are both in the range
       
    42 	TInt sumOfWeight=0;		//sum of the weights of the chars which are included in the sample
       
    43 	TInt sumOutChar=0;		//the number of chars which are not common
       
    44 	TInt sumOfBadSecondByte=0;//the number of chars whose first byte is in the range but not the second
       
    45 	struct referenceChar
       
    46 		{
       
    47 		TUint charGBK;
       
    48 		TInt weight;
       
    49 		};
       
    50 
       
    51 	referenceChar refGbk[20];
       
    52 	static const TInt iniWeight[20]=
       
    53 		{
       
    54 		//occurence per 1000 chars
       
    55 		30,20,20,10,10,10,10,10,5,5,
       
    56 		5,5,5,5,5,5,5,5,5,5
       
    57 		};
       
    58 
       
    59 	static const TUint iniChar[20]=
       
    60 		{
       
    61 		0xa3ac,0xb5c4,0xc1cb,0xb8f6,0xb2bb,0xb0d1,0xd2bb,0xcac7,0xd2aa,0xbecd,
       
    62 		0xd2b2,0xccec,0xc9cf,0xbacd,0xd6d0,0xd4da,0xd0a1,0xc8cb,0xcfc2,0xd6d0,
       
    63 		};
       
    64 
       
    65 	for (TInt k=0; k<20; k++)
       
    66 		{
       
    67 		refGbk[k].charGBK=iniChar[k];
       
    68 		refGbk[k].weight=iniWeight[k];
       
    69 		totalWeight=totalWeight+iniWeight[k];
       
    70 		}
       
    71 
       
    72 	
       
    73 	//WBB
       
    74 	for (TInt i = 0; i < sampleLength; ++i)
       
    75 		{
       
    76 		//GBK encoding first byte range 0x81-0xfe
       
    77 		//              second byte range 0x40-0x7e, 0x80-0xfe
       
    78 		if((aSample[i] >= 0x81) && (aSample[i] <= 0xfe))
       
    79 			{
       
    80 			TInt increment1 = i+1;
       
    81 			if (increment1 >= sampleLength)
       
    82 				break;
       
    83 			if (((aSample[increment1] >=0x40) && (aSample[increment1] <= 0x7e)) ||
       
    84 				((aSample[increment1] >=0x80) && (aSample[increment1] <= 0xfe)))
       
    85 				{
       
    86 				//WBB
       
    87 				TUint charGbk=(aSample[i]<<8)|(aSample[increment1]);
       
    88 				TInt j;
       
    89 				for (j=0; j<20; j++)
       
    90 					{
       
    91 					if (charGbk==refGbk[j].charGBK)
       
    92 						{
       
    93 						sumOfWeight=sumOfWeight+refGbk[j].weight;
       
    94 						break;
       
    95 						}
       
    96 					}
       
    97 				if ((aSample[i]>=0xa4)&&(aSample[i]<=0xaf))
       
    98 					sumOutChar++;
       
    99 				sumOfGoodChar++;
       
   100 				i++;
       
   101 				//WBB
       
   102 				}
       
   103 			else
       
   104 				{
       
   105 				sumOfBadSecondByte++;				
       
   106 				}
       
   107 			}
       
   108 		} // for 
       
   109 
       
   110 	TInt limit;
       
   111 	limit = (10*sampleLength)/100;
       
   112 	if (sumOfGoodChar > limit)
       
   113 		{
       
   114 		aConfidenceLevel=sumOfGoodChar*100/(sumOfBadSecondByte+sumOfGoodChar);
       
   115 		aConfidenceLevel=aConfidenceLevel-Max(0,((totalWeight-sumOfWeight)*sumOfGoodChar/1000));//against frequent chars 
       
   116 		aConfidenceLevel=aConfidenceLevel-(sumOutChar*100/sumOfGoodChar);//against gap
       
   117 		aConfidenceLevel=(aConfidenceLevel < 0)?0:aConfidenceLevel;
       
   118 		}
       
   119 	else
       
   120 		aConfidenceLevel=0;
       
   121 	return ETrue;
       
   122 	}