|
1 /* |
|
2 * Copyright (c) 1997-2004 Nokia Corporation and/or its subsidiary(-ies). |
|
3 * All rights reserved. |
|
4 * This component and the accompanying materials are made available |
|
5 * under the terms of "Eclipse Public License v1.0" |
|
6 * which accompanies this distribution, and is available |
|
7 * at the URL "http://www.eclipse.org/legal/epl-v10.html". |
|
8 * |
|
9 * Initial Contributors: |
|
10 * Nokia Corporation - initial contribution. |
|
11 * |
|
12 * Contributors: |
|
13 * |
|
14 * Description: |
|
15 * |
|
16 */ |
|
17 |
|
18 |
|
19 |
|
20 |
|
21 |
|
22 |
|
23 |
|
24 |
|
25 #include <e32std.h> |
|
26 #include <charconv.h> |
|
27 #include <big5.h> |
|
28 #include <ecom/implementationproxy.h> |
|
29 #include "charactersetconverter.h" |
|
30 |
|
31 class CBIG5ConverterImpl : public CCharacterSetConverterPluginInterface |
|
32 { |
|
33 |
|
34 public: |
|
35 virtual const TDesC8& ReplacementForUnconvertibleUnicodeCharacters(); |
|
36 |
|
37 virtual TInt ConvertFromUnicode( |
|
38 CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, |
|
39 const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, |
|
40 TDes8& aForeign, |
|
41 const TDesC16& aUnicode, |
|
42 CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters); |
|
43 |
|
44 virtual TInt ConvertToUnicode( |
|
45 CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, |
|
46 TDes16& aUnicode, |
|
47 const TDesC8& aForeign, |
|
48 TInt& aState, |
|
49 TInt& aNumberOfUnconvertibleCharacters, |
|
50 TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter); |
|
51 |
|
52 virtual TBool IsInThisCharacterSetL( |
|
53 TBool& aSetToTrue, |
|
54 TInt& aConfidenceLevel, |
|
55 const TDesC8& aSample); |
|
56 |
|
57 static CBIG5ConverterImpl* NewL(); |
|
58 virtual ~CBIG5ConverterImpl(); |
|
59 |
|
60 private: |
|
61 CBIG5ConverterImpl(); |
|
62 void ConstructL(); |
|
63 |
|
64 }; |
|
65 |
|
66 |
|
67 const TDesC8& CBIG5ConverterImpl::ReplacementForUnconvertibleUnicodeCharacters() |
|
68 { |
|
69 return CnvBig5::ReplacementForUnconvertibleUnicodeCharacters(); |
|
70 } |
|
71 |
|
72 TInt CBIG5ConverterImpl::ConvertFromUnicode( |
|
73 CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, |
|
74 const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, |
|
75 TDes8& aForeign, |
|
76 const TDesC16& aUnicode, |
|
77 CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters) |
|
78 { |
|
79 return CCnvCharacterSetConverter::DoConvertFromUnicode(CnvBig5::ConversionData(), aDefaultEndiannessOfForeignCharacters, aReplacementForUnconvertibleUnicodeCharacters, aForeign, aUnicode, aIndicesOfUnconvertibleCharacters); |
|
80 } |
|
81 |
|
82 TInt CBIG5ConverterImpl::ConvertToUnicode( |
|
83 CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, |
|
84 TDes16& aUnicode, |
|
85 const TDesC8& aForeign, |
|
86 TInt& /*aState*/, |
|
87 TInt& aNumberOfUnconvertibleCharacters, |
|
88 TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter) |
|
89 { |
|
90 return CCnvCharacterSetConverter::DoConvertToUnicode(CnvBig5::ConversionData(), aDefaultEndiannessOfForeignCharacters, aUnicode, aForeign, aNumberOfUnconvertibleCharacters, aIndexOfFirstByteOfFirstUnconvertibleCharacter); |
|
91 } |
|
92 |
|
93 TBool CBIG5ConverterImpl::IsInThisCharacterSetL( |
|
94 TBool& aSetToTrue, |
|
95 TInt& aConfidenceLevel, |
|
96 const TDesC8& aSample) |
|
97 { |
|
98 aSetToTrue=ETrue; |
|
99 TInt sampleLength = aSample.Length(); |
|
100 aConfidenceLevel = 0; |
|
101 //WBB the following is for distiguish between big5 and GBK |
|
102 TInt totalWeight=0; //sum of the weights of 20 most frequent chars |
|
103 TInt sumOfGoodChar=0; //the number of chars whose first byte and second are both in the range |
|
104 TInt sumOfWeight=0; //sum of the weights of the chars which are included in the sample |
|
105 TInt sumOutChar=0; //the number of chars which are not common |
|
106 TInt sumOfBadSecondByte=0;//the number of chars whose first byte is in the range but not the second |
|
107 struct referenceChar |
|
108 { |
|
109 TUint charBig5; |
|
110 TInt weight; |
|
111 }; |
|
112 |
|
113 referenceChar refBig5[20]; |
|
114 const TInt iniWeight[20]= |
|
115 { |
|
116 //occurence per 1000 chars |
|
117 30,20,20,10,10,10,10,10,5,5, |
|
118 5,5,5,5,5,5,5,5,5,5 |
|
119 }; |
|
120 |
|
121 const TUint iniChar[20]= |
|
122 { |
|
123 0xa141,0xaaba,0xa446,0xadd3,0xa4a3,0xa7e2,0xa440,0xac4f,0xad6e,0xa45d, |
|
124 0xa4d1,0xa457,0xa457,0xa94d,0xa4a4,0xa569,0xa662,0xa470,0xa448,0xa455 |
|
125 }; |
|
126 |
|
127 for (TInt k=0; k<20; k++) |
|
128 { |
|
129 refBig5[k].charBig5=iniChar[k]; |
|
130 refBig5[k].weight=iniWeight[k]; |
|
131 totalWeight=totalWeight+iniWeight[k]; |
|
132 } |
|
133 //WBB |
|
134 for (TInt i = 0; i < sampleLength; ++i) |
|
135 { |
|
136 // Big 5 encoding first byte range 0xA1-0xFE |
|
137 // second byte range 0x40-0x7E 0xA1-0xFE |
|
138 if((aSample[i] >= 0xa1) && (aSample[i] <= 0xfe)) |
|
139 { |
|
140 TInt increment1 = i+1; |
|
141 if (increment1 >= sampleLength) |
|
142 break; |
|
143 if(((aSample[increment1] >= 0x40) && (aSample[increment1] <= 0x7e)) || |
|
144 ((aSample[increment1] >= 0xa1) && (aSample[increment1] <= 0xfe))) |
|
145 { |
|
146 TUint charBig5=(aSample[i]<<8)|(aSample[increment1]); |
|
147 if (charBig5>=0xc6a1)//Kanas start and rare chars follow after |
|
148 sumOutChar++; |
|
149 TInt j; |
|
150 for (j=0; j<20; j++) |
|
151 { |
|
152 if (charBig5==refBig5[j].charBig5) |
|
153 { |
|
154 sumOfWeight=sumOfWeight+refBig5[j].weight; |
|
155 break; |
|
156 } |
|
157 } |
|
158 sumOfGoodChar++; |
|
159 i++; |
|
160 } |
|
161 else |
|
162 { |
|
163 sumOfBadSecondByte++; |
|
164 } |
|
165 } |
|
166 } // for |
|
167 |
|
168 if (sumOfGoodChar) |
|
169 { |
|
170 aConfidenceLevel=sumOfGoodChar*100/(sumOfBadSecondByte+sumOfGoodChar); |
|
171 aConfidenceLevel=aConfidenceLevel-Max(0,((totalWeight-sumOfWeight)*sumOfGoodChar/1000));//against frequent chars |
|
172 aConfidenceLevel=aConfidenceLevel-sumOutChar*100/sumOfGoodChar;//against gap |
|
173 aConfidenceLevel=(aConfidenceLevel < 0)?0:aConfidenceLevel; |
|
174 } |
|
175 else |
|
176 aConfidenceLevel=0; |
|
177 return ETrue; |
|
178 } |
|
179 |
|
180 CBIG5ConverterImpl* CBIG5ConverterImpl::NewL() |
|
181 { |
|
182 CBIG5ConverterImpl* self = new(ELeave) CBIG5ConverterImpl(); |
|
183 CleanupStack::PushL(self); |
|
184 self->ConstructL(); |
|
185 CleanupStack::Pop(self); |
|
186 return self; |
|
187 } |
|
188 |
|
189 CBIG5ConverterImpl::~CBIG5ConverterImpl() |
|
190 { |
|
191 } |
|
192 |
|
193 CBIG5ConverterImpl::CBIG5ConverterImpl() |
|
194 { |
|
195 } |
|
196 |
|
197 void CBIG5ConverterImpl::ConstructL() |
|
198 { |
|
199 } |
|
200 |
|
201 const TImplementationProxy ImplementationTable[] = |
|
202 { |
|
203 IMPLEMENTATION_PROXY_ENTRY(0x10000FBF,CBIG5ConverterImpl::NewL) |
|
204 }; |
|
205 |
|
206 EXPORT_C const TImplementationProxy* ImplementationGroupProxy(TInt& aTableCount) |
|
207 { |
|
208 aTableCount = sizeof(ImplementationTable) / sizeof(TImplementationProxy); |
|
209 |
|
210 return ImplementationTable; |
|
211 } |
|
212 |