|
1 /* |
|
2 * Copyright (c) 1997-2009 Nokia Corporation and/or its subsidiary(-ies). |
|
3 * All rights reserved. |
|
4 * This component and the accompanying materials are made available |
|
5 * under the terms of "Eclipse Public License v1.0" |
|
6 * which accompanies this distribution, and is available |
|
7 * at the URL "http://www.eclipse.org/legal/epl-v10.html". |
|
8 * |
|
9 * Initial Contributors: |
|
10 * Nokia Corporation - initial contribution. |
|
11 * |
|
12 * Contributors: |
|
13 * |
|
14 * Description: |
|
15 * |
|
16 */ |
|
17 |
|
18 |
|
19 #include <e32std.h> |
|
20 #include <charconv.h> |
|
21 #include "SHIFTJIS_2.H" |
|
22 #include <ecom/implementationproxy.h> |
|
23 #include "charactersetconverter.h" |
|
24 #include "featmgr/featmgr.h" |
|
25 |
|
26 /** |
|
27 Shift-JIS character converter wrapper |
|
28 |
|
29 @internalTechnology |
|
30 @released 9.1 |
|
31 */ |
|
32 class CShiftJisConverterImpl : public CCharacterSetConverterPluginInterface |
|
33 { |
|
34 |
|
35 public: |
|
36 virtual const TDesC8& ReplacementForUnconvertibleUnicodeCharacters(); |
|
37 |
|
38 virtual TInt ConvertFromUnicode( |
|
39 CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, |
|
40 const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, |
|
41 TDes8& aForeign, |
|
42 const TDesC16& aUnicode, |
|
43 CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters); |
|
44 |
|
45 virtual TInt ConvertToUnicode( |
|
46 CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, |
|
47 TDes16& aUnicode, |
|
48 const TDesC8& aForeign, |
|
49 TInt& aState, |
|
50 TInt& aNumberOfUnconvertibleCharacters, |
|
51 TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter); |
|
52 |
|
53 virtual TBool IsInThisCharacterSetL( |
|
54 TBool& aSetToTrue, |
|
55 TInt& aConfidenceLevel, |
|
56 const TDesC8& aSample); |
|
57 |
|
58 static CShiftJisConverterImpl* NewL(); |
|
59 virtual ~CShiftJisConverterImpl(); |
|
60 |
|
61 private: |
|
62 CShiftJisConverterImpl(); |
|
63 void ConstructL(); |
|
64 |
|
65 }; |
|
66 |
|
67 /** |
|
68 Get the the Shift-JIS byte sequence which will replace any Unicode characters which can't be converted. |
|
69 |
|
70 @return The Shift-JIS byte sequence which will replace any Unicode characters which can't be converted. |
|
71 @internalTechnology |
|
72 */ |
|
73 const TDesC8& CShiftJisConverterImpl::ReplacementForUnconvertibleUnicodeCharacters() |
|
74 { |
|
75 return CnvShiftJis::ReplacementForUnconvertibleUnicodeCharacters(); |
|
76 } |
|
77 |
|
78 TInt CShiftJisConverterImpl::ConvertFromUnicode( |
|
79 CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, |
|
80 const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, |
|
81 TDes8& aForeign, |
|
82 const TDesC16& aUnicode, |
|
83 CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters) |
|
84 { |
|
85 return CnvShiftJis::ConvertFromUnicode(aDefaultEndiannessOfForeignCharacters, aReplacementForUnconvertibleUnicodeCharacters, aForeign, aUnicode, aIndicesOfUnconvertibleCharacters); |
|
86 } |
|
87 |
|
88 |
|
89 /** |
|
90 Converts Shift-JIS encoded input text to Unicode |
|
91 |
|
92 NOTE: For debugging the selected character set is returned in the state. |
|
93 |
|
94 @released 9.1 |
|
95 @param aDefaultEndiannessOfForeignCharacters The default endian-ness to use when reading characters |
|
96 in the foreign character set. |
|
97 @param aUnicode On return, contains the text converted into Unicode. |
|
98 @param aForeign The non-Unicode source text to be converted. |
|
99 @param aState Used to save state information across multiple calls |
|
100 to <code>ConvertToUnicode()</code>. |
|
101 @param aNumberOfUnconvertibleCharacters On return, contains the number of bytes which were not |
|
102 converted. |
|
103 @param aIndexOfFirstByteOfFirstUnconvertibleCharacter On return, contains the index of the first bytein the |
|
104 input text that could not be converted. A negative |
|
105 value indicates that all the characters were |
|
106 converted. |
|
107 @return The number of unconverted bytes left at the end of the input descriptor |
|
108 (e.g. because the output descriptor is not long enough to hold all the text), |
|
109 or one of the error values defined in TError. |
|
110 @internalTechnology |
|
111 */ |
|
112 TInt CShiftJisConverterImpl::ConvertToUnicode( |
|
113 CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, |
|
114 TDes16& aUnicode, |
|
115 const TDesC8& aForeign, |
|
116 TInt& /*aState*/, |
|
117 TInt& aNumberOfUnconvertibleCharacters, |
|
118 TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter) |
|
119 { |
|
120 return CnvShiftJis::ConvertToUnicode(aDefaultEndiannessOfForeignCharacters, aUnicode, aForeign, aNumberOfUnconvertibleCharacters, aIndexOfFirstByteOfFirstUnconvertibleCharacter); |
|
121 } |
|
122 |
|
123 |
|
124 /** |
|
125 This API is used by CCnvCharacterSetConverter::AutoDetectCharacterSetL(). |
|
126 This method returns a value between 0 and 100, indicating how likely it |
|
127 is that this is the correct converter, for the text supplied. |
|
128 @internalTechnology |
|
129 */ |
|
130 TBool CShiftJisConverterImpl::IsInThisCharacterSetL( |
|
131 TBool& aSetToTrue, |
|
132 TInt& aConfidenceLevel, |
|
133 const TDesC8& aSample) |
|
134 { |
|
135 aSetToTrue=ETrue; |
|
136 TInt sampleLength = aSample.Length(); |
|
137 aConfidenceLevel = 0; |
|
138 TInt numberOfShiftJis=0; |
|
139 TInt occurrence=0; |
|
140 for (TInt i = 0; i < sampleLength; ++i) |
|
141 { |
|
142 // Check for JISX 0208:1997 Charset |
|
143 // First Byte in range 0x81-0x9f, 0xe0-0xef |
|
144 if (((aSample[i] >= 0x81) && (aSample[i] <= 0x9f)) || |
|
145 ((aSample[i] >= 0xe0) && (aSample[i] <= 0xef))) |
|
146 { |
|
147 // check that the second byte is in range as well |
|
148 TInt increment1 = i+1; |
|
149 if(increment1 >= sampleLength) |
|
150 break; |
|
151 if (((aSample[increment1] >= 0x40) && (aSample[increment1] <= 0x7e)) || |
|
152 ((aSample[increment1] >= 0x80) && (aSample[increment1] <= 0xfc))) |
|
153 { |
|
154 // increase the confidence of this sample as ShiftJis |
|
155 aConfidenceLevel=(aConfidenceLevel >0)?aConfidenceLevel+5:60; |
|
156 |
|
157 TUint charShiftJis=(aSample[i]<<8)|(aSample[increment1]); |
|
158 if ((charShiftJis>=0x829f)&&(charShiftJis<=0x82f1)|| |
|
159 (charShiftJis>=0x8340)&&(charShiftJis<=0x8396))//those are kanas range |
|
160 occurrence++; |
|
161 numberOfShiftJis++; |
|
162 i++; |
|
163 } |
|
164 } |
|
165 // Check That no other Japanese escape sequence occur... if they do, cancel this and return 0 |
|
166 // eg EUC-JP's SS(Single shift) characters followed by the |
|
167 if(aSample[i]==0x8e) |
|
168 { |
|
169 TInt increment1 = i+1; |
|
170 if(increment1 >= sampleLength) |
|
171 break; |
|
172 if ((aSample[increment1] >= 0xa1) && (aSample[increment1] <= 0xdf)) |
|
173 { |
|
174 // This could be EUC-JP format.. |
|
175 aConfidenceLevel=0; |
|
176 i++; |
|
177 } |
|
178 } |
|
179 if(aSample[i]==0x8f) |
|
180 { |
|
181 TInt increment1 = i+1; |
|
182 TInt increment2 = i+2; |
|
183 if((increment1 >= sampleLength) || (increment2 >= sampleLength)) |
|
184 break; |
|
185 if (((aSample[increment1] >= 0xa1) && (aSample[increment1] <= 0xfe)) && |
|
186 ((aSample[increment2] >= 0xa1) && (aSample[increment2] <= 0xfe))) |
|
187 { |
|
188 // This is definitely EUC-JP format. |
|
189 aConfidenceLevel=0; |
|
190 break; |
|
191 } |
|
192 } |
|
193 // Check the half width Katakana |
|
194 if (aSample[i]>=0xa1 && aSample[i]<=0xdf) |
|
195 { |
|
196 // increase the confidence of this sample as ShiftJis |
|
197 aConfidenceLevel=(aConfidenceLevel > 0) ? aConfidenceLevel+5 : 75; |
|
198 occurrence++; |
|
199 numberOfShiftJis++; |
|
200 } |
|
201 else if (aSample[i]>=0xf0) |
|
202 { |
|
203 aConfidenceLevel=0; |
|
204 } |
|
205 } // for |
|
206 |
|
207 if(numberOfShiftJis) |
|
208 { |
|
209 aConfidenceLevel=(aConfidenceLevel >100)?100:((aConfidenceLevel <0)?0:aConfidenceLevel); |
|
210 aConfidenceLevel=aConfidenceLevel-Max(0,(30-occurrence*100/numberOfShiftJis)); |
|
211 } |
|
212 aConfidenceLevel=(aConfidenceLevel < 0)?0:aConfidenceLevel; |
|
213 return ETrue; |
|
214 } |
|
215 |
|
216 |
|
217 CShiftJisConverterImpl* CShiftJisConverterImpl::NewL() |
|
218 { |
|
219 CShiftJisConverterImpl* self = new(ELeave) CShiftJisConverterImpl(); |
|
220 CleanupStack::PushL(self); |
|
221 self->ConstructL(); |
|
222 CleanupStack::Pop(self); |
|
223 return self; |
|
224 } |
|
225 |
|
226 |
|
227 CShiftJisConverterImpl::~CShiftJisConverterImpl() |
|
228 { |
|
229 FeatureManager::UnInitializeLib(); |
|
230 } |
|
231 |
|
232 CShiftJisConverterImpl::CShiftJisConverterImpl() |
|
233 { |
|
234 } |
|
235 |
|
236 |
|
237 void CShiftJisConverterImpl::ConstructL() |
|
238 { |
|
239 FeatureManager::InitializeLibL(); |
|
240 } |
|
241 |
|
242 const TImplementationProxy ImplementationTable[] = |
|
243 { |
|
244 IMPLEMENTATION_PROXY_ENTRY(0x10000FBD, CShiftJisConverterImpl::NewL) |
|
245 }; |
|
246 |
|
247 |
|
248 EXPORT_C const TImplementationProxy* ImplementationGroupProxy(TInt& aTableCount) |
|
249 { |
|
250 aTableCount = sizeof(ImplementationTable) / sizeof(TImplementationProxy); |
|
251 |
|
252 return ImplementationTable; |
|
253 } |