|
1 /* |
|
2 * Copyright (c) 1997-2009 Nokia Corporation and/or its subsidiary(-ies). |
|
3 * All rights reserved. |
|
4 * This component and the accompanying materials are made available |
|
5 * under the terms of "Eclipse Public License v1.0" |
|
6 * which accompanies this distribution, and is available |
|
7 * at the URL "http://www.eclipse.org/legal/epl-v10.html". |
|
8 * |
|
9 * Initial Contributors: |
|
10 * Nokia Corporation - initial contribution. |
|
11 * |
|
12 * Contributors: |
|
13 * |
|
14 * Description: |
|
15 * |
|
16 */ |
|
17 |
|
18 |
|
19 #include <e32std.h> |
|
20 #include <charconv.h> |
|
21 #include "shiftjis.h" |
|
22 #include <ecom/implementationproxy.h> |
|
23 #include <charactersetconverter.h> |
|
24 |
|
25 |
|
26 /** |
|
27 Shift-JIS character converter wrapper |
|
28 |
|
29 @internalTechnology |
|
30 @released 9.1 |
|
31 */ |
|
32 class CShiftJisConverterImpl : public CCharacterSetConverterPluginInterface |
|
33 { |
|
34 |
|
35 public: |
|
36 virtual const TDesC8& ReplacementForUnconvertibleUnicodeCharacters(); |
|
37 |
|
38 virtual TInt ConvertFromUnicode( |
|
39 CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, |
|
40 const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, |
|
41 TDes8& aForeign, |
|
42 const TDesC16& aUnicode, |
|
43 CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters); |
|
44 |
|
45 virtual TInt ConvertToUnicode( |
|
46 CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, |
|
47 TDes16& aUnicode, |
|
48 const TDesC8& aForeign, |
|
49 TInt& aState, |
|
50 TInt& aNumberOfUnconvertibleCharacters, |
|
51 TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter); |
|
52 |
|
53 virtual TBool IsInThisCharacterSetL( |
|
54 TBool& aSetToTrue, |
|
55 TInt& aConfidenceLevel, |
|
56 const TDesC8& aSample); |
|
57 |
|
58 static CShiftJisConverterImpl* NewL(); |
|
59 virtual ~CShiftJisConverterImpl(); |
|
60 |
|
61 private: |
|
62 CShiftJisConverterImpl(); |
|
63 |
|
64 }; |
|
65 |
|
66 /** |
|
67 Get the the Shift-JIS byte sequence which will replace any Unicode characters which can't be converted. |
|
68 |
|
69 @return The Shift-JIS byte sequence which will replace any Unicode characters which can't be converted. |
|
70 @internalTechnology |
|
71 */ |
|
72 const TDesC8& CShiftJisConverterImpl::ReplacementForUnconvertibleUnicodeCharacters() |
|
73 { |
|
74 return CnvShiftJis::ReplacementForUnconvertibleUnicodeCharacters(); |
|
75 } |
|
76 |
|
77 TInt CShiftJisConverterImpl::ConvertFromUnicode( |
|
78 CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, |
|
79 const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, |
|
80 TDes8& aForeign, |
|
81 const TDesC16& aUnicode, |
|
82 CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters) |
|
83 { |
|
84 return CnvShiftJis::ConvertFromUnicode(aDefaultEndiannessOfForeignCharacters, aReplacementForUnconvertibleUnicodeCharacters, aForeign, aUnicode, aIndicesOfUnconvertibleCharacters); |
|
85 } |
|
86 |
|
87 |
|
88 /** |
|
89 Converts Shift-JIS encoded input text to Unicode |
|
90 |
|
91 NOTE: For debugging the selected character set is returned in the state. |
|
92 |
|
93 @released 9.1 |
|
94 @param aDefaultEndiannessOfForeignCharacters The default endian-ness to use when reading characters |
|
95 in the foreign character set. |
|
96 @param aUnicode On return, contains the text converted into Unicode. |
|
97 @param aForeign The non-Unicode source text to be converted. |
|
98 @param aState Used to save state information across multiple calls |
|
99 to <code>ConvertToUnicode()</code>. |
|
100 @param aNumberOfUnconvertibleCharacters On return, contains the number of bytes which were not |
|
101 converted. |
|
102 @param aIndexOfFirstByteOfFirstUnconvertibleCharacter On return, contains the index of the first bytein the |
|
103 input text that could not be converted. A negative |
|
104 value indicates that all the characters were |
|
105 converted. |
|
106 @return The number of unconverted bytes left at the end of the input descriptor |
|
107 (e.g. because the output descriptor is not long enough to hold all the text), |
|
108 or one of the error values defined in TError. |
|
109 @internalTechnology |
|
110 */ |
|
111 TInt CShiftJisConverterImpl::ConvertToUnicode( |
|
112 CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, |
|
113 TDes16& aUnicode, |
|
114 const TDesC8& aForeign, |
|
115 TInt& /*aState*/, |
|
116 TInt& aNumberOfUnconvertibleCharacters, |
|
117 TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter) |
|
118 { |
|
119 return CnvShiftJis::ConvertToUnicode(aDefaultEndiannessOfForeignCharacters, aUnicode, aForeign, aNumberOfUnconvertibleCharacters, aIndexOfFirstByteOfFirstUnconvertibleCharacter); |
|
120 } |
|
121 |
|
122 |
|
123 /** |
|
124 This API is used by CCnvCharacterSetConverter::AutoDetectCharacterSetL(). |
|
125 This method returns a value between 0 and 100, indicating how likely it |
|
126 is that this is the correct converter, for the text supplied. |
|
127 @internalTechnology |
|
128 */ |
|
129 TBool CShiftJisConverterImpl::IsInThisCharacterSetL( |
|
130 TBool& aSetToTrue, |
|
131 TInt& aConfidenceLevel, |
|
132 const TDesC8& aSample) |
|
133 { |
|
134 aSetToTrue=ETrue; |
|
135 TInt sampleLength = aSample.Length(); |
|
136 aConfidenceLevel = 0; |
|
137 TInt numberOfShiftJis=0; |
|
138 TInt occurrence=0; |
|
139 for (TInt i = 0; i < sampleLength; ++i) |
|
140 { |
|
141 // Check for JISX 0208:1997 Charset |
|
142 // First Byte in range 0x81-0x9f, 0xe0-0xef |
|
143 if (((aSample[i] >= 0x81) && (aSample[i] <= 0x9f)) || |
|
144 ((aSample[i] >= 0xe0) && (aSample[i] <= 0xef))) |
|
145 { |
|
146 // check that the second byte is in range as well |
|
147 TInt increment1 = i+1; |
|
148 if(increment1 >= sampleLength) |
|
149 break; |
|
150 if (((aSample[increment1] >= 0x40) && (aSample[increment1] <= 0x7e)) || |
|
151 ((aSample[increment1] >= 0x80) && (aSample[increment1] <= 0xfc))) |
|
152 { |
|
153 // increase the confidence of this sample as ShiftJis |
|
154 aConfidenceLevel=(aConfidenceLevel >0)?aConfidenceLevel+5:60; |
|
155 |
|
156 TUint charShiftJis=(aSample[i]<<8)|(aSample[increment1]); |
|
157 if ((charShiftJis>=0x829f)&&(charShiftJis<=0x82f1)|| |
|
158 (charShiftJis>=0x8340)&&(charShiftJis<=0x8396))//those are kanas range |
|
159 occurrence++; |
|
160 numberOfShiftJis++; |
|
161 i++; |
|
162 } |
|
163 } |
|
164 // Check That no other Japanese escape sequence occur... if they do, cancel this and return 0 |
|
165 // eg EUC-JP's SS(Single shift) characters followed by the |
|
166 if(aSample[i]==0x8e) |
|
167 { |
|
168 TInt increment1 = i+1; |
|
169 if(increment1 >= sampleLength) |
|
170 break; |
|
171 if ((aSample[increment1] >= 0xa1) && (aSample[increment1] <= 0xdf)) |
|
172 { |
|
173 // This could be EUC-JP format.. |
|
174 aConfidenceLevel=0; |
|
175 i++; |
|
176 } |
|
177 } |
|
178 if(aSample[i]==0x8f) |
|
179 { |
|
180 TInt increment1 = i+1; |
|
181 TInt increment2 = i+2; |
|
182 if((increment1 >= sampleLength) || (increment2 >= sampleLength)) |
|
183 break; |
|
184 if (((aSample[increment1] >= 0xa1) && (aSample[increment1] <= 0xfe)) && |
|
185 ((aSample[increment2] >= 0xa1) && (aSample[increment2] <= 0xfe))) |
|
186 { |
|
187 // This is definitely EUC-JP format. |
|
188 aConfidenceLevel=0; |
|
189 break; |
|
190 } |
|
191 } |
|
192 } // for |
|
193 |
|
194 if(numberOfShiftJis) |
|
195 { |
|
196 aConfidenceLevel=(aConfidenceLevel >100)?100:((aConfidenceLevel <0)?0:aConfidenceLevel); |
|
197 aConfidenceLevel=aConfidenceLevel-Max(0,(30-occurrence*100/numberOfShiftJis)); |
|
198 } |
|
199 aConfidenceLevel=(aConfidenceLevel < 0)?0:aConfidenceLevel; |
|
200 return ETrue; |
|
201 } |
|
202 |
|
203 |
|
204 CShiftJisConverterImpl* CShiftJisConverterImpl::NewL() |
|
205 { |
|
206 CShiftJisConverterImpl* self = new(ELeave) CShiftJisConverterImpl(); |
|
207 return self; |
|
208 } |
|
209 |
|
210 |
|
211 CShiftJisConverterImpl::~CShiftJisConverterImpl() |
|
212 { |
|
213 } |
|
214 |
|
215 CShiftJisConverterImpl::CShiftJisConverterImpl() |
|
216 { |
|
217 } |
|
218 |
|
219 const TImplementationProxy ImplementationTable[] = |
|
220 { |
|
221 #ifdef KDDIAU_TEST |
|
222 // for the test build use a special test UID which is called |
|
223 //explicitly from test code |
|
224 IMPLEMENTATION_PROXY_ENTRY(0x01000001, CShiftJisConverterImpl::NewL) |
|
225 #else |
|
226 IMPLEMENTATION_PROXY_ENTRY(0x10000FBD, CShiftJisConverterImpl::NewL) |
|
227 #endif |
|
228 }; |
|
229 |
|
230 |
|
231 EXPORT_C const TImplementationProxy* ImplementationGroupProxy(TInt& aTableCount) |
|
232 { |
|
233 aTableCount = sizeof(ImplementationTable) / sizeof(TImplementationProxy); |
|
234 |
|
235 return ImplementationTable; |
|
236 } |