|
1 /* |
|
2 * Copyright (c) 2009 Nokia Corporation and/or its subsidiary(-ies). |
|
3 * All rights reserved. |
|
4 * This component and the accompanying materials are made available |
|
5 * under the terms of "Eclipse Public License v1.0" |
|
6 * which accompanies this distribution, and is available |
|
7 * at the URL "http://www.eclipse.org/legal/epl-v10.html". |
|
8 * |
|
9 * Initial Contributors: |
|
10 * Nokia Corporation - initial contribution. |
|
11 * |
|
12 * Contributors: |
|
13 * |
|
14 * Description: ISO2022kr conversion plugin |
|
15 * |
|
16 */ |
|
17 |
|
18 // INCLUDES |
|
19 #include <e32std.h> |
|
20 #include <charconv.h> |
|
21 #include <convgeneratedcpp.h> |
|
22 #include <ecom/implementationproxy.h> |
|
23 #include "cp949table.h" |
|
24 #include "charactersetconverter.h" |
|
25 |
|
26 static const TUint KBitsForNonStandardStates = 0x03; |
|
27 static const TUint KShiftedToKSCState = 0x01; |
|
28 |
|
29 static const TUint KMaxSizeOfTmpBuffer = 1024; |
|
30 |
|
31 static const TUint8 KMaxAscii = 0x9f; |
|
32 |
|
33 _LIT8(KLit8EscapeSequence, "\x1b\x24\x43"); |
|
34 |
|
35 #define SHIFT_IN_BYTE 0x0F |
|
36 #define SHIFT_OUT_BYTE 0x0E |
|
37 |
|
// Shift state tracked while post-processing the output of ConvertFromUnicode.
enum TISO2022FromUniState
    {
    EISO2022Initialize, // no shift byte has been written yet
    EISO2022Ascii,      // last shift byte written was shift-in
    EISO2022KSC         // last shift byte written was shift-out
    };
|
44 |
|
45 // New Interface class |
|
46 class CISO2022KRImplementation : public CCharacterSetConverterPluginInterface |
|
47 { |
|
48 public: |
|
49 virtual const TDesC8& ReplacementForUnconvertibleUnicodeCharacters(); |
|
50 |
|
51 virtual TInt ConvertFromUnicode( |
|
52 CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, |
|
53 const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, |
|
54 TDes8& aForeign, |
|
55 const TDesC16& aUnicode, |
|
56 CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters ); |
|
57 |
|
58 virtual TInt ConvertToUnicode( |
|
59 CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, |
|
60 TDes16& aUnicode, |
|
61 const TDesC8& aForeign, |
|
62 TInt& aState, |
|
63 TInt& aNumberOfUnconvertibleCharacters, |
|
64 TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter ); |
|
65 |
|
66 virtual TBool IsInThisCharacterSetL( |
|
67 TBool& aSetToTrue, |
|
68 TInt& aConfidenceLevel, |
|
69 const TDesC8& ); |
|
70 |
|
71 static CISO2022KRImplementation* NewL(); |
|
72 |
|
73 virtual ~CISO2022KRImplementation(); |
|
74 private: |
|
75 CISO2022KRImplementation(); |
|
76 }; |
|
77 |
|
78 // FUNCTION DEFINITIONS |
|
79 const TDesC8& CISO2022KRImplementation::ReplacementForUnconvertibleUnicodeCharacters() |
|
80 { |
|
81 return CnvCp949Table::ReplacementForUnconvertibleUnicodeCharacters(); |
|
82 } |
|
83 |
|
84 TInt CISO2022KRImplementation::ConvertFromUnicode( |
|
85 CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, |
|
86 const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, |
|
87 TDes8& aForeign, |
|
88 const TDesC16& aUnicode, |
|
89 CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters) |
|
90 { |
|
91 TInt ret; |
|
92 TInt currPos = 3; |
|
93 TUint outputConversionFlags = 0; |
|
94 TUint inputConversionFlags = CCnvCharacterSetConverter::EInputConversionFlagAppend; |
|
95 TISO2022FromUniState currState = EISO2022Initialize; |
|
96 TUint8 shiftByte = 0; |
|
97 TPtr8 shiftBytePtr(NULL, 0); |
|
98 |
|
99 aForeign.SetLength(0); |
|
100 |
|
101 /* Start with escape sequence */ |
|
102 aForeign.Append( KLit8EscapeSequence ); |
|
103 |
|
104 ret = CCnvCharacterSetConverter::DoConvertFromUnicode( CnvCp949Table::ConversionData(), |
|
105 aDefaultEndiannessOfForeignCharacters, |
|
106 aReplacementForUnconvertibleUnicodeCharacters, |
|
107 aForeign, |
|
108 aUnicode, |
|
109 aIndicesOfUnconvertibleCharacters, |
|
110 outputConversionFlags, |
|
111 inputConversionFlags ); |
|
112 /* Append shift in and out bytes as needed */ |
|
113 while( currPos < aForeign.Length() ) |
|
114 { |
|
115 TUint8 *currChar = (TUint8 *)aForeign.Mid(currPos).Ptr(); |
|
116 if( *currChar > KMaxAscii ) |
|
117 { /* KSC character */ |
|
118 if( currState != EISO2022KSC ) |
|
119 { /* Insert shift out byte */ |
|
120 shiftByte = SHIFT_OUT_BYTE; |
|
121 currState = EISO2022KSC; |
|
122 } |
|
123 |
|
124 /* Clear the 8th bit */ |
|
125 *currChar = (*currChar & ~(0x80)); |
|
126 } |
|
127 else |
|
128 { /* ASCII character */ |
|
129 if( currState != EISO2022Ascii ) |
|
130 { /* Insert shift in byte */ |
|
131 shiftByte = SHIFT_IN_BYTE; |
|
132 currState = EISO2022Ascii; |
|
133 } |
|
134 } |
|
135 |
|
136 if( shiftByte ) |
|
137 { |
|
138 if( (aForeign.Length() + 1) > aForeign.MaxLength() ) |
|
139 { /* Make room for shift byte */ |
|
140 if( aForeign[ (aForeign.Length() - 1) ] > KMaxAscii ) |
|
141 { /* Drop a dual byte KSC character */ |
|
142 aForeign.SetLength( aForeign.Length() - 2 ); |
|
143 } |
|
144 else |
|
145 { /* Drop a single byte ASCII character */ |
|
146 aForeign.SetLength( aForeign.Length() - 1 ); |
|
147 } |
|
148 /* Increase unconverted amount */ |
|
149 ret++; |
|
150 /* TBD, propably should try to fix aIndicesOfUnconvertibleCharacters |
|
151 if possible */ |
|
152 } |
|
153 shiftBytePtr.Set( &shiftByte, 1, 1 ); |
|
154 aForeign.Insert( currPos, shiftBytePtr ); |
|
155 currPos++; |
|
156 shiftByte = 0; |
|
157 } |
|
158 |
|
159 /* Skip current character */ |
|
160 currPos++; |
|
161 } |
|
162 |
|
163 return ret; |
|
164 } |
|
165 |
|
166 TInt CISO2022KRImplementation::ConvertToUnicode( |
|
167 CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, |
|
168 TDes16& aUnicode, |
|
169 const TDesC8& aForeign, |
|
170 TInt& aState, |
|
171 TInt& aNumberOfUnconvertibleCharacters, |
|
172 TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter) |
|
173 { |
|
174 TInt err; |
|
175 TInt ret = 0; |
|
176 TInt currPos = 0; |
|
177 TInt convPos = 0; |
|
178 TInt shiftInPos = KErrNotFound; |
|
179 TInt shiftOutPos = KErrNotFound; |
|
180 TInt shiftPos = KErrNotFound; |
|
181 TInt escPos = KErrNotFound; |
|
182 TPtrC8 currSegment; |
|
183 TPtrC8 convSegment; |
|
184 TBool changeState = EFalse; |
|
185 |
|
186 TUint outputConversionFlags = 0; |
|
187 TUint inputConversionFlags = CCnvCharacterSetConverter::EInputConversionFlagAppend; |
|
188 TInt numberOfUnconvertibleCharacters = 0; |
|
189 TInt indexOfFirstByteOfFirstUnconvertibleCharacter = 0; |
|
190 aNumberOfUnconvertibleCharacters = 0; |
|
191 |
|
192 while( currPos < aForeign.Length() ) |
|
193 { |
|
194 |
|
195 currSegment.Set( aForeign.Mid( currPos ) ); |
|
196 |
|
197 /* First change state if needed */ |
|
198 if( changeState ) |
|
199 { |
|
200 changeState = EFalse; |
|
201 if( (aState & KBitsForNonStandardStates) == KShiftedToKSCState ) |
|
202 { /* Switch back to default ASCII */ |
|
203 aState &= ~(KShiftedToKSCState); |
|
204 } |
|
205 else |
|
206 { /* Switch to KSC */ |
|
207 aState |= KShiftedToKSCState; |
|
208 } |
|
209 } |
|
210 |
|
211 /* Search for escape which should be skipped */ |
|
212 escPos = currSegment.Find( KLit8EscapeSequence ); |
|
213 |
|
214 /* Search for shift in byte */ |
|
215 shiftInPos = currSegment.Locate( SHIFT_IN_BYTE ); |
|
216 |
|
217 /* Search for shift out byte */ |
|
218 shiftOutPos = currSegment.Locate( SHIFT_OUT_BYTE ); |
|
219 |
|
220 /* Set shift pos according to found shift bytes */ |
|
221 if( shiftInPos == KErrNotFound && |
|
222 shiftOutPos == KErrNotFound ) |
|
223 { /* Neither found */ |
|
224 shiftPos = KErrNotFound; |
|
225 } |
|
226 else |
|
227 { |
|
228 if( (shiftInPos != KErrNotFound) && |
|
229 ((shiftInPos < shiftOutPos) || (shiftOutPos == KErrNotFound)) ) |
|
230 { /* shift in is nearer or shift out not found */ |
|
231 shiftPos = shiftInPos; |
|
232 /* Set state change if needed */ |
|
233 if( (aState & KBitsForNonStandardStates) == KShiftedToKSCState ) |
|
234 { |
|
235 changeState = ETrue; |
|
236 } |
|
237 } |
|
238 else |
|
239 { /* shift out must be nearer or shift in not fouind */ |
|
240 shiftPos = shiftOutPos; |
|
241 /* Set state change if needed */ |
|
242 if( (aState & KBitsForNonStandardStates) != KShiftedToKSCState ) |
|
243 { |
|
244 changeState = ETrue; |
|
245 } |
|
246 } |
|
247 } |
|
248 |
|
249 if( shiftPos == KErrNotFound ) |
|
250 { /* Shift byte not found, same coding for the rest of the data */ |
|
251 if( escPos == KErrNotFound ) |
|
252 { /* No escape sequence either, just convert the rest */ |
|
253 convSegment.Set( currSegment ); |
|
254 } |
|
255 } |
|
256 else if( ((escPos != KErrNotFound) && (shiftPos < escPos)) || |
|
257 (escPos == KErrNotFound) ) |
|
258 { /* Shift byte found and it comes before escape sequence or no escape |
|
259 sequence was found, convert data preceeding the shift byte if shift |
|
260 byte isn't the first character */ |
|
261 if( shiftPos == 0 ) |
|
262 { /* No data to convert preceeds the shift byte, just skip it and continue */ |
|
263 currPos += 1; |
|
264 continue; |
|
265 } |
|
266 convSegment.Set( currSegment.Left( shiftPos ) ); |
|
267 /* Clear to prevent convert to escape sequence */ |
|
268 escPos = KErrNotFound; |
|
269 } |
|
270 |
|
271 if( escPos != KErrNotFound ) |
|
272 { /* Escape sequence found before any shift bytes, |
|
273 clear possible state change and convert data |
|
274 preceeding the escape sequence if |
|
275 escape sequence is not at the beginning */ |
|
276 changeState = EFalse; |
|
277 if( escPos == 0 ) |
|
278 { /* No data to convert preceeds the escape sequence, just skip it continue */ |
|
279 currPos += KLit8EscapeSequence().Length(); |
|
280 continue; |
|
281 } |
|
282 convSegment.Set( currSegment.Left( escPos ) ); |
|
283 } |
|
284 |
|
285 if( (aState & KBitsForNonStandardStates) == KShiftedToKSCState ) |
|
286 { /* Convert KSC encoded */ |
|
287 HBufC8 *tmpForeign = NULL; |
|
288 |
|
289 if( (convSegment.Length() & 0x1) ) |
|
290 { /* KSC should have even amount of bytes */ |
|
291 ret = CCnvCharacterSetConverter::EErrorIllFormedInput; |
|
292 } |
|
293 else |
|
294 { |
|
295 convPos = 0; |
|
296 while( convPos < convSegment.Length() ) |
|
297 { |
|
298 TRAP( err, tmpForeign = HBufC8::NewL( KMaxSizeOfTmpBuffer ) ); |
|
299 if( err != KErrNone ) |
|
300 { |
|
301 User::Panic( _L("ISO-2022-KR"), err ); |
|
302 } |
|
303 |
|
304 if( convSegment.Length() < KMaxSizeOfTmpBuffer ) |
|
305 { /* Convert whole segment */ |
|
306 tmpForeign->Des().Copy( convSegment ); |
|
307 } |
|
308 else |
|
309 { /* Convert in chunks */ |
|
310 if( (convPos + KMaxSizeOfTmpBuffer) >= convSegment.Length() ) |
|
311 { /* Last chunk */ |
|
312 tmpForeign->Des().Copy( convSegment.Mid( convPos ) ); |
|
313 } |
|
314 else |
|
315 { |
|
316 tmpForeign->Des().Copy( convSegment.Mid( convPos, KMaxSizeOfTmpBuffer ) ); |
|
317 } |
|
318 } |
|
319 |
|
320 TUint8 *chars = (TUint8 *)tmpForeign->Des().Ptr(); |
|
321 for( TInt i = 0 ; i < tmpForeign->Length() ; i++ ) |
|
322 { /* Set highest bit in characters */ |
|
323 chars[i] |= 0x80; |
|
324 } |
|
325 |
|
326 numberOfUnconvertibleCharacters = 0; |
|
327 ret = CCnvCharacterSetConverter::DoConvertToUnicode( CnvCp949Table::ConversionData(), |
|
328 aDefaultEndiannessOfForeignCharacters, |
|
329 aUnicode, *tmpForeign, |
|
330 numberOfUnconvertibleCharacters, |
|
331 indexOfFirstByteOfFirstUnconvertibleCharacter, |
|
332 outputConversionFlags, |
|
333 inputConversionFlags ); |
|
334 if( numberOfUnconvertibleCharacters != 0 && |
|
335 aNumberOfUnconvertibleCharacters == 0 ) |
|
336 { /* First uncovertible found, set index relative to actual input buffer*/ |
|
337 aIndexOfFirstByteOfFirstUnconvertibleCharacter = (currPos + convPos + indexOfFirstByteOfFirstUnconvertibleCharacter); |
|
338 } |
|
339 |
|
340 aNumberOfUnconvertibleCharacters += numberOfUnconvertibleCharacters; |
|
341 |
|
342 if( ret < 0 ) |
|
343 { /* Some error, break the loop, |
|
344 errors are handled later */ |
|
345 delete tmpForeign; |
|
346 break; |
|
347 } |
|
348 |
|
349 if( ret > 0 ) |
|
350 { /* Not all were converted, fix return value |
|
351 to be relative to convSegment and break the loop */ |
|
352 ret = (convSegment.Length() - convPos - tmpForeign->Length() + ret); |
|
353 delete tmpForeign; |
|
354 break; |
|
355 } |
|
356 |
|
357 convPos += tmpForeign->Length(); |
|
358 delete tmpForeign; |
|
359 } |
|
360 } |
|
361 } |
|
362 else |
|
363 { /* Convert ASCII encoded by default, KSC can be used without setting highest bit */ |
|
364 numberOfUnconvertibleCharacters = 0; |
|
365 ret = CCnvCharacterSetConverter::DoConvertToUnicode( CnvCp949Table::ConversionData(), |
|
366 aDefaultEndiannessOfForeignCharacters, |
|
367 aUnicode, convSegment, |
|
368 numberOfUnconvertibleCharacters, |
|
369 indexOfFirstByteOfFirstUnconvertibleCharacter, |
|
370 outputConversionFlags, |
|
371 inputConversionFlags ); |
|
372 if( numberOfUnconvertibleCharacters != 0 && |
|
373 aNumberOfUnconvertibleCharacters == 0 ) |
|
374 { /* First uncovertible found, set index relative to actual input buffer*/ |
|
375 aIndexOfFirstByteOfFirstUnconvertibleCharacter = currPos + indexOfFirstByteOfFirstUnconvertibleCharacter; |
|
376 } |
|
377 aNumberOfUnconvertibleCharacters += numberOfUnconvertibleCharacters; |
|
378 } |
|
379 |
|
380 if( ret < 0 ) |
|
381 { /* Error during conversion */ |
|
382 return ret; |
|
383 } |
|
384 else if( ret > 0 ) |
|
385 { /* Not all characters where converted, return |
|
386 value indicating how many bytes in total are left unconverted */ |
|
387 return (aForeign.Length() - currPos - convSegment.Length() + ret); |
|
388 } |
|
389 |
|
390 /* Increase to skip converted data */ |
|
391 currPos += convSegment.Length(); |
|
392 if( escPos != KErrNotFound ) |
|
393 { /* Increase to skip escape sequence */ |
|
394 currPos += KLit8EscapeSequence().Length(); |
|
395 } |
|
396 else if( shiftPos != KErrNotFound ) |
|
397 { /* Increase to skip shift byte */ |
|
398 currPos += 1; |
|
399 } |
|
400 |
|
401 } |
|
402 |
|
403 return 0; |
|
404 } |
|
405 |
|
406 |
|
407 TBool CISO2022KRImplementation::IsInThisCharacterSetL( |
|
408 TBool& aSetToTrue, |
|
409 TInt& aConfidenceLevel, |
|
410 const TDesC8& aBuf) |
|
411 { |
|
412 aSetToTrue=ETrue; |
|
413 aConfidenceLevel=200; |
|
414 |
|
415 TUint8 ch(0); |
|
416 for (TInt i=0;i<aBuf.Length();i++) |
|
417 { |
|
418 ch=aBuf[i]; |
|
419 if (ch<0x7F) |
|
420 { |
|
421 continue; |
|
422 } |
|
423 else if (0xa1<=ch&&ch<=0xfe) |
|
424 { |
|
425 i++; |
|
426 __ASSERT_DEBUG(i<aBuf.Length(),User::Panic(_L("IS2022KR"),__LINE__)); |
|
427 } |
|
428 else |
|
429 { |
|
430 aConfidenceLevel=0; |
|
431 aSetToTrue=EFalse; |
|
432 break; |
|
433 } |
|
434 } |
|
435 return aSetToTrue; |
|
436 } |
|
437 |
|
438 CISO2022KRImplementation* CISO2022KRImplementation::NewL() |
|
439 { |
|
440 CISO2022KRImplementation* self = new(ELeave) CISO2022KRImplementation; |
|
441 return self; |
|
442 } |
|
443 |
|
444 CISO2022KRImplementation::CISO2022KRImplementation() |
|
445 { |
|
446 //default constructor.. do nothing |
|
447 } |
|
448 |
|
449 CISO2022KRImplementation::~CISO2022KRImplementation() |
|
450 { |
|
451 //default destructor .. do nothing |
|
452 } |
|
453 |
|
454 // ECOM CREATION FUNCTION |
|
455 const TImplementationProxy ImplementationTable[] = |
|
456 { |
|
457 // Note: This is the same UID as defined in old mmp-file |
|
458 // Used also in 12221212.rss ( implementation_uid ) |
|
459 IMPLEMENTATION_PROXY_ENTRY( 0x20010101, CISO2022KRImplementation::NewL ) |
|
460 }; |
|
461 |
|
462 EXPORT_C const TImplementationProxy* ImplementationGroupProxy( TInt& aTableCount ) |
|
463 { |
|
464 aTableCount = sizeof( ImplementationTable ) / sizeof(TImplementationProxy); |
|
465 return ImplementationTable; |
|
466 } |
|
467 |