|
1 /* |
|
2 * Copyright (c) 1997-2009 Nokia Corporation and/or its subsidiary(-ies). |
|
3 * All rights reserved. |
|
4 * This component and the accompanying materials are made available |
|
5 * under the terms of "Eclipse Public License v1.0" |
|
6 * which accompanies this distribution, and is available |
|
7 * at the URL "http://www.eclipse.org/legal/epl-v10.html". |
|
8 * |
|
9 * Initial Contributors: |
|
10 * Nokia Corporation - initial contribution. |
|
11 * |
|
12 * Contributors: |
|
13 * |
|
14 * Description: |
|
15 * |
|
16 */ |
|
17 |
|
18 |
|
19 #if !defined(__CHARCONV_H__) |
|
20 #define __CHARCONV_H__ |
|
21 |
|
22 #if !defined(__E32STD_H__) |
|
23 #include <e32std.h> |
|
24 #endif |
|
25 |
|
26 #if !defined(__E32BASE_H__) |
|
27 #include <e32base.h> |
|
28 #endif |
|
29 |
|
30 /** |
|
31 The maximum length in bytes of the replacement text for unconvertible Unicode |
|
32 characters (=50) (see CCnvCharacterSetConverter::SetReplacementForUnconvertibleUnicodeCharactersL()). |
|
33 @publishedAll |
|
34 @released |
|
35 */ |
|
36 const TInt KMaximumLengthOfReplacementForUnconvertibleUnicodeCharacters=50; |
|
37 |
|
38 /** |
|
39 UTF-7 |
|
40 @publishedAll |
|
41 @released |
|
42 */ |
|
43 const TUint KCharacterSetIdentifierUtf7=0x1000582c; |
|
44 /** |
|
45 UTF-8 |
|
46 @publishedAll |
|
47 @released |
|
48 */ |
|
49 const TUint KCharacterSetIdentifierUtf8=0x1000582d; |
|
50 /** |
|
51 IMAP UTF-7 |
|
52 @publishedAll |
|
53 @released |
|
54 */ |
|
55 const TUint KCharacterSetIdentifierImapUtf7=0x1000582e; |
|
56 /** |
|
57 Java UTF-8 |
|
58 @publishedAll |
|
59 @released |
|
60 */ |
|
61 const TUint KCharacterSetIdentifierJavaConformantUtf8=0x1000582f; |
|
62 /** |
|
63 Code Page 1252 |
|
64 @publishedAll |
|
65 @released |
|
66 */ |
|
67 const TUint KCharacterSetIdentifierCodePage1252=0x100012b6; |
|
68 /** |
|
69 ISO 8859-1 |
|
70 @publishedAll |
|
71 @released |
|
72 */ |
|
73 const TUint KCharacterSetIdentifierIso88591=0x10003b10; |
|
74 /** |
|
75 ISO 8859-2 |
|
76 @publishedAll |
|
77 @released |
|
78 */ |
|
79 const TUint KCharacterSetIdentifierIso88592=0x1000507e; |
|
80 /** |
|
81 ISO 8859-3 |
|
82 @publishedAll |
|
83 @released |
|
84 */ |
|
85 const TUint KCharacterSetIdentifierIso88593=0x10008a28; |
|
86 /** |
|
87 ISO 8859-4 |
|
88 @publishedAll |
|
89 @released |
|
90 */ |
|
91 const TUint KCharacterSetIdentifierIso88594=0x1000507f; |
|
92 /** |
|
93 ISO 8859-5 |
|
94 @publishedAll |
|
95 @released |
|
96 */ |
|
97 const TUint KCharacterSetIdentifierIso88595=0x10005080; |
|
98 /** |
|
99 ISO 8859-6 |
|
100 @publishedAll |
|
101 @released |
|
102 */ |
|
103 const TUint KCharacterSetIdentifierIso88596=0x10008a29; |
|
104 /** |
|
105 ISO 8859-7 |
|
106 @publishedAll |
|
107 @released |
|
108 */ |
|
109 const TUint KCharacterSetIdentifierIso88597=0x10005081; |
|
110 /** |
|
111 ISO 8859-8 |
|
112 @publishedAll |
|
113 @released |
|
114 */ |
|
115 const TUint KCharacterSetIdentifierIso88598=0x10008a2a; |
|
116 /** |
|
117 ISO 8859-9 |
|
118 @publishedAll |
|
119 @released |
|
120 */ |
|
121 const TUint KCharacterSetIdentifierIso88599=0x10005082; |
|
122 /** |
|
123 ISO 8859-10 |
|
124 @publishedAll |
|
125 @released |
|
126 */ |
|
127 const TUint KCharacterSetIdentifierIso885910=0x10008a2b; |
|
128 /** |
|
129 ISO 8859-13 |
|
130 @publishedAll |
|
131 @released |
|
132 */ |
|
133 const TUint KCharacterSetIdentifierIso885913=0x10008a2c; |
|
134 /** |
|
135 ISO 8859-14 |
|
136 @publishedAll |
|
137 @released |
|
138 */ |
|
139 const TUint KCharacterSetIdentifierIso885914=0x10008a2d; |
|
140 /** |
|
141 ISO 8859-15 |
|
142 @publishedAll |
|
143 @released |
|
144 */ |
|
145 const TUint KCharacterSetIdentifierIso885915=0x10008a2e; |
|
146 /** |
|
147 ASCII |
|
148 @publishedAll |
|
149 @released |
|
150 */ |
|
151 const TUint KCharacterSetIdentifierAscii=0x10004cc6; |
|
152 /** |
|
153 SMS 7-bit |
|
154 @publishedAll |
|
155 @released |
|
156 */ |
|
157 const TUint KCharacterSetIdentifierSms7Bit=0x100053ab; |
|
158 /** |
|
159 GB 2312 |
|
160 @publishedAll |
|
161 @released |
|
162 */ |
|
163 const TUint KCharacterSetIdentifierGb2312=0x10000fbe; |
|
164 /** |
|
165 HZ-GB-2312 |
|
166 @publishedAll |
|
167 @released |
|
168 */ |
|
169 const TUint KCharacterSetIdentifierHz=0x10006065; |
|
170 /** |
|
171 GB 12345 |
|
172 @publishedAll |
|
173 @released |
|
174 */ |
|
175 const TUint KCharacterSetIdentifierGb12345=0x1000401a; |
|
176 /** |
|
177 GBK |
|
178 @publishedAll |
|
179 @released |
|
180 */ |
|
181 const TUint KCharacterSetIdentifierGbk=0x10003ecb; |
|
182 /** |
|
183 GB18030 |
|
184 @publishedAll |
|
185 @released |
|
186 */ |
|
187 const TUint KCharacterSetIdentifierGb18030=0x10287038; |
|
188 /** |
|
189 Big 5 |
|
190 @publishedAll |
|
191 @released |
|
192 */ |
|
193 const TUint KCharacterSetIdentifierBig5=0x10000fbf; |
|
194 /** |
|
195 Shift-JIS |
|
196 @publishedAll |
|
197 @released |
|
198 */ |
|
199 const TUint KCharacterSetIdentifierShiftJis=0x10000fbd; |
|
200 /** |
|
201 ISO-2022-JP |
|
202 @publishedAll |
|
203 @released |
|
204 */ |
|
205 const TUint KCharacterSetIdentifierIso2022Jp=0x100066a0; |
|
206 /** |
|
207 ISO-2022-JP-1 |
|
208 @publishedAll |
|
209 @released |
|
210 */ |
|
211 const TUint KCharacterSetIdentifierIso2022Jp1=0x100066a3; |
|
212 /** |
|
213 JIS Encoding |
|
214 @publishedAll |
|
215 @released |
|
216 */ |
|
217 const TUint KCharacterSetIdentifierJis=0x10006066; |
|
218 /** |
|
219 EUC-JP |
|
220 @publishedAll |
|
221 @released |
|
222 */ |
|
223 const TUint KCharacterSetIdentifierEucJpPacked=0x10006067; |
|
224 |
|
225 /** |
|
226 JP5 |
|
227 @publishedAll |
|
228 @released |
|
229 */ |
|
230 const TUint KCharacterSetIdentifierJ5=0x1020D408; |
|
231 /** |
|
232 CP850 |
|
233 @publishedAll |
|
234 @released |
|
235 */ |
|
236 const TUint KCharacterSetIdentifierCP850=0x102825AD; |
|
237 |
|
238 const TUint KCharacterSetIdentifierUnicodeLittle=0x101f3fae; //Little Endian Unicode |
|
239 const TUint KCharacterSetIdentifierUnicodeBig=0x101f4052; // Big Endian Unicode |
|
240 const TUint KCharacterSetIdentifierUcs2=0x101ff492; |
|
241 |
|
242 |
|
243 /** |
|
244 Extended SMS 7-bit |
|
245 @publishedAll |
|
246 @released |
|
247 */ |
|
248 const TUint KCharacterSetIdentifierExtendedSms7Bit=0x102863FD; |
|
249 |
|
250 /** |
|
251 Turkish |
|
252 @publishedAll |
|
253 @released |
|
254 */ |
|
255 const TUint KCharacterSetIdentifierTurkishSingleSms7Bit=0x102863FE; |
|
256 const TUint KCharacterSetIdentifierTurkishLockingSms7Bit=0x102863FF; |
|
257 const TUint KCharacterSetIdentifierTurkishLockingAndSingleSms7Bit=0x10286400; |
|
258 |
|
259 /** |
|
260 Portuguese |
|
261 @publishedAll |
|
262 @released |
|
263 */ |
|
264 const TUint KCharacterSetIdentifierPortugueseSingleSms7Bit=0x10286407; |
|
265 const TUint KCharacterSetIdentifierPortugueseLockingSms7Bit=0x10286408; |
|
266 const TUint KCharacterSetIdentifierPortugueseLockingAndSingleSms7Bit=0x10286409; |
|
267 |
|
268 /** |
|
269 Spanish |
|
270 @publishedAll |
|
271 @released |
|
272 */ |
|
273 const TUint KCharacterSetIdentifierSpanishSingleSms7Bit=0x1028640A; |
|
274 |
|
275 |
|
276 /** |
|
277 code page 949 |
|
278 @publishedAll |
|
279 @released |
|
280 */ |
|
281 const TUint KCharacterSetIdentifierCP949=0x200100FF; |
|
282 |
|
283 /** |
|
284 Shift-JIS with Pictograph |
|
285 @publishedAll |
|
286 @released |
|
287 */ |
|
288 const TUint KCharacterSetIdentifierShiftJisDirectmap=0x101F8691; |
|
289 |
|
290 /** |
|
291 EUC-JP with direct mapped pictograph |
|
292 @publishedAll |
|
293 @released |
|
294 */ |
|
295 const TUint KCharacterSetIdentifierEucJpDirectmap=0x101F86A6; |
|
296 |
|
297 /** |
|
298 EUC-KR |
|
299 @publishedAll |
|
300 @released |
|
301 */ |
|
302 const TUint KCharacterSetIdentifierEUCKR=0x2000E526; |
|
303 |
|
304 /** |
|
305 iscii |
|
306 @publishedAll |
|
307 @released |
|
308 */ |
|
309 const TUint KCharacterSetIdentifierIscii=0x1027508E; |
|
310 |
|
311 /** |
|
312 ISO2022 Korean |
|
313 @publishedAll |
|
314 @released |
|
315 */ |
|
316 const TUint KCharacterSetIdentifierIso2022kr=0x20010101; |
|
317 |
|
318 /** |
|
319 KOI8-R Russian |
|
320 @publishedAll |
|
321 @released |
|
322 */ |
|
323 const TUint KCharacterSetIdentifierKOI8R=0x101F8778; |
|
324 |
|
325 /** |
|
326 KOI8-U Belorusian/Ukrainian Cyrillic |
|
327 @publishedAll |
|
328 @released |
|
329 */ |
|
330 const TUint KCharacterSetIdentifierKOI8U=0x101F8761; |
|
331 |
|
332 /** |
|
333 KSC5601 Korean |
|
334 @publishedAll |
|
335 @released |
|
336 */ |
|
337 const TUint KCharacterSetIdentifierKsc5601=0x200113CD; |
|
338 |
|
339 /** |
|
340 TIS_620 Thai |
|
341 @publishedAll |
|
342 @released |
|
343 */ |
|
344 const TUint KCharacterSetIdentifierTIS_620=0x101F8549; |
|
345 |
|
346 /** |
|
347 Code page 874 Thai |
|
348 @publishedAll |
|
349 @released |
|
350 */ |
|
351 const TUint KCharacterSetIdentifierWin874=0x101F854A; |
|
352 |
|
353 /** |
|
354 Code page 1250 Eastern European |
|
355 @publishedAll |
|
356 @released |
|
357 */ |
|
358 const TUint KCharacterSetIdentifierWin1250=0x100059D6; |
|
359 |
|
360 /** |
|
361 Code page 1251 Cyrillic |
|
362 @publishedAll |
|
363 @released |
|
364 */ |
|
365 const TUint KCharacterSetIdentifierWin1251=0x100059D7; |
|
366 |
|
367 /** |
|
368 Code page 1253 Greek |
|
369 @publishedAll |
|
370 @released |
|
371 */ |
|
372 const TUint KCharacterSetIdentifierWin1253=0x100059D8; |
|
373 |
|
374 /** |
|
375 Code page 1254 Turkish |
|
376 @publishedAll |
|
377 @released |
|
378 */ |
|
379 const TUint KCharacterSetIdentifierWin1254=0x100059D9; |
|
380 |
|
381 /** |
|
382 Code page 1255 Hebrew |
|
383 @publishedAll |
|
384 @released |
|
385 */ |
|
386 const TUint KCharacterSetIdentifierWin1255=0x101F8547; |
|
387 |
|
388 /** |
|
389 Code page 1256 Arabic |
|
390 @publishedAll |
|
391 @released |
|
392 */ |
|
393 const TUint KCharacterSetIdentifierWin1256=0x101F8548; |
|
394 |
|
395 /** |
|
396 Code page 1257 Baltic |
|
397 @publishedAll |
|
398 @released |
|
399 */ |
|
400 const TUint KCharacterSetIdentifierWin1257=0x100059DA; |
|
401 |
|
402 /** |
|
403 Windows-1258 |
|
404 @publishedAll |
|
405 @released |
|
406 */ |
|
407 const TUint KCharacterSetIdentifierWin1258=0x102073B8; |
|
408 |
|
// Note: character sets other than those listed above may be available at run-time.
// Conversely, none of the character sets listed above is guaranteed to be available at run-time.
|
410 |
|
// Forward declarations: these types are only named (through pointers,
// references or friend declarations) by the declarations in this header,
// so their full definitions are not required here.
class CCharsetCnvCache;
class CDeepDestructingArrayOfCharactersSets;
class CFileReader;
class CStandardNamesAndMibEnums;
class RFs;
struct SCnvConversionData;
|
417 /** |
|
418 Converts text between Unicode and other character sets. |
|
419 |
|
420 The first stage of the conversion is to specify the non-Unicode character |
|
421 set being converted to or from. This is done by calling one of the overloads |
|
422 of PrepareToConvertToOrFromL(). |
|
423 |
|
424 The second stage is to convert the text, using one of the overloads of |
|
425 ConvertFromUnicode() or ConvertToUnicode(). |
|
426 |
|
427 Where possible the first documented overload of PrepareToConvertToOrFromL() |
|
428 should be used because the second overload panics if the specified character |
|
429 set is not available: the first overload simply returns whether the character |
|
430 set is available or not available. However if the conversions are to be |
|
431 performed often, or if the user must select the character set for the |
|
432 conversion from a list, the second overload may be more appropriate. |
|
433 |
|
434 The first overload is less efficient than the second, because it searches |
|
435 through the file system for the selected character set every time it is invoked. |
|
436 The second overload searches through an array of all available character sets. |
|
437 In this method, the file system need only be searched once - when |
|
438 CreateArrayOfCharacterSetsAvailableLC() or |
|
439 CreateArrayOfCharacterSetsAvailableL() is used to create the array. |
|
440 |
|
441 The conversion functions allow users of this class to perform partial |
|
442 conversions on an input descriptor, handling the situation where the input |
|
443 descriptor is truncated mid way through a multi-byte character. This means |
|
444 that you do not have to guess how big to make the output descriptor for a |
|
445 given input descriptor, you can simply do the conversion in a loop using a |
|
446 small output descriptor. The ability to handle truncated descriptors also |
|
447 allows users of the class to convert information received in chunks from an |
|
448 external source. |
|
449 |
|
450 The class also provides a number of utility functions. |
|
451 @publishedAll |
|
452 @released |
|
453 */ |
|
454 class CCnvCharacterSetConverter : public CBase |
|
455 { |
|
456 public: |
|
457 /** Indicates whether a character set is available or unavailable |
|
458 for conversion. Used by the second overload of |
|
459 PrepareToConvertToOrFromL(). */ |
|
460 enum TAvailability |
|
461 { |
|
462 /** The requested character set can be converted. */ |
|
463 EAvailable, |
|
464 /** The requested character set cannot be converted. */ |
|
465 ENotAvailable |
|
466 }; |
|
467 |
|
468 /** Conversion error flags. At this stage there is only one error |
|
469 flag- others may be added in the future. */ |
|
470 enum TError |
|
471 { |
|
472 /** The input descriptor contains a single corrupt character. This |
|
473 might occur when the input descriptor only contains some of the bytes |
|
474 of a single multi-byte character. */ |
|
475 EErrorIllFormedInput=KErrCorrupt |
|
476 }; |
|
477 |
|
478 /** Specifies the default endian-ness of the current character set. |
|
479 Used by SetDefaultEndiannessOfForeignCharacters(). */ |
|
480 enum TEndianness |
|
481 { |
|
482 /** The character set is big-endian. */ |
|
483 ELittleEndian, |
|
484 /** The character set is little-endian. */ |
|
485 EBigEndian |
|
486 }; |
|
487 |
|
488 /** Downgrade for line and paragraph separators */ |
|
489 enum TDowngradeForExoticLineTerminatingCharacters |
|
490 { |
|
491 /** Paragraph/line separators should be downgraded (if necessary) |
|
492 into carriage return and line feed pairs. */ |
|
493 EDowngradeExoticLineTerminatingCharactersToCarriageReturnLineFeed, |
|
494 /** Paragraph/line separators should be downgraded (if necessary) |
|
495 into a line feed only. */ |
|
496 EDowngradeExoticLineTerminatingCharactersToJustLineFeed |
|
497 }; |
|
498 |
|
499 /** Output flag used to indicate whether or not a character in the source |
|
500 descriptor is the first half of a surrogate pair, but is the last |
|
501 character in the descriptor to convert. |
|
502 |
|
503 Note: This enumeration can be used in the DoConvertToUnicode() and |
|
504 DoConvertFromUnicode() functions. These are part of the |
|
505 Character Conversion Plug-in Provider API and are for use by plug-in |
|
506 conversion libraries only. |
|
507 @since 6.0 */ |
|
508 enum |
|
509 { |
|
510 /** Appends the converted text to the output descriptor.*/ |
|
511 EInputConversionFlagAppend =0x00010000, |
|
512 /** By default, when the input descriptor passed to DoConvertFromUnicode() |
|
513 or DoConvertToUnicode() consists of nothing but a truncated sequence, |
|
514 the error-code EErrorIllFormedInput is returned. |
|
515 If this behaviour is undesirable, the input flag |
|
516 EInputConversionFlagAllowTruncatedInputNotEvenPartlyConsumable |
|
517 should be set. */ |
|
518 EInputConversionFlagAllowTruncatedInputNotEvenPartlyConsumable =0x00020000, |
|
519 /** Stops converting when the first unconvertible character is reached. */ |
|
520 EInputConversionFlagStopAtFirstUnconvertibleCharacter =0x00040000, |
|
521 /** Appends the default character set Escape sequence at end of converted text */ |
|
522 EInputConversionFlagMustEndInDefaultCharacterSet =0x00080000, |
|
523 /*defect fix: INC053609; According to RFC1468 we can assume the line starts |
|
524 in ASCII so there is no need to always insert an escape sequence*/ |
|
525 EInputConversionFlagAssumeStartInDefaultCharacterSet =0x00100000 |
|
526 }; |
|
527 enum |
|
528 { |
|
529 /** Indicates whether or not the source descriptor ends in a truncated |
|
530 sequence, e.g. the first half only of a surrogate pair. */ |
|
531 EOutputConversionFlagInputIsTruncated =0x01000000 |
|
532 }; |
|
533 |
|
534 /** Initial value for the state argument in a set of related calls to |
|
535 ConvertToUnicode(). */ |
|
536 enum {KStateDefault=0}; |
|
537 enum |
|
538 { |
|
539 /** The lowest confidence value for a character set accepted by |
|
540 Autodetect*/ |
|
541 ELowestThreshold = 25 |
|
542 }; |
|
543 |
|
544 /** Stores information about a non-Unicode character set. The information |
|
545 is used to locate the conversion information required by |
|
546 ConvertFromUnicode() and ConvertToUnicode(). |
|
547 |
|
548 An array of these structs that contain all available character sets |
|
549 can be generated by CreateArrayOfCharacterSetsAvailableLC() and |
|
550 CreateArrayOfCharacterSetsAvailableL(), and is used by one of the |
|
551 overloads of PrepareToConvertToOrFromL(). */ |
|
552 struct SCharacterSet |
|
553 { |
|
554 /** Gets the character sets UID. |
|
555 |
|
556 @return The UID of the character set. */ |
|
557 inline TUint Identifier() const {return iIdentifier;} |
|
558 |
|
559 /** Tests whether a filename given by the function SCharacterSet::Name() |
|
560 is a real file name (i.e. conversion is provided by a plug in DLL), or |
|
561 just the character set name (i.e. conversion is built into Symbian OS). |
|
562 |
|
563 Note: If the function returns ETrue then the path and filename can be |
|
564 parsed using TParse or TParsePtrC functions to obtain just the filename. |
|
565 |
|
566 @return ETrue if the name is a real filename. EFalse if it is just the |
|
567 character set name. */ |
|
568 inline TBool NameIsFileName() const {return iFlags&EFlagNameIsFileName;} |
|
569 |
|
570 /** Gets the full path and filename of the DLL which implements |
|
571 conversion for the character set. |
|
572 |
|
573 If the character set is one for which conversion is built into Symbian |
|
574 OS rather than implemented by a plug in DLL, the function just returns |
|
575 the name of the character set. The NameIsFileName() function can be |
|
576 used to determine whether or not it is legal to create a TParsePtrC |
|
577 object over the descriptor returned by Name(). |
|
578 |
|
579 Notes: |
|
580 |
|
581 The name returned cannot be treated as an Internet-standard name, it |
|
582 is locale-independent and should be mapped to the locale-dependent name |
|
583 by software at a higher level before being shown to the user. Conversion |
|
584 from Internet-standard names of character sets to the UID identifiers |
|
585 is provided by the member function |
|
586 ConvertStandardNameOfCharacterSetToIdentifierL(). |
|
587 |
|
588 Typically, to find the user-displayable name (as opposed to the |
|
589 internet-standard name) of a character set, you would do something |
|
590 like this: |
|
591 |
|
592 @code |
|
593 const CCnvCharacterSetConverter::SCharacterSet& characterSet=...; |
|
594 const TPtrC userDisplayable(characterSet.NameIsFileName()? TParsePtrC(characterSet.Name()).Name(): |
|
595 characterSet.Name()); |
|
596 @endcode |
|
597 |
|
598 @return Full path and filename of the character set converter plug in |
|
599 DLL, or just the name of the character set. */ |
|
600 inline TPtrC Name() const {return *iName;} |
|
601 private: |
|
602 enum |
|
603 { |
|
604 EFlagNameIsFileName =0x00000001, |
|
605 EFlagFileIsConversionPlugInLibrary =0x00000002 |
|
606 }; |
|
607 private: |
|
608 inline TBool FileIsConversionPlugInLibrary() const {return iFlags&EFlagFileIsConversionPlugInLibrary;} |
|
609 private: |
|
610 TUint iIdentifier; |
|
611 TUint iFlags; |
|
612 HBufC* iName; |
|
613 private: |
|
614 friend class CCnvCharacterSetConverter; |
|
615 friend class CDeepDestructingArrayOfCharactersSets; |
|
616 }; //SCharacterSet |
|
617 |
|
618 |
|
619 /** |
|
620 Holds an ascending array of the indices of the characters in the |
|
621 source Unicode text which could not be converted by |
|
622 CCnvCharacterSetConverter::ConvertFromUnicode() into the foreign |
|
623 character set |
|
624 @publishedAll |
|
625 @released |
|
626 */ |
|
627 class TArrayOfAscendingIndices |
|
628 { |
|
629 public: |
|
630 /** The return value of CCnvCharacterSetConverter::AppendIndex(). */ |
|
631 enum TAppendResult |
|
632 { |
|
633 /** The append failed. */ |
|
634 EAppendFailed, |
|
635 /** The append succeeded. */ |
|
636 EAppendSuccessful |
|
637 }; |
|
638 public: |
|
639 /** C++ constructor. The array is initialised to be of length zero. */ |
|
640 inline TArrayOfAscendingIndices() :iArrayOfIndices(0) {} |
|
641 |
|
642 IMPORT_C TAppendResult AppendIndex(TInt aIndex); |
|
643 |
|
644 /** Deletes a single index from the array. |
|
645 |
|
646 @param aIndexOfIndex The index of the index to delete. Must not be |
|
647 negative and must not be greater than the length of the array, or a |
|
648 panic occurs. */ |
|
649 inline void Remove(TInt aIndexOfIndex) {iArrayOfIndices.Delete(aIndexOfIndex, 1);} |
|
650 |
|
651 /** Deletes all indices from the array. */ |
|
652 inline void RemoveAll() {iArrayOfIndices.SetLength(0);} |
|
653 |
|
654 /** Returns the number of indices in the array. |
|
655 |
|
656 @return The number of indices in the array. */ |
|
657 inline TInt NumberOfIndices() const {return iArrayOfIndices.Length();} |
|
658 |
|
659 /** Gets the value of the specified index. |
|
660 |
|
661 @param aIndexOfIndex Index into the array. |
|
662 @return The value of the index. */ |
|
663 inline TInt operator[](TInt aIndexOfIndex) const {return iArrayOfIndices[aIndexOfIndex];} |
|
664 private: |
|
665 enum {KMaximumNumberOfIndices=25}; |
|
666 private: |
|
667 TBuf16<KMaximumNumberOfIndices> iArrayOfIndices; |
|
668 }; |
|
669 public: |
|
670 IMPORT_C static CCnvCharacterSetConverter* NewL(); |
|
671 IMPORT_C static CCnvCharacterSetConverter* NewLC(); |
|
672 IMPORT_C virtual ~CCnvCharacterSetConverter(); |
|
673 IMPORT_C static CArrayFix<SCharacterSet>* CreateArrayOfCharacterSetsAvailableL(RFs& aFileServerSession); |
|
674 IMPORT_C static CArrayFix<SCharacterSet>* CreateArrayOfCharacterSetsAvailableLC(RFs& aFileServerSession); |
|
675 IMPORT_C TUint ConvertStandardNameOfCharacterSetToIdentifierL(const TDesC8& aStandardNameOfCharacterSet, RFs& aFileServerSession); |
|
676 IMPORT_C HBufC8* ConvertCharacterSetIdentifierToStandardNameL(TUint aCharacterSetIdentifier, RFs& aFileServerSession); |
|
677 IMPORT_C TUint ConvertMibEnumOfCharacterSetToIdentifierL(TInt aMibEnumOfCharacterSet, RFs& aFileServerSession); |
|
678 IMPORT_C TInt ConvertCharacterSetIdentifierToMibEnumL(TUint aCharacterSetIdentifier, RFs& aFileServerSession); |
|
679 IMPORT_C void PrepareToConvertToOrFromL(TUint aCharacterSetIdentifier, const CArrayFix<SCharacterSet>& aArrayOfCharacterSetsAvailable, RFs& aFileServerSession); |
|
680 IMPORT_C TAvailability PrepareToConvertToOrFromL(TUint aCharacterSetIdentifier, RFs& aFileServerSession); |
|
681 // the following attribute-setting functions should be called (if at all) after calling PrepareToConvertToOrFromL and before calling ConvertFromUnicode and/or ConvertToUnicode |
|
682 IMPORT_C void SetDefaultEndiannessOfForeignCharacters(TEndianness aEndianness); |
|
683 IMPORT_C void SetDowngradeForExoticLineTerminatingCharacters(TDowngradeForExoticLineTerminatingCharacters aDowngradeForExoticLineTerminatingCharacters); // by default this attribute is set to EDowngradeExoticLineTerminatingCharactersToCarriageReturnLineFeed |
|
684 IMPORT_C void SetReplacementForUnconvertibleUnicodeCharactersL(const TDesC8& aReplacementForUnconvertibleUnicodeCharacters); // must be a single character preceded by its escape sequence (if any), and must be little-endian if the endianness of the character-set is unspecified, otherwise in the same endianness as the character-set |
|
685 |
|
686 // the conversion functions return either one of the TError values above, or the number of unconverted elements left at the end of the input descriptor |
|
687 IMPORT_C TInt ConvertFromUnicode(TDes8& aForeign, const TDesC16& aUnicode) const; |
|
688 IMPORT_C TInt ConvertFromUnicode(TDes8& aForeign, const TDesC16& aUnicode, TInt& aNumberOfUnconvertibleCharacters) const; |
|
689 IMPORT_C TInt ConvertFromUnicode(TDes8& aForeign, const TDesC16& aUnicode, TInt& aNumberOfUnconvertibleCharacters, TInt& aIndexOfFirstUnconvertibleCharacter) const; |
|
690 IMPORT_C TInt ConvertFromUnicode(TDes8& aForeign, const TDesC16& aUnicode, TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters) const; |
|
691 IMPORT_C TInt ConvertToUnicode(TDes16& aUnicode, const TDesC8& aForeign, TInt& aState) const; |
|
692 IMPORT_C TInt ConvertToUnicode(TDes16& aUnicode, const TDesC8& aForeign, TInt& aState, TInt& aNumberOfUnconvertibleCharacters) const; |
|
693 IMPORT_C TInt ConvertToUnicode(TDes16& aUnicode, const TDesC8& aForeign, TInt& aState, TInt& aNumberOfUnconvertibleCharacters, TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter) const; |
|
694 IMPORT_C static void AutoDetectCharacterSetL(TInt& aConfidenceLevel, TUint& aCharacterSetIdentifier, const CArrayFix<SCharacterSet>& aArrayOfCharacterSetsAvailable, const TDesC8& aSample); |
|
695 IMPORT_C void AutoDetectCharSetL(TInt& aConfidenceLevel, TUint& aCharacterSetIdentifier, const CArrayFix<SCharacterSet>& aArrayOfCharacterSetsAvailable, const TDesC8& aSample); |
|
696 IMPORT_C static void ConvertibleToCharacterSetL(TInt& aConfidenceLevel, const TUint aCharacterSetIdentifier,const CArrayFix<SCharacterSet>& aArrayOfCharacterSetsAvailable, const TDesC8& aSample); |
|
697 IMPORT_C void ConvertibleToCharSetL(TInt& aConfidenceLevel, const TUint aCharacterSetIdentifier,const CArrayFix<SCharacterSet>& aArrayOfCharacterSetsAvailable, const TDesC8& aSample); |
|
698 IMPORT_C void SetMaxCacheSize(TInt aSize); |
|
699 // the following functions are only to be called by conversion plug-in libraries |
|
700 IMPORT_C static TInt DoConvertFromUnicode(const SCnvConversionData& aConversionData, TEndianness aDefaultEndiannessOfForeignCharacters, const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, TDes8& aForeign, const TDesC16& aUnicode, TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters); |
|
701 IMPORT_C static TInt DoConvertFromUnicode(const SCnvConversionData& aConversionData, TEndianness aDefaultEndiannessOfForeignCharacters, const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, TDes8& aForeign, const TDesC16& aUnicode, TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters, TUint& aOutputConversionFlags, TUint aInputConversionFlags); |
|
702 IMPORT_C static TInt DoConvertToUnicode(const SCnvConversionData& aConversionData, TEndianness aDefaultEndiannessOfForeignCharacters, TDes16& aUnicode, const TDesC8& aForeign, TInt& aNumberOfUnconvertibleCharacters, TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter); |
|
703 IMPORT_C static TInt DoConvertToUnicode(const SCnvConversionData& aConversionData, TEndianness aDefaultEndiannessOfForeignCharacters, TDes16& aUnicode, const TDesC8& aForeign, TInt& aNumberOfUnconvertibleCharacters, TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter, TUint& aOutputConversionFlags, TUint aInputConversionFlags); |
|
704 IMPORT_C static const SCnvConversionData& AsciiConversionData(); |
|
705 inline TDowngradeForExoticLineTerminatingCharacters GetDowngradeForExoticLineTerminatingCharacters () |
|
706 { |
|
707 return iDowngradeForExoticLineTerminatingCharacters ; |
|
708 } ; |
|
709 |
|
710 private: |
|
711 enum |
|
712 { |
|
713 EStoredFlagOwnsConversionData =0x00000001, |
|
714 EStoredFlagConversionPlugInLibraryIsLoaded =0x00000002 |
|
715 }; |
|
716 enum TCharacterSetSearch |
|
717 { |
|
718 EStopCharacterSetSearch, |
|
719 EContinueCharacterSetSearch |
|
720 }; |
|
721 enum TConversionPlugInFunctionOrdinals |
|
722 { |
|
723 EReplacementForUnconvertibleUnicodeCharacters=1, |
|
724 EConvertFromUnicode=2, |
|
725 EConvertToUnicode=3, |
|
726 EIsInThisCharacterSet=4 |
|
727 }; |
|
728 |
|
729 private: |
|
730 CCnvCharacterSetConverter(); |
|
731 void ConstructL(); |
|
732 static CArrayFix<SCharacterSet>* DoCreateArrayOfCharacterSetsAvailableLC(RFs& aFileServerSession, TUint aIdentifierOfOnlyCharacterSetOfInterest); |
|
733 static TCharacterSetSearch AppendHardCodedCharacterSetIfRequiredL(CArrayFix<SCharacterSet>& aArrayOfCharacterSets, TUint aIdentifierOfOnlyCharacterSetOfInterest, TUint aIdentifierOfHardCodedCharacterSet, const TDesC& aNameOfHardCodedCharacterSet); |
|
734 void ScanForStandardNamesAndMibEnumsL(RFs& aFileServerSession); |
|
735 void ScanForStandardNamesAndMibEnumsROMOnlyL(RFs& aFileServerSession); |
|
736 TAvailability DoPrepareToConvertToOrFromL(TUint aCharacterSetIdentifier, const CArrayFix<SCharacterSet>* aArrayOfCharacterSetsAvailable, RFs& aFileServerSession); |
|
737 static void DeleteConversionData(const SCnvConversionData* aConversionData); |
|
738 static void DeleteConversionData(TAny* aConversionData); |
|
739 static TEndianness EndiannessOfForeignCharacters(const SCnvConversionData& aConversionData, TEndianness aDefaultEndiannessOfForeignCharacters); |
|
740 |
|
741 private: |
|
742 TUint iStoredFlags; |
|
743 TUint iCharacterSetIdentifierOfLoadedConversionData; // 0 or a UID of the loaded plugin |
|
744 const SCnvConversionData* iConversionData; |
|
745 TEndianness iDefaultEndiannessOfForeignCharacters; |
|
746 TDowngradeForExoticLineTerminatingCharacters iDowngradeForExoticLineTerminatingCharacters; |
|
747 TBuf8<KMaximumLengthOfReplacementForUnconvertibleUnicodeCharacters> iReplacementForUnconvertibleUnicodeCharacters; |
|
748 CStandardNamesAndMibEnums* iStandardNamesAndMibEnums; |
|
749 TBool iTlsDataConstructed; |
|
750 CCharsetCnvCache* iCharsetCnvCache; |
|
751 TBool iIsSystemStandardNamesAndMibEnumsScanned; |
|
752 }; |
|
753 |
|
754 #endif |
|
755 |