charconvfw/charconv_fw/inc/charconv.h
changeset 0 1fb32624e06b
equal deleted inserted replaced
-1:000000000000 0:1fb32624e06b
       
     1 /*
       
     2 * Copyright (c) 1997-2009 Nokia Corporation and/or its subsidiary(-ies).
       
     3 * All rights reserved.
       
     4 * This component and the accompanying materials are made available
       
     5 * under the terms of "Eclipse Public License v1.0"
       
     6 * which accompanies this distribution, and is available
       
     7 * at the URL "http://www.eclipse.org/legal/epl-v10.html".
       
     8 *
       
     9 * Initial Contributors:
       
    10 * Nokia Corporation - initial contribution.
       
    11 *
       
    12 * Contributors:
       
    13 *
       
    14 * Description: 
       
    15 *
       
    16 */
       
    17 
       
    18 
       
    19 #if !defined(__CHARCONV_H__)
       
    20 #define __CHARCONV_H__
       
    21 
       
    22 #if !defined(__E32STD_H__)
       
    23 #include <e32std.h>
       
    24 #endif
       
    25 
       
    26 #if !defined(__E32BASE_H__)
       
    27 #include <e32base.h>
       
    28 #endif
       
    29 
       
    30 /** 
       
    31 The maximum length in bytes of the replacement text for unconvertible Unicode 
       
    32 characters (=50) (see CCnvCharacterSetConverter::SetReplacementForUnconvertibleUnicodeCharactersL()). 
       
    33 @publishedAll
       
    34 @released
       
    35 */
       
    36 const TInt KMaximumLengthOfReplacementForUnconvertibleUnicodeCharacters=50;
       
    37 
       
    38 /** 
       
    39 UTF-7 
       
    40 @publishedAll
       
    41 @released
       
    42 */
       
    43 const TUint KCharacterSetIdentifierUtf7=0x1000582c;
       
    44 /** 
       
    45 UTF-8 
       
    46 @publishedAll
       
    47 @released
       
    48 */
       
    49 const TUint KCharacterSetIdentifierUtf8=0x1000582d;
       
    50 /** 
       
    51 IMAP UTF-7 
       
    52 @publishedAll
       
    53 @released
       
    54 */
       
    55 const TUint KCharacterSetIdentifierImapUtf7=0x1000582e;
       
    56 /** 
       
    57 Java UTF-8 
       
    58 @publishedAll
       
    59 @released
       
    60 */
       
    61 const TUint KCharacterSetIdentifierJavaConformantUtf8=0x1000582f;
       
    62 /** 
       
    63 Code Page 1252 
       
    64 @publishedAll
       
    65 @released
       
    66 */
       
    67 const TUint KCharacterSetIdentifierCodePage1252=0x100012b6;
       
    68 /** 
       
    69 ISO 8859-1 
       
    70 @publishedAll
       
    71 @released
       
    72 */
       
    73 const TUint KCharacterSetIdentifierIso88591=0x10003b10;
       
    74 /** 
       
    75 ISO 8859-2 
       
    76 @publishedAll
       
    77 @released
       
    78 */
       
    79 const TUint KCharacterSetIdentifierIso88592=0x1000507e;
       
    80 /** 
       
    81 ISO 8859-3 
       
    82 @publishedAll
       
    83 @released
       
    84 */
       
    85 const TUint KCharacterSetIdentifierIso88593=0x10008a28;
       
    86 /** 
       
    87 ISO 8859-4 
       
    88 @publishedAll
       
    89 @released
       
    90 */
       
    91 const TUint KCharacterSetIdentifierIso88594=0x1000507f;
       
    92 /** 
       
    93 ISO 8859-5 
       
    94 @publishedAll
       
    95 @released
       
    96 */
       
    97 const TUint KCharacterSetIdentifierIso88595=0x10005080;
       
    98 /** 
       
    99 ISO 8859-6 
       
   100 @publishedAll
       
   101 @released
       
   102 */
       
   103 const TUint KCharacterSetIdentifierIso88596=0x10008a29;
       
   104 /** 
       
   105 ISO 8859-7 
       
   106 @publishedAll
       
   107 @released
       
   108 */
       
   109 const TUint KCharacterSetIdentifierIso88597=0x10005081;
       
   110 /** 
       
   111 ISO 8859-8 
       
   112 @publishedAll
       
   113 @released
       
   114 */
       
   115 const TUint KCharacterSetIdentifierIso88598=0x10008a2a;
       
   116 /** 
       
   117 ISO 8859-9 
       
   118 @publishedAll
       
   119 @released
       
   120 */
       
   121 const TUint KCharacterSetIdentifierIso88599=0x10005082;
       
   122 /** 
       
   123 ISO 8859-10 
       
   124 @publishedAll
       
   125 @released
       
   126 */
       
   127 const TUint KCharacterSetIdentifierIso885910=0x10008a2b;
       
   128 /** 
       
   129 ISO 8859-13 
       
   130 @publishedAll
       
   131 @released
       
   132 */
       
   133 const TUint KCharacterSetIdentifierIso885913=0x10008a2c;
       
   134 /** 
       
   135 ISO 8859-14 
       
   136 @publishedAll
       
   137 @released
       
   138 */
       
   139 const TUint KCharacterSetIdentifierIso885914=0x10008a2d;
       
   140 /** 
       
   141 ISO 8859-15 
       
   142 @publishedAll
       
   143 @released
       
   144 */
       
   145 const TUint KCharacterSetIdentifierIso885915=0x10008a2e;
       
   146 /** 
       
   147 ASCII 
       
   148 @publishedAll
       
   149 @released
       
   150 */
       
   151 const TUint KCharacterSetIdentifierAscii=0x10004cc6;
       
   152 /** 
       
   153 SMS 7-bit 
       
   154 @publishedAll
       
   155 @released
       
   156 */
       
   157 const TUint KCharacterSetIdentifierSms7Bit=0x100053ab;
       
   158 /** 
       
   159 GB 2312 
       
   160 @publishedAll
       
   161 @released
       
   162 */
       
   163 const TUint KCharacterSetIdentifierGb2312=0x10000fbe;
       
   164 /** 
       
   165 HZ-GB-2312 
       
   166 @publishedAll
       
   167 @released
       
   168 */
       
   169 const TUint KCharacterSetIdentifierHz=0x10006065;
       
   170 /** 
       
   171 GB 12345 
       
   172 @publishedAll
       
   173 @released
       
   174 */
       
   175 const TUint KCharacterSetIdentifierGb12345=0x1000401a;
       
   176 /** 
       
   177 GBK 
       
   178 @publishedAll
       
   179 @released
       
   180 */
       
   181 const TUint KCharacterSetIdentifierGbk=0x10003ecb;
       
   182 /** 
       
   183 GB18030
       
   184 @publishedAll
       
   185 @released
       
   186 */
       
   187 const TUint KCharacterSetIdentifierGb18030=0x10287038;
       
   188 /** 
       
   189 Big 5 
       
   190 @publishedAll
       
   191 @released
       
   192 */
       
   193 const TUint KCharacterSetIdentifierBig5=0x10000fbf;
       
   194 /** 
       
   195 Shift-JIS 
       
   196 @publishedAll
       
   197 @released
       
   198 */
       
   199 const TUint KCharacterSetIdentifierShiftJis=0x10000fbd;
       
   200 /** 
       
   201 ISO-2022-JP 
       
   202 @publishedAll
       
   203 @released
       
   204 */
       
   205 const TUint KCharacterSetIdentifierIso2022Jp=0x100066a0;
       
   206 /** 
       
   207 ISO-2022-JP-1 
       
   208 @publishedAll
       
   209 @released
       
   210 */
       
   211 const TUint KCharacterSetIdentifierIso2022Jp1=0x100066a3;
       
   212 /** 
       
   213 JIS Encoding 
       
   214 @publishedAll
       
   215 @released
       
   216 */
       
   217 const TUint KCharacterSetIdentifierJis=0x10006066;
       
   218 /** 
       
   219 EUC-JP 
       
   220 @publishedAll
       
   221 @released
       
   222 */
       
   223 const TUint KCharacterSetIdentifierEucJpPacked=0x10006067;
       
   224 
       
   225 /** 
       
   226 JP5 
       
   227 @publishedAll
       
   228 @released
       
   229 */
       
   230 const TUint KCharacterSetIdentifierJ5=0x1020D408;
       
   231 /** 
       
   232 CP850 
       
   233 @publishedAll
       
   234 @released
       
   235 */
       
   236 const TUint KCharacterSetIdentifierCP850=0x102825AD;
       
   237 
       
   238 const TUint KCharacterSetIdentifierUnicodeLittle=0x101f3fae;  //Little Endian Unicode
       
   239 const TUint KCharacterSetIdentifierUnicodeBig=0x101f4052; // Big Endian Unicode 
       
   240 const TUint KCharacterSetIdentifierUcs2=0x101ff492; 
       
   241 
       
   242 
       
   243 /** 
       
   244 Extended SMS 7-bit 
       
   245 @publishedAll
       
   246 @released
       
   247 */
       
   248 const TUint KCharacterSetIdentifierExtendedSms7Bit=0x102863FD;
       
   249 
       
   250 /** 
       
   251 Turkish 
       
   252 @publishedAll
       
   253 @released
       
   254 */
       
   255 const TUint KCharacterSetIdentifierTurkishSingleSms7Bit=0x102863FE;
       
   256 const TUint KCharacterSetIdentifierTurkishLockingSms7Bit=0x102863FF;
       
   257 const TUint KCharacterSetIdentifierTurkishLockingAndSingleSms7Bit=0x10286400;
       
   258 
       
   259 /** 
       
   260 Portuguese 
       
   261 @publishedAll
       
   262 @released
       
   263 */
       
   264 const TUint KCharacterSetIdentifierPortugueseSingleSms7Bit=0x10286407;
       
   265 const TUint KCharacterSetIdentifierPortugueseLockingSms7Bit=0x10286408;
       
   266 const TUint KCharacterSetIdentifierPortugueseLockingAndSingleSms7Bit=0x10286409;
       
   267 
       
   268 /** 
       
   269 Spanish
       
   270 @publishedAll
       
   271 @released
       
   272 */
       
   273 const TUint KCharacterSetIdentifierSpanishSingleSms7Bit=0x1028640A;
       
   274 
       
   275 
       
   276 /**
       
   277 code page 949
       
   278 @publishedAll
       
   279 @released
       
   280 */
       
   281 const TUint KCharacterSetIdentifierCP949=0x200100FF;
       
   282 
       
   283 /**
       
   284 Shift-JIS with Pictograph
       
   285 @publishedAll
       
   286 @released 
       
   287 */
       
   288 const TUint KCharacterSetIdentifierShiftJisDirectmap=0x101F8691;
       
   289 
       
   290 /**
       
   291 EUC-JP with direct mapped pictograph
       
   292 @publishedAll
       
   293 @released 
       
   294 */
       
   295 const TUint KCharacterSetIdentifierEucJpDirectmap=0x101F86A6;
       
   296 
       
   297 /**
       
   298 EUC-KR 
       
   299 @publishedAll
       
   300 @released
       
   301 */
       
   302 const TUint KCharacterSetIdentifierEUCKR=0x2000E526;
       
   303 
       
   304 /**
       
    305 ISCII 
       
   306 @publishedAll
       
   307 @released
       
   308 */
       
   309 const TUint KCharacterSetIdentifierIscii=0x1027508E;
       
   310 
       
   311 /**
       
   312 ISO2022 Korean
       
   313 @publishedAll
       
   314 @released
       
   315 */
       
   316 const TUint KCharacterSetIdentifierIso2022kr=0x20010101;
       
   317 
       
   318 /**
       
   319 KOI8-R Russian
       
   320 @publishedAll
       
   321 @released
       
   322 */
       
   323 const TUint KCharacterSetIdentifierKOI8R=0x101F8778;
       
   324 
       
   325 /**
       
    326 KOI8-U Belarusian/Ukrainian Cyrillic
       
   327 @publishedAll
       
   328 @released 
       
   329 */
       
   330 const TUint KCharacterSetIdentifierKOI8U=0x101F8761;
       
   331 
       
   332 /**
       
   333 KSC5601 Korean
       
   334 @publishedAll
       
   335 @released 
       
   336 */
       
   337 const TUint KCharacterSetIdentifierKsc5601=0x200113CD;
       
   338 
       
   339 /**
       
   340 TIS_620 Thai
       
   341 @publishedAll
       
   342 @released 
       
   343 */
       
   344 const TUint KCharacterSetIdentifierTIS_620=0x101F8549;
       
   345 
       
   346 /**
       
   347 Code page 874 Thai
       
   348 @publishedAll
       
   349 @released 
       
   350 */
       
   351 const TUint KCharacterSetIdentifierWin874=0x101F854A;
       
   352 
       
   353 /**
       
   354 Code page 1250 Eastern European
       
   355 @publishedAll
       
   356 @released 
       
   357 */
       
   358 const TUint KCharacterSetIdentifierWin1250=0x100059D6;
       
   359 
       
   360 /**
       
   361 Code page 1251 Cyrillic
       
   362 @publishedAll
       
   363 @released 
       
   364 */
       
   365 const TUint KCharacterSetIdentifierWin1251=0x100059D7;
       
   366 
       
   367 /**
       
   368 Code page 1253 Greek
       
   369 @publishedAll
       
   370 @released 
       
   371 */
       
   372 const TUint KCharacterSetIdentifierWin1253=0x100059D8;
       
   373 
       
   374 /**
       
   375 Code page 1254 Turkish
       
   376 @publishedAll
       
   377 @released 
       
   378 */
       
   379 const TUint KCharacterSetIdentifierWin1254=0x100059D9;
       
   380 
       
   381 /**
       
   382 Code page 1255 Hebrew
       
   383 @publishedAll
       
   384 @released 
       
   385 */
       
   386 const TUint KCharacterSetIdentifierWin1255=0x101F8547;
       
   387 
       
   388 /**
       
   389 Code page 1256 Arabic
       
   390 @publishedAll
       
   391 @released 
       
   392 */
       
   393 const TUint KCharacterSetIdentifierWin1256=0x101F8548;
       
   394 
       
   395 /**
       
   396 Code page 1257 Baltic
       
   397 @publishedAll
       
   398 @released 
       
   399 */
       
   400 const TUint KCharacterSetIdentifierWin1257=0x100059DA;
       
   401 
       
   402 /**
       
   403 Windows-1258
       
   404 @publishedAll
       
   405 @released
       
   406 */ 
       
   407 const TUint KCharacterSetIdentifierWin1258=0x102073B8;
       
   408 
       
    409 // Note: character sets other than those listed above may be available at run time, and none of those listed above is guaranteed to be available at run time.
       
   410 
       
   411 struct SCnvConversionData;
       
   412 class CDeepDestructingArrayOfCharactersSets;
       
   413 class CFileReader;
       
   414 class CStandardNamesAndMibEnums;
       
   415 class RFs;
       
   416 class CCharsetCnvCache;
       
   417 /** 
       
   418 Converts text between Unicode and other character sets. 
       
   419 
       
   420 The first stage of the conversion is to specify the non-Unicode character 
       
   421 set being converted to or from. This is done by calling one of the overloads 
       
   422 of PrepareToConvertToOrFromL().
       
   423 
       
   424 The second stage is to convert the text, using one of the overloads of 
       
   425 ConvertFromUnicode() or ConvertToUnicode().
       
   426 
       
   427 Where possible the first documented overload of PrepareToConvertToOrFromL() 
       
   428 should be used because the second overload panics if the specified character 
       
   429 set is not available: the first overload simply returns whether the character 
       
   430 set is available or not available. However if the conversions are to be 
       
   431 performed often, or if the user must select the character set for the 
       
   432 conversion from a list, the second overload may be more appropriate.
       
   433 
       
   434 The first overload is less efficient than the second, because it searches 
       
   435 through the file system for the selected character set every time it is invoked. 
       
   436 The second overload searches through an array of all available character sets. 
       
   437 In this method, the file system need only be searched once - when 
       
   438 CreateArrayOfCharacterSetsAvailableLC() or 
       
   439 CreateArrayOfCharacterSetsAvailableL() is used to create the array.
       
   440 
       
   441 The conversion functions allow users of this class to perform partial 
       
   442 conversions on an input descriptor, handling the situation where the input 
       
   443 descriptor is truncated mid way through a multi-byte character. This means 
       
   444 that you do not have to guess how big to make the output descriptor for a 
       
   445 given input descriptor, you can simply do the conversion in a loop using a 
       
   446 small output descriptor. The ability to handle truncated descriptors also 
       
   447 allows users of the class to convert information received in chunks from an 
       
   448 external source.
       
   449 
       
   450 The class also provides a number of utility functions. 
       
   451 @publishedAll
       
   452 @released
       
   453 */
       
   454 class CCnvCharacterSetConverter : public CBase
       
   455 	{
       
   456 public:
       
    457 	/** Indicates whether a character set is available or unavailable
       
    458 	for conversion. Returned by the overload of PrepareToConvertToOrFromL() 
       
    459 	that takes only a character set identifier and a file server session. */
       
    460 	enum TAvailability
       
    461 		{
       
    462 		/** The requested character set can be converted. */
       
    463 		EAvailable,
       
    464 		/** The requested character set cannot be converted. */
       
    465 		ENotAvailable
       
    466 		};
       
   467 
       
    468 	/** Conversion error flags, returned by the conversion functions in place of 
       
    469 	the number of unconverted elements. There is currently only one; others may be added in the future. */
       
    470 	enum TError
       
    471 		{
       
    472 		/** The input descriptor contains a single corrupt character. This 
       
    473 		might occur when the input descriptor only contains some of the bytes 
       
    474 		of a single multi-byte character. */
       
    475 		EErrorIllFormedInput=KErrCorrupt
       
    476 		};
       
   477 
       
    478 	/** Specifies the default endian-ness of the current character set. 
       
    479 	Used by SetDefaultEndiannessOfForeignCharacters(). */
       
    480 	enum TEndianness
       
    481 		{
       
    482 		/** The character set is little-endian. */
       
    483 		ELittleEndian,
       
    484 		/** The character set is big-endian. */
       
    485 		EBigEndian
       
    486 		};
       
   487 	
       
   488 	/** Downgrade for line and paragraph separators */
       
   489 	enum TDowngradeForExoticLineTerminatingCharacters
       
   490 		{
       
   491 		/** Paragraph/line separators should be downgraded (if necessary) 
       
   492 		into carriage return and line feed pairs. */
       
   493 		EDowngradeExoticLineTerminatingCharactersToCarriageReturnLineFeed,
       
   494 		/** Paragraph/line separators should be downgraded (if necessary) 
       
   495 		into a line feed only. */
       
   496 		EDowngradeExoticLineTerminatingCharactersToJustLineFeed
       
   497 		};
       
   498 
       
    499 	/** Input conversion flags: bit values that may be combined in the
       
    500 	aInputConversionFlags argument to control how a conversion is
       
    501 	performed.
       
    502 	 
       
    503 	Note: This enumeration can be used in the DoConvertToUnicode() and
       
    504 	DoConvertFromUnicode() functions. These are part of the
       
    505 	Character Conversion Plug-in Provider API and are for use by plug-in
       
    506 	conversion libraries only.
       
    507 	@since 6.0 */
       
    508 	enum
       
    509 		{
       
    510 		/** Appends the converted text to the output descriptor. */
       
    511 		EInputConversionFlagAppend	=0x00010000,
       
    512 		/** By default, when the input descriptor passed to DoConvertFromUnicode()
       
    513 		or DoConvertToUnicode() consists of nothing but a truncated sequence, 
       
    514 		the error-code EErrorIllFormedInput is returned. 
       
    515 		If this behaviour is undesirable, the input flag  
       
    516 		EInputConversionFlagAllowTruncatedInputNotEvenPartlyConsumable
       
    517 		should be set. */
       
    518 		EInputConversionFlagAllowTruncatedInputNotEvenPartlyConsumable	=0x00020000,
       
    519 		/** Stops converting when the first unconvertible character is reached. */
       
    520 		EInputConversionFlagStopAtFirstUnconvertibleCharacter			=0x00040000,
       
    521 		/** Appends the default character set escape sequence at the end of the converted text. */
       
    522 		EInputConversionFlagMustEndInDefaultCharacterSet				=0x00080000,
       
    523 		/** Defect fix INC053609: according to RFC 1468 the line can be assumed to start 
       
    524 		in ASCII, so there is no need to always insert an escape sequence. */
       
    525 		EInputConversionFlagAssumeStartInDefaultCharacterSet			=0x00100000
       
    526 		};
       
   527 	enum
       
   528 		{
       
   529 		/** Indicates whether or not the source descriptor ends in a truncated
       
   530 		sequence, e.g. the first half only of a surrogate pair. */
       
   531 		EOutputConversionFlagInputIsTruncated							=0x01000000
       
   532 		};
       
   533 
       
   534 		/** Initial value for the state argument in a set of related calls to
       
   535 		ConvertToUnicode(). */
       
   536 	enum {KStateDefault=0};
       
   537 	enum 
       
   538 		{
       
   539 		/** The lowest confidence value for a character set accepted by 
       
   540 		Autodetect*/
       
   541 		ELowestThreshold = 25
       
   542 		};
       
   543 		
       
   544 	/** Stores information about a non-Unicode character set. The information 
       
   545 	is used	to locate the conversion information required by 
       
   546 	ConvertFromUnicode() and ConvertToUnicode().
       
   547 
       
   548 	An array of these structs that contain all available character sets 
       
   549 	can be generated by CreateArrayOfCharacterSetsAvailableLC() and 
       
   550 	CreateArrayOfCharacterSetsAvailableL(), and is used by one of the 
       
   551 	overloads of PrepareToConvertToOrFromL(). */
       
   552 	struct SCharacterSet
       
   553 		{
       
    554 		/** Gets the character set's UID.
       
   555 	
       
   556 		@return The UID of the character set. */
       
   557 		inline TUint Identifier() const {return iIdentifier;}
       
   558 
       
   559 		/** Tests whether a filename given by the function SCharacterSet::Name() 
       
   560 		is a real file name (i.e. conversion is provided by a plug in DLL), or 
       
   561 		just the character set name (i.e. conversion is built into Symbian OS).
       
   562 		
       
   563 		Note: If the function returns ETrue then the path and filename can be 
       
   564 		parsed using TParse or TParsePtrC functions to obtain just the filename.
       
   565 		
       
   566 		@return ETrue if the name is a real filename. EFalse if it is just the 
       
   567 		character set name. */
       
   568 		inline TBool NameIsFileName() const {return iFlags&EFlagNameIsFileName;}
       
   569 
       
   570 		/** Gets the full path and filename of the DLL which implements 
       
   571 		conversion for the character set. 
       
   572 		
       
   573 		If the character set is one for which conversion is built into Symbian 
       
   574 		OS rather than implemented by a plug in DLL, the function just returns 
       
   575 		the name of the character set. The NameIsFileName() function can be 
       
   576 		used to determine whether or not it is legal to create a TParsePtrC 
       
   577 		object over the descriptor 	returned by Name().
       
   578 		
       
   579 		Notes:
       
   580 		
       
   581 		The name returned cannot be treated as an Internet-standard name, it 
       
   582 		is locale-independent and should be mapped to the locale-dependent name 
       
   583 		by software at a higher level before being shown to the user. Conversion 
       
   584 		from Internet-standard names of character sets to the UID identifiers 
       
   585 		is provided by the member function 
       
   586 		ConvertStandardNameOfCharacterSetToIdentifierL().
       
   587 		
       
   588 		Typically, to find the user-displayable name (as opposed to the 
       
   589 		internet-standard name) of a character set, you would do something 
       
   590 		like this:
       
   591 		
       
   592 		@code
       
   593 		const CCnvCharacterSetConverter::SCharacterSet& characterSet=...;
       
   594 		const TPtrC userDisplayable(characterSet.NameIsFileName()? TParsePtrC(characterSet.Name()).Name(): 
       
   595 		characterSet.Name()); 
       
   596 		@endcode
       
   597 
       
   598 		@return Full path and filename of the character set converter plug in 
       
   599 		DLL, or just the name of the character set. */
       
   600 		inline TPtrC Name() const {return *iName;}
       
   601 	private:
       
   602 		enum
       
   603 			{
       
   604 			EFlagNameIsFileName					=0x00000001,
       
   605 			EFlagFileIsConversionPlugInLibrary	=0x00000002
       
   606 			};
       
   607 	private:
       
   608 		inline TBool FileIsConversionPlugInLibrary() const {return iFlags&EFlagFileIsConversionPlugInLibrary;}
       
   609 	private:
       
   610 		TUint iIdentifier;
       
   611 		TUint iFlags;
       
   612 		HBufC* iName;
       
   613 	private:
       
   614 		friend class CCnvCharacterSetConverter;
       
   615 		friend class CDeepDestructingArrayOfCharactersSets;
       
   616 		}; //SCharacterSet
       
   617 	
       
   618 
       
    619 	/** 
       
    620 	Holds an ascending array of the indices of the characters in the 
       
    621 	source Unicode text which could not be converted by 
       
    622 	CCnvCharacterSetConverter::ConvertFromUnicode() into the foreign 
       
    623 	character set.
       
    624 	@publishedAll
       
    625 	@released
       
    626 	*/
       
    627 	class TArrayOfAscendingIndices
       
    628 		{
       
    629 	public:
       
    630 		/** The return value of TArrayOfAscendingIndices::AppendIndex(). */
       
    631 		enum TAppendResult
       
    632 			{
       
    633 			/** The append failed. */
       
    634 			EAppendFailed,
       
    635 			/** The append succeeded. */
       
    636 			EAppendSuccessful
       
    637 			};
       
    638 	public:
       
    639 		/** C++ constructor. The array is initialised to be of length zero. */
       
    640 		inline TArrayOfAscendingIndices() :iArrayOfIndices(0) {}
       
    641 		/** Appends aIndex to the array; the result reports whether the append succeeded or failed. */
       
    642 		IMPORT_C TAppendResult AppendIndex(TInt aIndex);
       
    643 		
       
    644 		/** Deletes a single index from the array.
       
    645 		
       
    646 		@param aIndexOfIndex The index of the index to delete. Must not be 
       
    647 		negative and must not be greater than the length of the array, or a 
       
    648 		panic occurs. */
       
    649 		inline void Remove(TInt aIndexOfIndex) {iArrayOfIndices.Delete(aIndexOfIndex, 1);}
       
    650 		
       
    651 		/** Deletes all indices from the array. */
       
    652 		inline void RemoveAll() {iArrayOfIndices.SetLength(0);}
       
    653 
       
    654 		/** Returns the number of indices in the array.
       
    655 	
       
    656 		@return The number of indices in the array. */
       
    657 		inline TInt NumberOfIndices() const {return iArrayOfIndices.Length();}
       
    658 
       
    659 		/** Gets the value of the specified index.
       
    660 	
       
    661 		@param aIndexOfIndex Index into the array.
       
    662 		@return The value of the index. */
       
    663 		inline TInt operator[](TInt aIndexOfIndex) const {return iArrayOfIndices[aIndexOfIndex];}
       
    664 	private:
       
    665 		enum {KMaximumNumberOfIndices=25}; // capacity of iArrayOfIndices
       
    666 	private:
       
    667 		TBuf16<KMaximumNumberOfIndices> iArrayOfIndices; // NOTE(review): indices are held as 16-bit elements, so values above 0xFFFF presumably cannot be stored - confirm against AppendIndex()
       
    668 		};
       
   669 public:
       
   670 	IMPORT_C static CCnvCharacterSetConverter* NewL();
       
   671 	IMPORT_C static CCnvCharacterSetConverter* NewLC();
       
   672 	IMPORT_C virtual ~CCnvCharacterSetConverter();
       
   673 	IMPORT_C static CArrayFix<SCharacterSet>* CreateArrayOfCharacterSetsAvailableL(RFs& aFileServerSession);
       
   674 	IMPORT_C static CArrayFix<SCharacterSet>* CreateArrayOfCharacterSetsAvailableLC(RFs& aFileServerSession);
       
   675 	IMPORT_C TUint ConvertStandardNameOfCharacterSetToIdentifierL(const TDesC8& aStandardNameOfCharacterSet, RFs& aFileServerSession);
       
   676 	IMPORT_C HBufC8* ConvertCharacterSetIdentifierToStandardNameL(TUint aCharacterSetIdentifier, RFs& aFileServerSession);
       
   677 	IMPORT_C TUint ConvertMibEnumOfCharacterSetToIdentifierL(TInt aMibEnumOfCharacterSet, RFs& aFileServerSession);
       
   678 	IMPORT_C TInt ConvertCharacterSetIdentifierToMibEnumL(TUint aCharacterSetIdentifier, RFs& aFileServerSession);
       
   679 	IMPORT_C void PrepareToConvertToOrFromL(TUint aCharacterSetIdentifier, const CArrayFix<SCharacterSet>& aArrayOfCharacterSetsAvailable, RFs& aFileServerSession);
       
   680 	IMPORT_C TAvailability PrepareToConvertToOrFromL(TUint aCharacterSetIdentifier, RFs& aFileServerSession);
       
   681 	// the following attribute-setting functions should be called (if at all) after calling PrepareToConvertToOrFromL and before calling ConvertFromUnicode and/or ConvertToUnicode
       
   682 	IMPORT_C void SetDefaultEndiannessOfForeignCharacters(TEndianness aEndianness);
       
   683 	IMPORT_C void SetDowngradeForExoticLineTerminatingCharacters(TDowngradeForExoticLineTerminatingCharacters aDowngradeForExoticLineTerminatingCharacters); // by default this attribute is set to EDowngradeExoticLineTerminatingCharactersToCarriageReturnLineFeed
       
   684 	IMPORT_C void SetReplacementForUnconvertibleUnicodeCharactersL(const TDesC8& aReplacementForUnconvertibleUnicodeCharacters); // must be a single character preceded by its escape sequence (if any), and must be little-endian if the endianness of the character-set is unspecified, otherwise in the same endianness as the character-set
       
   685 	
       
   686 	// the conversion functions return either one of the TError values above, or the number of unconverted elements left at the end of the input descriptor
       
   687 	IMPORT_C TInt ConvertFromUnicode(TDes8& aForeign, const TDesC16& aUnicode) const;
       
   688 	IMPORT_C TInt ConvertFromUnicode(TDes8& aForeign, const TDesC16& aUnicode, TInt& aNumberOfUnconvertibleCharacters) const;
       
   689 	IMPORT_C TInt ConvertFromUnicode(TDes8& aForeign, const TDesC16& aUnicode, TInt& aNumberOfUnconvertibleCharacters, TInt& aIndexOfFirstUnconvertibleCharacter) const;
       
   690 	IMPORT_C TInt ConvertFromUnicode(TDes8& aForeign, const TDesC16& aUnicode, TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters) const;
       
   691 	IMPORT_C TInt ConvertToUnicode(TDes16& aUnicode, const TDesC8& aForeign, TInt& aState) const;
       
   692 	IMPORT_C TInt ConvertToUnicode(TDes16& aUnicode, const TDesC8& aForeign, TInt& aState, TInt& aNumberOfUnconvertibleCharacters) const;
       
   693 	IMPORT_C TInt ConvertToUnicode(TDes16& aUnicode, const TDesC8& aForeign, TInt& aState, TInt& aNumberOfUnconvertibleCharacters, TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter) const;
       
   694 	IMPORT_C static void AutoDetectCharacterSetL(TInt& aConfidenceLevel, TUint& aCharacterSetIdentifier, const CArrayFix<SCharacterSet>& aArrayOfCharacterSetsAvailable, const TDesC8& aSample);
       
   695 	IMPORT_C void AutoDetectCharSetL(TInt& aConfidenceLevel, TUint& aCharacterSetIdentifier, const CArrayFix<SCharacterSet>& aArrayOfCharacterSetsAvailable, const TDesC8& aSample);
       
   696 	IMPORT_C static void ConvertibleToCharacterSetL(TInt& aConfidenceLevel, const TUint aCharacterSetIdentifier,const CArrayFix<SCharacterSet>& aArrayOfCharacterSetsAvailable, const TDesC8& aSample);
       
   697     IMPORT_C void ConvertibleToCharSetL(TInt& aConfidenceLevel, const TUint aCharacterSetIdentifier,const CArrayFix<SCharacterSet>& aArrayOfCharacterSetsAvailable, const TDesC8& aSample);
       
   698 	IMPORT_C void SetMaxCacheSize(TInt aSize);
       
   /**
   Static, exported helpers for conversion plug-ins. Unlike the member
   ConvertFromUnicode()/ConvertToUnicode() functions, these take the conversion
   data (SCnvConversionData) and the default endianness explicitly as arguments
   instead of reading them from a converter object. Each DoConvert... function has
   two overloads; the longer one adds aOutputConversionFlags (a TUint reference)
   and aInputConversionFlags (a TUint passed by value).
   AsciiConversionData() returns a reference to a const SCnvConversionData.
   NOTE(review): the flag values and the meaning of the TInt return values are not
   visible in this part of the header - confirm against the implementation.
   */
   699 	// the following functions are only to be called by conversion plug-in libraries
       
   700 	IMPORT_C static TInt DoConvertFromUnicode(const SCnvConversionData& aConversionData, TEndianness aDefaultEndiannessOfForeignCharacters, const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, TDes8& aForeign, const TDesC16& aUnicode, TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters);
       
   701 	IMPORT_C static TInt DoConvertFromUnicode(const SCnvConversionData& aConversionData, TEndianness aDefaultEndiannessOfForeignCharacters, const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, TDes8& aForeign, const TDesC16& aUnicode, TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters, TUint& aOutputConversionFlags, TUint aInputConversionFlags);
       
   702 	IMPORT_C static TInt DoConvertToUnicode(const SCnvConversionData& aConversionData, TEndianness aDefaultEndiannessOfForeignCharacters, TDes16& aUnicode, const TDesC8& aForeign, TInt& aNumberOfUnconvertibleCharacters, TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter);
       
   703 	IMPORT_C static TInt DoConvertToUnicode(const SCnvConversionData& aConversionData, TEndianness aDefaultEndiannessOfForeignCharacters, TDes16& aUnicode, const TDesC8& aForeign, TInt& aNumberOfUnconvertibleCharacters, TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter, TUint& aOutputConversionFlags, TUint aInputConversionFlags);
       
   704 	IMPORT_C static const SCnvConversionData& AsciiConversionData();
       
   /**
   Returns the current value of iDowngradeForExoticLineTerminatingCharacters.
   Declared const because it only reads the member, so it can also be called
   through a const CCnvCharacterSetConverter reference or pointer. The function
   is inline and not exported, so adding the qualifier does not affect the DLL's
   export table.
   @return The stored TDowngradeForExoticLineTerminatingCharacters value.
   */
   705 	inline TDowngradeForExoticLineTerminatingCharacters GetDowngradeForExoticLineTerminatingCharacters() const
       
   706 		{
       
   707 		return iDowngradeForExoticLineTerminatingCharacters;
       
   708 		}
       
   709 
       
   710 private:
       
   /**
   Anonymous enumeration of two single-bit flag values (0x1 and 0x2), so they can
   be combined in one bit mask.
   NOTE(review): presumably these are the bits kept in iStoredFlags, recording
   whether this object owns its conversion data and whether a conversion plug-in
   library is loaded - confirm against the implementation.
   */
   711 	enum
       
   712 		{
       
   713 		EStoredFlagOwnsConversionData				=0x00000001,
       
   714 		EStoredFlagConversionPlugInLibraryIsLoaded	=0x00000002
       
   715 		};
       
   /**
   Two-valued result type returned by AppendHardCodedCharacterSetIfRequiredL().
   No explicit values are given, so EStopCharacterSetSearch is 0 and
   EContinueCharacterSetSearch is 1.
   NOTE(review): presumably tells the caller whether to keep looking for further
   character sets - confirm against the implementation.
   */
   716 	enum TCharacterSetSearch
       
   717 		{
       
   718 		EStopCharacterSetSearch,
       
   719 		EContinueCharacterSetSearch
       
   720 		};
       
   /**
   Explicitly numbered constants (1 to 4), one per conversion plug-in function.
   NOTE(review): presumably these are the export ordinals used to look the
   functions up in a loaded conversion plug-in library, in which case the numbers
   must stay in step with every plug-in's export table - confirm before changing
   any value.
   */
   721 	enum TConversionPlugInFunctionOrdinals
       
   722 		{
       
   723 		EReplacementForUnconvertibleUnicodeCharacters=1,
       
   724 		EConvertFromUnicode=2,
       
   725 		EConvertToUnicode=3,
       
   726 		EIsInThisCharacterSet=4
       
   727 		};
       
   728 		
       
   729 private:
       
   /**
   Private, non-exported declarations. The default constructor is private, so
   code outside the class cannot construct an instance directly.
   NOTE(review): the private constructor plus ConstructL() looks like Symbian
   two-phase construction driven by a factory function declared elsewhere in the
   class - confirm.
   DeleteConversionData() is overloaded for a typed pointer and for TAny*.
   NOTE(review): the TAny* overload presumably exists so it can be registered as
   a cleanup-stack operation - confirm against the implementation.
   */
   730 	CCnvCharacterSetConverter();
       
   731 	void ConstructL();
       
   732 	static CArrayFix<SCharacterSet>* DoCreateArrayOfCharacterSetsAvailableLC(RFs& aFileServerSession, TUint aIdentifierOfOnlyCharacterSetOfInterest);
       
   733 	static TCharacterSetSearch AppendHardCodedCharacterSetIfRequiredL(CArrayFix<SCharacterSet>& aArrayOfCharacterSets, TUint aIdentifierOfOnlyCharacterSetOfInterest, TUint aIdentifierOfHardCodedCharacterSet, const TDesC& aNameOfHardCodedCharacterSet);
       
   734 	void ScanForStandardNamesAndMibEnumsL(RFs& aFileServerSession);
       
   735 	void ScanForStandardNamesAndMibEnumsROMOnlyL(RFs& aFileServerSession);
       
   736 	TAvailability DoPrepareToConvertToOrFromL(TUint aCharacterSetIdentifier, const CArrayFix<SCharacterSet>* aArrayOfCharacterSetsAvailable, RFs& aFileServerSession);
       
   737 	static void DeleteConversionData(const SCnvConversionData* aConversionData);
       
   738 	static void DeleteConversionData(TAny* aConversionData);
       
   739 	static TEndianness EndiannessOfForeignCharacters(const SCnvConversionData& aConversionData, TEndianness aDefaultEndiannessOfForeignCharacters);
       
   740 
       
   741 private:
       
   /**
   Instance data of the converter.
   iReplacementForUnconvertibleUnicodeCharacters is a fixed-capacity 8-bit buffer
   whose maximum length is
   KMaximumLengthOfReplacementForUnconvertibleUnicodeCharacters (50).
   iDowngradeForExoticLineTerminatingCharacters is the value returned by
   GetDowngradeForExoticLineTerminatingCharacters().
   NOTE(review): this class is declared in a published header, so the order and
   size of these members presumably must not change without breaking binary
   compatibility - confirm the compatibility policy before editing.
   NOTE(review): the pointer members (iConversionData, iStandardNamesAndMibEnums,
   iCharsetCnvCache) give no indication of ownership here - confirm in the
   destructor.
   */
   742 	TUint iStoredFlags;
       
   743 	TUint iCharacterSetIdentifierOfLoadedConversionData; // 0 or a UID of the loaded plugin
       
   744 	const SCnvConversionData* iConversionData;
       
   745 	TEndianness iDefaultEndiannessOfForeignCharacters;
       
   746 	TDowngradeForExoticLineTerminatingCharacters iDowngradeForExoticLineTerminatingCharacters;
       
   747 	TBuf8<KMaximumLengthOfReplacementForUnconvertibleUnicodeCharacters> iReplacementForUnconvertibleUnicodeCharacters;
       
   748 	CStandardNamesAndMibEnums* iStandardNamesAndMibEnums;
       
   749 	TBool iTlsDataConstructed;
       
   750 	CCharsetCnvCache* iCharsetCnvCache;
       
   751 	TBool iIsSystemStandardNamesAndMibEnumsScanned;
       
   752 	};
       
   753 
       
   754 #endif
       
   755