charconvfw/charconv_fw/inc/convutils.h
changeset 0 1fb32624e06b
equal deleted inserted replaced
-1:000000000000 0:1fb32624e06b
       
     1 /*
       
     2 * Copyright (c) 2000-2009 Nokia Corporation and/or its subsidiary(-ies).
       
     3 * All rights reserved.
       
     4 * This component and the accompanying materials are made available
       
     5 * under the terms of "Eclipse Public License v1.0"
       
     6 * which accompanies this distribution, and is available
       
     7 * at the URL "http://www.eclipse.org/legal/epl-v10.html".
       
     8 *
       
     9 * Initial Contributors:
       
    10 * Nokia Corporation - initial contribution.
       
    11 *
       
    12 * Contributors:
       
    13 *
       
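    14 * Description: Declares CnvUtilities, a class of static helper functions for character conversion plug-ins that handle complex (modal or variable-length) encodings.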
       
    15 *
       
    16 */
       
    17 
       
    18 
       
    19 #if !defined(__CONVUTILS_H__)
       
    20 #define __CONVUTILS_H__
       
    21 
       
    22 #if !defined(__E32STD_H__)
       
    23 #include <e32std.h>
       
    24 #endif
       
    25 
       
    26 #if !defined(__E32BASE_H__)
       
    27 #include <e32base.h>
       
    28 #endif
       
    29 
       
    30 #if !defined(__CHARCONV_H__)
       
    31 #include <charconv.h>
       
    32 #endif
       
    33 
       
    34 struct SCnvConversionData;
       
    35 
       
    36  
       
    37 class CnvUtilities
       
    38 /** 
       
    39 Provides static character conversion utilities for complex encodings. Its functions 
       
    40 may be called from a plug-in DLL's implementation of ConvertFromUnicode() 
       
    41 and ConvertToUnicode().
       
    42 
       
    43 These utility functions are provided for use when converting to/from complex 
       
    44 character set encodings, including modal encodings. Modal encodings are those 
       
    45 where the interpretation of a given byte of data is dependent on the current 
       
    46 mode; mode changing is performed by escape sequences which occur in the byte 
       
    47 stream. A non-modal complex encoding is one in which characters are encoded 
       
    48 using variable numbers of bytes. The number of bytes used to encode a character 
       
    49 depends on the value of the initial byte.
       
    50 @publishedAll 
       
    51 @released
       
    52 */
       
    53 	{
       
    54 public:
       
    55 	// type definitions for converting from Unicode
       
    56 	
       
    57 	/** A pointer to a function which "mangles" text when converting from
       
    58 	Unicode into a complex modal or non-modal foreign character set
       
    59 	encoding.
       
    60 
       
    61 	It might insert a shifting character, escape sequence, or other
       
    62 	special characters. If the target character set encoding is modal, the
       
    63 	implementation of this function may call the
       
    64 	CnvUtilities::ConvertFromIntermediateBufferInPlace()
       
    65 	utility function which is provided because many modal character sets
       
    66 	require an identical implementation of this function.
       
    67 
       
    68 	" convutils.lib " */
       
    69     typedef void (*FConvertFromIntermediateBufferInPlace)(TInt aStartPositionInDescriptor, TDes8& aDescriptor, TInt& aNumberOfCharactersThatDroppedOut);
       
    70 	struct SCharacterSet
       
    71 	/** Stores information about a non-Unicode character set. The information 
       
    72 	is used to locate the conversion information required by 
       
    73 	ConvertFromUnicode() and ConvertToUnicode().
       
    74 
       
    75 	An array of these structs that contains all available character sets 
       
    76 	can be generated by CreateArrayOfCharacterSetsAvailableLC() and 
       
    77 	CreateArrayOfCharacterSetsAvailableL(), and is used by one of the 
       
    78 	overloads of PrepareToConvertToOrFromL(). NOTE(review): none of those functions is declared in this header; here TArray<SCharacterSet> is taken by CnvUtilities::ConvertFromUnicode() - confirm the paragraph above describes this struct. */
       
    79 		{
       
    80 		/** The conversion data. */
       
    81 		const SCnvConversionData* iConversionData; // must *not* be set to NULL
       
    82 		/** A pointer to a function which "mangles" the text in a way 
       
    83 		appropriate to the target complex character set. For instance it 
       
    84 		might insert a shifting character, escape sequence, or other special 
       
    85 		characters. */
       
    86 		FConvertFromIntermediateBufferInPlace iConvertFromIntermediateBufferInPlace; // must *not* be set to NULL
       
    87 		/** The escape sequence which introduces the character set, i.e. it 
       
    88 		identifies this character set as the next one to use. Must not be NULL.
       
    89 		If the character set is non-modal, this should be set to an empty 
       
    90 		descriptor. */
       
    91 		const TDesC8* iEscapeSequence; // must *not* be set to NULL
       
    92 		};
       
    93 	// type definitions for converting to Unicode
       
    94 
       
    95 	/** A pointer to a function which calculates the number of consecutive
       
    96 	bytes in the remainder of the foreign descriptor which can be
       
    97 	converted using the current character set's conversion data.
       
    98 
       
    99 	Called when converting from a non-modal complex character set encoding
       
   100 	into Unicode. It may return a negative
       
   101 	CCnvCharacterSetConverter::TError value to indicate an
       
   102 	error in the encoding.
       
   103 
       
   104 	" convutils.lib " */
       
   105 	typedef TInt (*FNumberOfBytesAbleToConvert)(const TDesC8& aDescriptor); // may return negative CCnvCharacterSetConverter::TError values
       
   106 	
       
   107 	/** A pointer to a function which prepares the text for conversion into
       
   108 	Unicode.
       
   109 
       
   110 	For instance it might remove any shifting or other special characters.
       
   111 	Called when converting from a non-modal complex character set encoding
       
   112 	into Unicode.
       
   113 
       
   114 	" convutils.lib " */
       
   115  	typedef void (*FConvertToIntermediateBufferInPlace)(TDes8& aDescriptor);
       
   116 
       
   117 	struct SState
       
   118 	/** Character conversion data for one of the character sets which is 
       
   119 	specified in a modal character set encoding. An array of these structs 
       
   120 	is used when converting from a modal character set into Unicode, using 
       
   121 	CnvUtilities::ConvertToUnicodeFromModalForeign(). Neither of the members 
       
   122 	may be NULL. */
       
   123 		{
       
   124 		/** The escape sequence which introduces the character set, i.e. it 
       
   125 		identifies this character set as the next one to use. This must begin 
       
   126 		with KControlCharacterEscape. */
       
   127 		const TDesC8* iEscapeSequence; // must *not* be set to NULL and must begin with 0x1b
       
   128 		/** The conversion data. */
       
   129 		const SCnvConversionData* iConversionData; // must *not* be set to NULL
       
   130 		};
       
   131 	struct SMethod /** Conversion functions and data for one character set; an array of these structs is taken by CnvUtilities::ConvertToUnicodeFromHeterogeneousForeign(). */
       
   132 		{
       
   133 		/** A pointer to a function which calculates the number of consecutive 
       
   134 		bytes in the remainder of the foreign descriptor which can be converted 
       
   135 		using the current character set's conversion data. It may return a 
       
   136 		negative CCnvCharacterSetConverter::TError value to indicate an error 
       
   137 		in the encoding. */
       
   138 		FNumberOfBytesAbleToConvert iNumberOfBytesAbleToConvert; // must *not* be set to NULL
       
   139 		/** A pointer to a function which prepares the text for conversion 
       
   140 		into Unicode. For instance it might remove any shifting or other 
       
   141 		special characters. */
       
   142 		FConvertToIntermediateBufferInPlace iConvertToIntermediateBufferInPlace; // must *not* be set to NULL
       
   143 		/** The conversion data. */
       
   144 		const SCnvConversionData* iConversionData; // must *not* be set to NULL
       
   145 		/** The number of bytes per character. */
       
   146 		TInt16 iNumberOfBytesPerCharacter;
       
   147 		/** The number of core bytes per character. */
       
   148 		TInt16 iNumberOfCoreBytesPerCharacter;
       
   149 		};
       
   150 public:
       
   151 	// these functions may *not* have CCnvCharacterSetConverter::EInputConversionFlagStopAtFirstUnconvertibleCharacter set in aInputConversionFlags
       
   152 	IMPORT_C static TInt ConvertFromUnicode(CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, TDes8& aForeign, const TDesC16& aUnicode, CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters, const TArray<SCharacterSet>& aArrayOfCharacterSets);
       
   153 	IMPORT_C static TInt ConvertFromUnicode(CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, TDes8& aForeign, const TDesC16& aUnicode, CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters, const TArray<SCharacterSet>& aArrayOfCharacterSets, TUint& aOutputConversionFlags, TUint aInputConversionFlags);
       
   154 	IMPORT_C static void ConvertFromIntermediateBufferInPlace(TInt aStartPositionInDescriptor, TDes8& aDescriptor, TInt& aNumberOfCharactersThatDroppedOut, const TDesC8& aEscapeSequence, TInt aNumberOfBytesPerCharacter);
       
   155 	IMPORT_C static TInt ConvertToUnicodeFromModalForeign(CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, TDes16& aUnicode, const TDesC8& aForeign, TInt& aState, TInt& aNumberOfUnconvertibleCharacters, TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter, const TArray<SState>& aArrayOfStates); // the first element of aArrayOfStates is taken to be the default state
       
   156 	IMPORT_C static TInt ConvertToUnicodeFromModalForeign(CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, TDes16& aUnicode, const TDesC8& aForeign, TInt& aState, TInt& aNumberOfUnconvertibleCharacters, TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter, const TArray<SState>& aArrayOfStates, TUint& aOutputConversionFlags, TUint aInputConversionFlags); // the first element of aArrayOfStates is taken to be the default state
       
   157 	IMPORT_C static TInt ConvertToUnicodeFromHeterogeneousForeign(CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, TDes16& aUnicode, const TDesC8& aForeign, TInt& aNumberOfUnconvertibleCharacters, TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter, const TArray<SMethod>& aArrayOfMethods);
       
   158 	IMPORT_C static TInt ConvertToUnicodeFromHeterogeneousForeign(CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, TDes16& aUnicode, const TDesC8& aForeign, TInt& aNumberOfUnconvertibleCharacters, TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter, const TArray<SMethod>& aArrayOfMethods, TUint& aOutputConversionFlags, TUint aInputConversionFlags);
       
   159 private:
       
   160 	static void CheckArrayOfCharacterSets(const TArray<SCharacterSet>& aArrayOfCharacterSets);
       
   161 	static void CheckArrayOfStates(const TArray<SState>& aArrayOfStates);
       
   162 	static void CheckArrayOfMethods(const TArray<SMethod>& aArrayOfMethods);
       
   163 	static TInt LengthOfUnicodeCharacter(const TDesC16& aUnicode, TInt aIndex);
       
   164 	static TBool NextHomogeneousForeignRun(const SCnvConversionData*& aConversionData, TInt& aNumberOfForeignBytesConsumed, TPtrC8& aHomogeneousRun, TPtrC8& aRemainderOfForeign, const TArray<SState>& aArrayOfStates, TUint& aOutputConversionFlags);
       
   165 	static TBool MatchesEscapeSequence(TInt& aNumberOfForeignBytesConsumed, TPtrC8& aHomogeneousRun, TPtrC8& aRemainderOfForeign, const TDesC8& aEscapeSequence);
       
   166 	static TBool IsStartOf(const TDesC8& aStart, const TDesC8& aPotentiallyLongerDescriptor);
       
   167 	inline static TInt ReduceToNearestMultipleOf(TInt aNumber1, TInt aNumber2) {return (aNumber1/aNumber2)*aNumber2;} // truncates aNumber1 to a multiple of aNumber2 using integer division; aNumber2 must not be zero
       
   168 	};
       
   169 
       
   170 #endif
       
   171