charconvfw/charconvplugins/src/plugins/ucs2.cpp
changeset 0 1fb32624e06b
equal deleted inserted replaced
-1:000000000000 0:1fb32624e06b
       
     1 /*
       
     2 * Copyright (c) 2005-2009 Nokia Corporation and/or its subsidiary(-ies).
       
     3 * All rights reserved.
       
     4 * This component and the accompanying materials are made available
       
     5 * under the terms of "Eclipse Public License v1.0"
       
     6 * which accompanies this distribution, and is available
       
     7 * at the URL "http://www.eclipse.org/legal/epl-v10.html".
       
     8 *
       
     9 * Initial Contributors:
       
    10 * Nokia Corporation - initial contribution.
       
    11 *
       
    12 * Contributors:
       
    13 *
       
    14 * Description: 
       
    15 *
       
    16 */
       
    17 
       
    18 
       
    19 #include <e32std.h>
       
    20 #include <charconv.h>
       
    21 #include <convgeneratedcpp.h>
       
    22 #include "ucs2.h"
       
    23 #include <ecom/implementationproxy.h>
       
    24 #include <charactersetconverter.h>
       
    25 
       
    26 class CUCS2ConverterImpl : public CCharacterSetConverterPluginInterface
       
    27 	{
       
    28 
       
    29 public:
       
    30 	virtual const TDesC8& ReplacementForUnconvertibleUnicodeCharacters();
       
    31 
       
    32 	virtual TInt ConvertFromUnicode(
       
    33 		CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
       
    34 		const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, 
       
    35 		TDes8& aForeign, 
       
    36 		const TDesC16& aUnicode, 
       
    37 		CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters);
       
    38 
       
    39 	virtual TInt ConvertToUnicode(
       
    40 		CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
       
    41 		TDes16& aUnicode, 
       
    42 		const TDesC8& aForeign, 
       
    43 		TInt& aState, 
       
    44 		TInt& aNumberOfUnconvertibleCharacters, 
       
    45 		TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter);
       
    46 
       
    47 	virtual TBool IsInThisCharacterSetL(
       
    48 		TBool& aSetToTrue, 
       
    49 		TInt& aConfidenceLevel, 
       
    50 		const TDesC8& aSample);
       
    51 
       
    52 	static CUCS2ConverterImpl* NewL();
       
    53 	virtual ~CUCS2ConverterImpl();
       
    54 
       
    55 private:
       
    56 	CUCS2ConverterImpl();
       
    57 
       
    58 	};
       
    59 
       
    60 
       
    61 
       
    62 // The following code has been copied and modified from the plugin computer generated code
       
    63 // that is generated from the charconv/data/???.cpl && /???.txt files
       
    64 // *** code begins ***
       
    65 #define ARRAY_LENGTH(aArray) (sizeof(aArray)/sizeof((aArray)[0]))
       
    66 
       
    67 #pragma warning (disable: 4049) // compiler limit : terminating line number emission
       
    68 
       
    69 _LIT8(KLit8ReplacementForUnconvertibleUnicodeCharacters, "\xff\xfd");
       
    70 
       
    71 GLDEF_C const TDesC8& ReplacementForUnconvertibleUnicodeCharacters_internal()
       
    72 	{
       
    73 	return KLit8ReplacementForUnconvertibleUnicodeCharacters;
       
    74 	}
       
    75 
       
    76 GLDEF_D const SCnvConversionData conversionData=
       
    77 	{
       
    78 	SCnvConversionData::EUnspecified,
       
    79 	{NULL,NULL},
       
    80 	{NULL,NULL},
       
    81 	{NULL,NULL},
       
    82 	NULL,
       
    83 	NULL
       
    84 	};
       
    85 
       
    86 
       
    87 const TInt    KByteOrderMark = 0xfeff;
       
    88 const TInt 	  KStateOffset = 0x1000;
       
    89 
       
    90 
       
    91 const TDesC8& CUCS2ConverterImpl::ReplacementForUnconvertibleUnicodeCharacters()
       
    92 	{
       
    93 	return ReplacementForUnconvertibleUnicodeCharacters_internal();
       
    94 	}
       
    95 
       
    96 
       
    97 /**
       
    98  * Takes a 16 bit UCS2 descriptor with or without BOM and translates it to an
       
    99  * eight bit descriptor in Big Endian format.
       
   100  *
       
   101  * Note aDefaultEndiannessOfForeignCharacters is not used by this converter
       
   102  *
       
   103  * @param CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters
       
   104  * @param const TDesC8&
       
   105  * @param TDes8& aForeign
       
   106  * @param const TDesC16& aUnicode
       
   107  * @param CCnvCharacterSetConverter::TArrayOfAscendingIndices&
       
   108  *
       
   109  * returns number of converted characters
       
   110 */
       
   111 TInt CUCS2ConverterImpl::ConvertFromUnicode(CCnvCharacterSetConverter::TEndianness /*aDefaultEndiannessOfForeignCharacters*/, 
       
   112 								 const TDesC8& /*aReplacementForUnconvertibleUnicodeCharacters*/, 
       
   113 								 TDes8& aForeign, 
       
   114 								 const TDesC16& aUnicode, 
       
   115 								 CCnvCharacterSetConverter::TArrayOfAscendingIndices& /*aIndicesOfUnconvertibleCharacters*/)
       
   116 	{
       
   117 	TInt numberOfUnicodeCharacters =0;
       
   118 	TInt nextChar;
       
   119 	
       
   120 	// start at the begining of the output buffer
       
   121 	aForeign.Zero();
       
   122 	
       
   123 	// while there is unicode data to convert and space in the output buffer
       
   124 	while ( (aForeign.Length() + 1 < aForeign.MaxLength()) && (numberOfUnicodeCharacters < aUnicode.Length()) )
       
   125 		{
       
   126 		nextChar = aUnicode[numberOfUnicodeCharacters];
       
   127 
       
   128 		// Note - this always converts to EBigEndian 
       
   129 		aForeign.Append((nextChar & 0xff00) >> 8);
       
   130 		aForeign.Append(nextChar & 0xff );
       
   131 			
       
   132 		numberOfUnicodeCharacters++;
       
   133 		}
       
   134 		
       
   135 	// returns the number of unconverted characters left at the end of the input descriptor  
       
   136 	return aUnicode.Length() - numberOfUnicodeCharacters;
       
   137 	}
       
   138 
       
   139 /**
       
   140  * Takes an 8 bit descriptor with or without a BOM and translates it to unicode 
       
   141  * Input endiness is determined by Byte Order Markers (BOM) in the source text.
       
   142  * If no BOM is present aDefaultEndiannessOfForeignCharacters is used.
       
   143  *
       
   144  * When the data is too large to fit in the output buffer, the endiness is saved in the state
       
   145  * variable between conversions
       
   146  *
       
   147  * @param aDefaultEndiannessOfForeignCharacters Default endiness if no BOMs present in the source
       
   148  * @param aUnicode Contains the converted text in the Unicode character set
       
   149  * @param aForeign The non-Unicode source text to be converted
       
   150  * @param aState Not used by this converter
       
   151  * @param aNumberOfUnconvertibleCharacters Contains the number of bytes which could not be converted to unicode
       
   152  * @param aIndexOfFirstByteOfFirstUnconvertibleCharacter The index of the first unconvertable byte or -1 if all converted.
       
   153  *
       
   154  * @return aNumberOfUnconvertibleCharacters The number of unconverted bytes left at the end of the input 
       
   155  * descriptor (e.g. because the output descriptor is not long enough to hold all the text), or one of the 
       
   156  * error values defined in TError. 
       
   157  * @internalTechnology 
       
   158  */
       
   159 TInt CUCS2ConverterImpl::ConvertToUnicode(CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
       
   160 						   TDes16& aUnicode,	
       
   161 						   const TDesC8& aForeign,
       
   162 						   TInt& aState,
       
   163 						   TInt& aNumberOfUnconvertibleCharacters, 
       
   164 						   TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter) 
       
   165 	{
       
   166 	TInt numberOfBytesConverted = 0;
       
   167 	TInt numberOfUnicodeCharacters =0;
       
   168 	TChar nextChar;
       
   169 	
       
   170 	// work out what byte order to use
       
   171 	CCnvCharacterSetConverter::TEndianness byteOrderMark;
       
   172 	if ( aState==CCnvCharacterSetConverter::KStateDefault )
       
   173 		{
       
   174 		// this is the first call so use the default or BOM for byte order
       
   175 		byteOrderMark = aDefaultEndiannessOfForeignCharacters;
       
   176 		}
       
   177 	else
       
   178 		{
       
   179 		// this is not the first call so use the saved byte order
       
   180 		byteOrderMark = STATIC_CAST( CCnvCharacterSetConverter::TEndianness, aState - KStateOffset );
       
   181 		}
       
   182 		
       
   183 	if ( aForeign.Length() < 2)
       
   184 		{ // too small to do anything with		
       
   185 		return -1;
       
   186 		}
       
   187 	// If the state is KStateDefault (this is the first call) check for BOM markers
       
   188 	else if (aState==CCnvCharacterSetConverter::KStateDefault)
       
   189 		{
       
   190 		// is there a Little Endian BOM
       
   191 		if (aForeign[0]==0xff && aForeign[1]==0xfe )
       
   192 			{ 
       
   193 			byteOrderMark = CCnvCharacterSetConverter::ELittleEndian; 
       
   194 			}
       
   195 		else if (aForeign[0]==0xfe && aForeign[1]==0xff )
       
   196 			{
       
   197 			byteOrderMark = CCnvCharacterSetConverter::EBigEndian; 
       
   198 			}
       
   199 		// remember the detected state
       
   200 		aState = byteOrderMark + KStateOffset;
       
   201 		}
       
   202 
       
   203 	// start at begining of the output buffer provided
       
   204 	aUnicode.Zero();
       
   205 	
       
   206 	// while there is at least 2 bytes of data to convert and space in the output buffer
       
   207 	while ( (numberOfBytesConverted+1 < aForeign.Size()) && (numberOfUnicodeCharacters < aUnicode.MaxLength()) )
       
   208 		{
       
   209 		if (byteOrderMark == CCnvCharacterSetConverter::ELittleEndian )
       
   210 			{
       
   211 			// ELittleEndian 0x??00
       
   212 			nextChar = aForeign[numberOfBytesConverted] + ( aForeign[numberOfBytesConverted+1] << 8);
       
   213 			}
       
   214 		else
       
   215 			{
       
   216 			// EBigEndian 0x00??
       
   217 			nextChar = ( aForeign[numberOfBytesConverted] <<8 ) + aForeign[numberOfBytesConverted+1];
       
   218 			}
       
   219 			
       
   220 		// save the unicode character extracted	unless it's a BOM
       
   221 		if ( nextChar != KByteOrderMark )
       
   222 			{
       
   223 			aUnicode.Append( nextChar );
       
   224 			numberOfUnicodeCharacters++;	
       
   225 			}
       
   226 			
       
   227 		numberOfBytesConverted+=2;
       
   228 		}
       
   229 	
       
   230 	// there are no uncovertable characters with UCS2,
       
   231 	aNumberOfUnconvertibleCharacters = 0;
       
   232 	// a negative value indicates that all characters converted
       
   233 	aIndexOfFirstByteOfFirstUnconvertibleCharacter = -1;
       
   234 			
       
   235 	// returns the number of unconverted bytes left at the end of the input descriptor 
       
   236 	// Note there could be 1 byte left over if an odd number of bytes provided for conversion
       
   237 	return aForeign.Size() - numberOfBytesConverted;
       
   238 	}
       
   239 
       
   240 
       
   241 /**
       
   242  * This converter does not support autodetect so always returns a confidence value of 0.
       
   243  * @internalTechnology 
       
   244  */
       
   245 TBool CUCS2ConverterImpl::IsInThisCharacterSetL(TBool& aSetToTrue, TInt& aConfidenceLevel, const TDesC8&)
       
   246 	{
       
   247 	aSetToTrue=ETrue;
       
   248 	aConfidenceLevel=0;
       
   249 	return EFalse;
       
   250 	}
       
   251 
       
   252 CUCS2ConverterImpl* CUCS2ConverterImpl::NewL()
       
   253 	{
       
   254 	CUCS2ConverterImpl* self = new(ELeave) CUCS2ConverterImpl();
       
   255 	return self;
       
   256 	}
       
   257 
       
   258 CUCS2ConverterImpl::~CUCS2ConverterImpl()
       
   259 	{
       
   260 	}
       
   261 
       
   262 CUCS2ConverterImpl::CUCS2ConverterImpl()
       
   263 	{
       
   264 	}
       
   265 
       
   266 const TImplementationProxy ImplementationTable[] = 
       
   267 	{
       
   268 		IMPLEMENTATION_PROXY_ENTRY(0x101FF492,	CUCS2ConverterImpl::NewL)
       
   269 	};
       
   270 
       
   271 EXPORT_C const TImplementationProxy* ImplementationGroupProxy(TInt& aTableCount)
       
   272 	{
       
   273 	aTableCount = sizeof(ImplementationTable) / sizeof(TImplementationProxy);
       
   274 
       
   275 	return ImplementationTable;
       
   276 	}
       
   277 
       
   278