xml/xmlfw/src/xmlframework/charsetconverter.cpp
changeset 0 e35f40988205
equal deleted inserted replaced
-1:000000000000 0:e35f40988205
       
     1 // Copyright (c) 2003-2009 Nokia Corporation and/or its subsidiary(-ies).
       
     2 // All rights reserved.
       
     3 // This component and the accompanying materials are made available
       
     4 // under the terms of "Eclipse Public License v1.0"
       
     5 // which accompanies this distribution, and is available
       
     6 // at the URL "http://www.eclipse.org/legal/epl-v10.html".
       
     7 //
       
     8 // Initial Contributors:
       
     9 // Nokia Corporation - initial contribution.
       
    10 //
       
    11 // Contributors:
       
    12 //
       
    13 // Description:
       
    14 //
       
    15 
       
    16 #include <e32std.h>
       
    17 #include <utf.h>
       
    18 #include <charconv.h>
       
    19 
       
    20 #include <xml/plugins/charsetconverter.h>
       
    21 #include <xml/xmlframeworkerrors.h>
       
    22 
       
    23 using namespace Xml;
       
    24 
       
    25 /**
       
    26 The maximum number of bytes used for conversion at any time.
       
    27 This is also used to size the necessary buffers used in the conversions.
       
    28 
       
    29 @internalTechnology
       
    30 */
       
    31 const TInt KMaxReadableBytes = 512;
       
    32 
       
    33 
       
    34 
       
    35 LOCAL_C void DestroyHBufC16(TAny* aHBufC)
       
    36 /**
       
    37 This method is used when pointer reallocation is needed and the pointer needs to be 
       
    38 cleaned via the cleanup stack.
       
    39 
       
    40 @param				aHBufC the wide buffer.
       
    41 @internalTechnology
       
    42 
       
    43 */
       
    44 	{
       
    45 	delete *static_cast<HBufC**>(aHBufC);
       
    46 	}
       
    47 
       
    48 
       
    49 
       
    50 LOCAL_C void DestroyHBufC8(TAny* aHBufC)
       
    51 /**
       
    52 This method is used when pointer reallocation is needed and the pointer needs to be 
       
    53 cleaned via the cleanup stack.
       
    54 
       
    55 @param				aHBufC the narrow buffer.
       
    56 @internalTechnology
       
    57 
       
    58 */
       
    59 	{
       
    60 	delete *static_cast<HBufC8**>(aHBufC);
       
    61 	}
       
    62 
       
    63 
       
    64 
       
    65 CCharSetConverter::CCharSetConverter()
       
    66 /**
       
    67 Default Constructor
       
    68 
       
    69 */
       
    70 	{
       
    71 	// do nothing;
       
    72 	}
       
    73 
       
    74 
       
    75 
       
    76 /**
       
    77 This method creates an instance of this class.
       
    78 The framework is responsible for creating this object.
       
    79 
       
    80 @leave ... One of the system wide error codes e.g. KErrNoMemory
       
    81 @return	The new'ed object.
       
    82 @internalTechnology
       
    83 */
       
    84 CCharSetConverter* CCharSetConverter::NewL()
       
    85 	{
       
    86 	CCharSetConverter* self = new(ELeave) CCharSetConverter();
       
    87 	CleanupStack::PushL(self);
       
    88 	self->ConstructL();
       
    89 	CleanupStack::Pop(self);
       
    90 	return(self);
       
    91 	}
       
    92 
       
    93 
       
    94 
       
    95 void CCharSetConverter::ConstructL()
       
    96 /**
       
    97 This method provides some construction of this object.
       
    98 
       
    99 */
       
   100 	{
       
   101 	iCnvCharacterSetConverter = CCnvCharacterSetConverter::NewL();
       
   102 	User::LeaveIfError(iFs.Connect());
       
   103 	iConversionBuffer = User::Heap().AllocL(KMaxReadableBytes);
       
   104 	iConversionBufferSize = KMaxReadableBytes;
       
   105 	}
       
   106 
       
   107 
       
   108 
       
   109 CCharSetConverter::~CCharSetConverter()
       
   110 /**
       
   111 Destructor.
       
   112 The framework is responsible for destroying this object.
       
   113 
       
   114 @post				This object is properly destroyed.
       
   115 
       
   116 */
       
   117 	{
       
   118 	iFs.Close();
       
   119 	delete iCnvCharacterSetConverter;
       
   120 	delete iConversionBuffer;
       
   121 	}
       
   122 
       
   123 
       
   124 
       
   125 EXPORT_C void CCharSetConverter::PrepareCharConvL(TUint& aCharSetUid, const TDesC8& aEncoding)
       
   126 /**
       
   127 This method prepares CharConv to encode from the standard name.
       
   128 
       
   129 @post				CharConv has been prepared.
       
   130 
       
   131 @leave				KErrXmlUnsupportedCharacterSet - Charset not supported.
       
   132 @leave				KErrXmlUnavailableCharacterSet - Charset not available
       
   133 
       
   134 @param				aCharSetUid On return, contains the character set identifier 
       
   135 					of the encoding.
       
   136 @param				aEncoding the encoding to prepare for.
       
   137 */
       
   138 	{
       
   139 	// Get the charset uid
       
   140 	if ((aCharSetUid = 
       
   141 		iCnvCharacterSetConverter->ConvertStandardNameOfCharacterSetToIdentifierL(aEncoding, iFs)) == 0)
       
   142 		{
       
   143 		User::Leave(KErrXmlUnsupportedCharacterSet);
       
   144 		}
       
   145 
       
   146 
       
   147 	// Prepare charconv to use this charset
       
   148 	if (iCnvCharacterSetConverter->PrepareToConvertToOrFromL(aCharSetUid, iFs) == 
       
   149 			CCnvCharacterSetConverter::ENotAvailable )
       
   150 		{
       
   151 		User::Leave(KErrXmlUnavailableCharacterSet); // Unavailable
       
   152 		}
       
   153 	}
       
   154 
       
   155 
       
   156 
       
   157 EXPORT_C void CCharSetConverter::PrepareCharConvL(TUint& aCharSetUid, TInt aMibEnum)
       
   158 /**
       
   159 This method prepares CharConv to encode from the mib enum.
       
   160 
       
   161 @post				CharConv has been prepared.
       
   162 
       
   163 @leave				KErrXmlUnsupportedCharacterSet - Charset not supported.
       
   164 @leave				KErrXmlUnavailableCharacterSet - Charset not available
       
   165 
       
   166 @param				aCharSetUid On return, contains the character set identifier 
       
   167 					of the encoding.
       
   168 @param				aMibEnum The IANA specified mib enum for this encoding
       
   169 
       
   170 @see				http://www.iana.org/assignments/character-sets
       
   171 */
       
   172 	{
       
   173 	// Get the charset uid
       
   174 	if ((aCharSetUid = 
       
   175 		iCnvCharacterSetConverter->ConvertMibEnumOfCharacterSetToIdentifierL(aMibEnum, iFs)) == 0)
       
   176 		{
       
   177 		User::Leave(KErrXmlUnsupportedCharacterSet);  // May want to try something else?
       
   178 		}
       
   179 
       
   180 
       
   181 	// Prepare charconv to use this charset
       
   182 	if (iCnvCharacterSetConverter->PrepareToConvertToOrFromL(aCharSetUid, iFs) == 
       
   183 			CCnvCharacterSetConverter::ENotAvailable )
       
   184 		{
       
   185 		User::Leave(KErrXmlUnavailableCharacterSet); // Unavailable
       
   186 		}
       
   187 	}
       
   188 
       
   189 
       
   190 
       
   191 EXPORT_C TInt CCharSetConverter::ConvertToUnicodeL(TUint32 aSrcCharset, const TDesC8& aInputBuffer, 
       
   192 														 HBufC16*& aUnicodeConversion)
       
   193 /**
       
   194 This method converts the given bytes to unicode.
       
   195 If this function leaves, memory is cleaned up.
       
   196 This overload allocates memory for the output itself.
       
   197 
       
   198 @return				KErrNone if the conversion was succesfull
       
   199 					or one of the error values defined in TError.
       
   200 
       
   201 @leave				KErrXmlUnavailableCharacterSet - CharSet not available.
       
   202 
       
   203 @param				aSrcCharset The character set encoding to convert from.
       
   204 @param				aInputBuffer The characters to be converted.
       
   205 @param				aUnicodeConversion On return, contains the unicode conversion.
       
   206 */
       
   207 	{	
       
   208 	if (iCnvCharacterSetConverter->PrepareToConvertToOrFromL(aSrcCharset, iFs) == 
       
   209 			CCnvCharacterSetConverter::ENotAvailable)
       
   210 		{
       
   211 		User::Leave(KErrXmlUnavailableCharacterSet);
       
   212 		}
       
   213 
       
   214 	TInt maxLength = KMaxReadableBytes;	
       
   215 	aUnicodeConversion = HBufC16::NewL(maxLength);	
       
   216 	CleanupStack::PushL(TCleanupItem(DestroyHBufC16, &aUnicodeConversion));//push buffer's address
       
   217 
       
   218 
       
   219 	TInt state = CCnvCharacterSetConverter::KStateDefault;
       
   220 	TPtr16 remainingOutput(aUnicodeConversion->Des());
       
   221 	TInt unconverted = iCnvCharacterSetConverter->ConvertToUnicode(remainingOutput, aInputBuffer, state);	
       
   222 
       
   223 	// While there is still more data to convert
       
   224 	while (0 < unconverted)	
       
   225 		{
       
   226 		// Resize the buffer to hold more data
       
   227 		maxLength += KMaxReadableBytes;
       
   228 		aUnicodeConversion = aUnicodeConversion->ReAllocL(maxLength);
       
   229 
       
   230 		// Segment the writable area
       
   231 		TInt outputLength = aUnicodeConversion->Length();
       
   232 		TPtr16 remainingOutput1(&(aUnicodeConversion->Des())[0] + outputLength, 0, maxLength - outputLength);
       
   233 		remainingOutput.Set(remainingOutput1);
       
   234 
       
   235 		// Convert the data
       
   236 		unconverted = iCnvCharacterSetConverter->ConvertToUnicode(remainingOutput, aInputBuffer.Right(unconverted), state);
       
   237 		aUnicodeConversion->Des().SetLength(outputLength + remainingOutput.Length());
       
   238 		}
       
   239 
       
   240 	// Reallocate to a minimally-sized buffer	
       
   241 	if (unconverted == 0)
       
   242 		{
       
   243 		aUnicodeConversion = aUnicodeConversion->ReAllocL(aUnicodeConversion->Length());
       
   244 		}
       
   245 
       
   246 	CleanupStack::Pop(&aUnicodeConversion);//destroy the object pointed by the buffer wherever it is since we have got hold of the pointer (buffer)'s address
       
   247 	return unconverted;  // return error value if there is one.
       
   248 	}
       
   249 
       
   250 
       
   251 EXPORT_C TInt CCharSetConverter::ConvertToUnicodeL(TUint32 aSrcCharset, 
       
   252 												   const TDesC8& aInput,
       
   253 												   TPtr16& aOutput)
       
   254 /**
       
   255 This method converts the given bytes to unicode.
       
   256 If this function leaves, memory is cleaned up.
       
   257 This overload stores the conversion output in memory already allocated, for the sole use
       
   258 of the TPtr versions of overloaded ConvertToUnicodeL and ConvertFromUnicodeL functions. You must make sure you
       
   259 have finished with the output from a previous call to either (TPtr overload of) ConvertToUnicodeL
       
   260 or ConvertFromUnicodeL before calling either again, as the previous output will be overwritten with
       
   261 the new output.
       
   262 This version is more efficient than the HBufC alternative and so should be used whenever possible.
       
   263 
       
   264 @return				KErrNone if the conversion was succesfull
       
   265 					or one of the error values defined in TError.
       
   266 
       
   267 @leave				KErrXmlUnavailableCharacterSet - CharSet not available.
       
   268 
       
   269 @param				aSrcCharset The character set encoding to convert from.
       
   270 @param				aInput The characters to be converted.
       
   271 @param				aOutput On return, contains the unicode conversion.
       
   272 */
       
   273 	{	
       
   274 	if (iCnvCharacterSetConverter->PrepareToConvertToOrFromL(aSrcCharset, iFs) == 
       
   275 			CCnvCharacterSetConverter::ENotAvailable)
       
   276 		{
       
   277 		User::Leave(KErrXmlUnavailableCharacterSet);
       
   278 		}
       
   279 
       
   280 	TInt state = CCnvCharacterSetConverter::KStateDefault;
       
   281 
       
   282 	// Set up output descriptor reference: "Payload" is iConversionBuffer (a TAny *), it's initial
       
   283 	// length is zero (because it's empty) and it's initial maximum length is the maximum number of
       
   284 	// unicode characters which will fit into the current size of iConversion buffer 	
       
   285 	aOutput.Set((TUint16*)iConversionBuffer, 0, iConversionBufferSize/sizeof(TUint16));
       
   286 
       
   287 	// Convert the data, returning the amount of characters that are unconverted, due to the output buffer being full
       
   288 	TInt unconverted = iCnvCharacterSetConverter->ConvertToUnicode(aOutput, aInput, state);	
       
   289 
       
   290 	// While there is still more data to convert
       
   291 	while (0 < unconverted)	
       
   292 		{
       
   293 		TInt outputLength = aOutput.Length();
       
   294 
       
   295 		// Resize the buffer to hold more data
       
   296 		iConversionBufferSize += KMaxReadableBytes;
       
   297 		
       
   298 		iConversionBuffer = User::Heap().ReAllocL(iConversionBuffer,iConversionBufferSize);
       
   299 		if (iConversionBuffer == NULL) 	
       
   300 			User::Leave(KErrNoMemory);
       
   301 		
       
   302 		// Reconstruct the output descriptor to point to the new buffer, setting current
       
   303 		// length (the number of characters we've converted so far) and maximum length
       
   304 		// (the number of unicode characters which will fit into the newly extended 
       
   305 		// iConversionBuffer) appropriately. 
       
   306 		aOutput.Set((TUint16*)iConversionBuffer, outputLength, iConversionBufferSize/sizeof(TUint16));		
       
   307 
       
   308 		// Construct a modifiable pointer descriptor pointing to the the writable area of
       
   309 		// iConversionBuffer
       
   310 		TPtr16 remainingOutput(((TUint16*)iConversionBuffer)+outputLength, 0, aOutput.MaxLength() - outputLength);
       
   311 
       
   312 		// Try to convert another chunk of data
       
   313 		unconverted = iCnvCharacterSetConverter->ConvertToUnicode(remainingOutput, aInput.Right(unconverted), state);
       
   314 		
       
   315 		// Update the length of the output buffer to include the data we just converted.
       
   316 		aOutput.SetLength(remainingOutput.Length()+outputLength);
       
   317 		}
       
   318 		
       
   319 	return unconverted;  // return error value if there is one.
       
   320 	}
       
   321 
       
   322 
       
   323 			
       
   324 EXPORT_C TInt CCharSetConverter::ConvertFromUnicodeL(const TDesC16& aUnicodeConversion, 
       
   325 														   TUint32 aDestCharset, HBufC8*& aOutputBuffer)
       
   326 /**
       
   327 This method converts the given unicode to the specified encoding.
       
   328 If this function leaves, memory is cleaned up.
       
   329 This overload allocates memory for the output itself.
       
   330 
       
   331 @return				KErrNone if the conversion was succesfull
       
   332 					or one of the error values defined in TError.
       
   333 					
       
   334 @leave				KErrXmlUnavailableCharacterSet - Charset not available.
       
   335 
       
   336 @param				aUnicodeConversion The unicode to convert.
       
   337 @param				aDestCharset The character set encoding to convert to.
       
   338 @param				aOutputBuffer On return, contains the specified conversion.
       
   339 */
       
   340 	{
       
   341 	if (iCnvCharacterSetConverter->PrepareToConvertToOrFromL(aDestCharset, iFs) == 
       
   342 			CCnvCharacterSetConverter::ENotAvailable)
       
   343 		{
       
   344 		User::Leave(KErrXmlUnavailableCharacterSet);
       
   345 		}
       
   346 
       
   347 	TInt maxLength = KMaxReadableBytes;	
       
   348 	aOutputBuffer = HBufC8::NewL(maxLength);	
       
   349 	CleanupStack::PushL(TCleanupItem(DestroyHBufC8, &aOutputBuffer));//push buffer's address
       
   350 
       
   351 	TPtr8 remainingOutput(aOutputBuffer->Des());
       
   352 	TInt unconverted = iCnvCharacterSetConverter->ConvertFromUnicode(remainingOutput, aUnicodeConversion);	
       
   353 
       
   354 	// While there is still more data to convert
       
   355 	while (0 < unconverted)	
       
   356 		{
       
   357 		// Resize the buffer to hold more data
       
   358 		maxLength += KMaxReadableBytes;
       
   359 		aOutputBuffer = aOutputBuffer->ReAllocL(maxLength);
       
   360 
       
   361 		// Segment the writable area
       
   362 		TInt outputLength = aOutputBuffer->Length();
       
   363 		TPtr8 remainingOutput1(&(aOutputBuffer->Des())[0] + outputLength, 0, maxLength - outputLength);
       
   364 		remainingOutput.Set(remainingOutput1);
       
   365 
       
   366 		// Convert the data
       
   367 		unconverted = iCnvCharacterSetConverter->ConvertFromUnicode(remainingOutput, aUnicodeConversion.Right(unconverted));
       
   368 		aOutputBuffer->Des().SetLength(outputLength + remainingOutput.Length());
       
   369 		}
       
   370 
       
   371 	// Reallocate to a minimally-sized buffer	
       
   372 	if (unconverted == 0)
       
   373 		{
       
   374 		aOutputBuffer = aOutputBuffer->ReAllocL(aOutputBuffer->Length());
       
   375 		}
       
   376 
       
   377 	CleanupStack::Pop(&aOutputBuffer);//destroy the object pointed by the buffer wherever it is since we have got hold of the pointer (buffer)'s address
       
   378 
       
   379 	return unconverted;  // return error value if there is one.
       
   380 	}
       
   381 
       
   382 
       
   383 EXPORT_C TInt CCharSetConverter::ConvertFromUnicodeL(const TDesC16& aInput,
       
   384 													 TUint32 aDestCharset,
       
   385 													 TPtr8& aOutput)
       
   386 /**
       
   387 This method converts the given unicode to the specified encoding. 
       
   388 If this function leaves, memory is cleaned up.
       
   389 This overload stores the conversion output in memory already allocated, for the sole use
       
   390 of the TPtr versions of overloaded ConvertToUnicodeL and ConvertFromUnicodeL functions. You must make sure you
       
   391 have finished with the output from a previous call to either (TPtr overload of) ConvertToUnicodeL
       
   392 or ConvertFromUnicodeL before calling either again, as the previous output will be overwritten with
       
   393 the new output.
       
   394 This version is more efficient than the HBufC alternative and so should be used whenever possible.
       
   395 
       
   396 @return				KErrNone if the conversion was succesfull
       
   397 					or one of the error values defined in TError.
       
   398 					
       
   399 @leave				KErrXmlUnavailableCharacterSet - Charset not available.
       
   400 
       
   401 @param				aInput The unicode to convert.
       
   402 @param				aDestCharset The character set encoding to convert to.
       
   403 @param				aOutput The characters after conversion.
       
   404 */
       
   405 	{
       
   406 	if (iCnvCharacterSetConverter->PrepareToConvertToOrFromL(aDestCharset, iFs) == 
       
   407 			CCnvCharacterSetConverter::ENotAvailable)
       
   408 		{
       
   409 		User::Leave(KErrXmlUnavailableCharacterSet);
       
   410 		}
       
   411 	// Set up output descriptor reference: "Payload" is iConversionBuffer (a TAny *), it's initial
       
   412 	// length is zero (because it's empty).	
       
   413 	aOutput.Set((TUint8*)iConversionBuffer, 0, iConversionBufferSize);	
       
   414 
       
   415 	// Convert the data, returning the amount of characters that are unconverted, due to the output buffer being full
       
   416 	TInt unconverted = iCnvCharacterSetConverter->ConvertFromUnicode(aOutput, aInput);	
       
   417 
       
   418 	// While there is still more data to convert
       
   419 	while (0 < unconverted)	
       
   420 		{
       
   421 		TInt outputLength = aOutput.Length();
       
   422 		
       
   423 		// Resize the buffer to hold the remaining data
       
   424 		iConversionBufferSize += KMaxReadableBytes;
       
   425 		
       
   426 		iConversionBuffer = User::Heap().ReAllocL(iConversionBuffer,iConversionBufferSize);
       
   427 		if (iConversionBuffer == NULL) 	
       
   428 			User::Leave(KErrNoMemory);
       
   429 		aOutput.Set((TUint8*)iConversionBuffer,iConversionBufferSize,iConversionBufferSize);
       
   430 		
       
   431 		// Construct a modifiable pointer descriptor pointing to the the writable area of
       
   432 		// iConversionBuffer
       
   433 		TPtr8 remainingOutput(((TUint8*)iConversionBuffer) + outputLength, 0, iConversionBufferSize - outputLength);
       
   434 
       
   435 		// Try to convert another chunk of data
       
   436 		unconverted = iCnvCharacterSetConverter->ConvertFromUnicode(remainingOutput, aInput.Right(unconverted));
       
   437 	
       
   438 		// Update the length of the output buffer to include the data we just converted.
       
   439 		aOutput.SetLength(remainingOutput.Length()+outputLength);
       
   440 		}
       
   441 		
       
   442 	return unconverted;  // return error value if there is one.
       
   443 	}
       
   444 
       
   445 
       
   446 EXPORT_C void CCharSetConverter::PrepareToConvertToOrFromL(TUint32 aCharSetUid)
       
   447 /**
       
   448 This method is a helper function that prepares CharConv for a conversion.
       
   449 
       
   450 @see				CCnvCharacterSetConverter::PrepareToConvertToOrFromL
       
   451 @post				CharConv is ready for the conversion or not.
       
   452 
       
   453 @leave				KErrXmlUnavailableCharacterSet - Charset not available.
       
   454 
       
   455 @param				aCharSetUid The character set encoding to convert to.
       
   456 */
       
   457 	{
       
   458 	if (iCnvCharacterSetConverter->PrepareToConvertToOrFromL(aCharSetUid, iFs) == 
       
   459 			CCnvCharacterSetConverter::ENotAvailable )
       
   460 		{
       
   461 		User::Leave(KErrXmlUnavailableCharacterSet);
       
   462 		}
       
   463 	}
       
   464 
       
   465 
       
   466 
       
   467 EXPORT_C void CCharSetConverter::ConvertCharacterSetIdentifierToStandardNameL(TUint32 aCharSetUid, 
       
   468 																			  HBufC8*& aCharSet)
       
   469 /**
       
   470 This method is a helper function that obtains a standand character
       
   471 encoding name from a character set identifer.
       
   472 
       
   473 @see				CCnvCharacterSetConverter::ConvertCharacterSetIdentifierToStandardNameL
       
   474 
       
   475 @leave				KErrXmlUnsupportedCharacterSet If the character set is not known.
       
   476 
       
   477 @param				aCharSetUid The character set to obtain the name for.
       
   478 @param				aCharSet On return holds the Internet-standard name
       
   479 					or MIME name of the character set.
       
   480 					The name is encoded in 8 bit ASCII. 
       
   481 */
       
   482 	{
       
   483 	if ((aCharSet = 
       
   484 		iCnvCharacterSetConverter->
       
   485 			ConvertCharacterSetIdentifierToStandardNameL(aCharSetUid, iFs)) == NULL)
       
   486 		{
       
   487 		User::Leave(KErrXmlUnsupportedCharacterSet);
       
   488 		}
       
   489 	}
       
   490 
       
   491 
       
   492 
       
   493 EXPORT_C TInt CCharSetConverter::ConvertUcs4CharactersToEncodingL(TUint32* aUcs4Src, 
       
   494 																  TInt aUcs4Count, 
       
   495 																  TUint32 aDestCharset,
       
   496 																  HBufC8*& aConversion)
       
   497 /**
       
   498 This method converts ucs-4 characters to the desired non-modal encoding.
       
   499 aConversion should be NULL on calling of this function.
       
   500 If this function leaves, memory is cleaned up.
       
   501 There is no TPtr overload of this method, as currently it is only called a few times and so would not 
       
   502 produce any noticable benefits.
       
   503 
       
   504 @return				CCharSetConverter::ConvertFromUnicodeL.
       
   505 
       
   506 @leave				KErrXmlBadCharacterConversion
       
   507 
       
   508 @param				aUcs4Src list of ucs-4 characters.
       
   509 @param				aUcs4Count number of ucs4 characters.
       
   510 @param				aDestCharset the desired encoding.
       
   511 @param				aConversion On return, points to the converted encoding.
       
   512 */
       
   513 	{
       
   514 	// convert ucs-4 to ucs-2
       
   515 
       
   516 	// Find the length of the output
       
   517 	TText16 buf[2];
       
   518 	TInt length = 0;
       
   519 	TUint32* src = NULL;
       
   520 
       
   521 	for (src = aUcs4Src; src != (aUcs4Src + aUcs4Count); ++src)
       
   522 		{
       
   523 		// Convert a single character into the buffer, discard the result
       
   524 		// but increase the length by the number of UTF16 codes output.
       
   525 		length += Utf32ToUtf16(buf, *src) - buf;
       
   526 		}
       
   527 
       
   528 	HBufC16* utf16Out = HBufC16::NewL(length);
       
   529 	CleanupStack::PushL(utf16Out);
       
   530 
       
   531 	utf16Out->Des().SetLength(length);
       
   532     
       
   533 	TText16* p = &((utf16Out->Des())[0]);
       
   534 	
       
   535 
       
   536 	// go through characters converting to ucs2.
       
   537 	for (src = aUcs4Src; src != aUcs4Src + aUcs4Count; ++src)
       
   538 		{
       
   539 		// convert each ucs4 character
       
   540 		 p = Utf32ToUtf16(p, *src);
       
   541 		}
       
   542 
       
   543 	// convert from ucs2 to desired encoding
       
   544 	aConversion = NULL;
       
   545 	TInt ret = 0;
       
   546 
       
   547 	//HBufC overload of this method called, due to the need pass back the HBufC to the calling method
       
   548 	ret = CCharSetConverter::ConvertFromUnicodeL(*utf16Out, aDestCharset, aConversion);
       
   549 	CleanupStack::PushL(aConversion);
       
   550 
       
   551 	if(ret > KErrNone)
       
   552 		{
       
   553 		// CharConv couldn't convert all the bytes. Character encoding may be truncated.
       
   554 		User::Leave(KErrXmlBadCharacterConversion);
       
   555 		}
       
   556 
       
   557 	CleanupStack::Pop(aConversion);
       
   558 	CleanupStack::PopAndDestroy(utf16Out);
       
   559 	return(ret);
       
   560 	}
       
   561 
       
   562 
       
   563 
       
   564 TText16* CCharSetConverter::Utf32ToUtf16(TText16* aUtf16Out, TUint32 aUtf32)
       
   565 /**
       
   566 This method converts a ucs-4 character to unicode.
       
   567 
       
   568 @return				Pointer to the next free byte in the output buffer.
       
   569 
       
   570 @param				aUtf16Out On return, contains the unicode character conversion.
       
   571 @param				aUtf32 The ucs-4 character
       
   572 */
       
   573 	{
       
   574 	if (aUtf32 <= 0xFFFF)
       
   575 		{
       
   576 		// UTF32 (or UCS4) should not have characters in the range 
       
   577 		// D800-DBFF (high surrogate) and DC00-DFFF (low surrogate) in it,
       
   578 		// as these are the surrogates that make up the extension mechanism for 
       
   579 		// fitting Unicode into 16 bits.
       
   580 		// In principle, surrogates in UCS-4 should be ignored. 
       
   581 		// They are considered a bad thing because they might be an aliasing 
       
   582 		// problem: one thing looking like another. 
       
   583 		// In practice I don't think it is a problem here. 
       
   584 		// If you like, you could reject any character between D800 to DFFF.
       
   585 
       
   586 		// could weed out unpaired surrogates here, but...
       
   587 		*aUtf16Out = static_cast<TText16>(aUtf32);
       
   588 		return aUtf16Out + 1;
       
   589 		}
       
   590 
       
   591 	// A way to visualise the use of surrogate pairs is to imaging planes.
       
   592 	// The surrogate is located on plane zero and identifies the actual plane
       
   593 	// this character resides in.
       
   594 	// This is why for supplementary characters we must insert the surrogates
       
   595 	// so that charconv can convert correctly.
       
   596 	// 
       
   597 	// 0    D800 
       
   598 	// |    | DFFF 
       
   599 	// |    | | E000                                                                  10FFF
       
   600 	// |    | | |                                                                     |
       
   601 	// xxxxxxYxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
       
   602 	//       ^
       
   603 	// For Utf32 this means nothing. 
       
   604 	// 
       
   605 	// For Utf16 if the following bit pattern is located then it corresponds to a 
       
   606 	// supplementary character.
       
   607 	// 
       
   608 	//       ^
       
   609 	// D800           DC00           DFFF
       
   610 	// |              |              |
       
   611 	// yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy
       
   612 	//       High           Low
       
   613 	// 
       
   614 	// 1101 10.. ........         1101 11.. ........
       
   615 	//        --10  bit--                --10  bit-- 
       
   616 	// 
       
   617 	// 
       
   618 	// Add 10000 to both 10-bit values and the offset to the correct character is obtained.
       
   619 	// 
       
   620 	// 
       
   621 	// So a test of this function would be to check that the value returned 
       
   622 	// matches a utf-8 character encoding manually calculated from the original ucs4
       
   623 	// value.
       
   624 
       
   625 
       
   626 	// We have a supplementary character consists of 5 nibbles (20 bits) 
       
   627 	// with no surrogates.
       
   628 	// We have to insert the surrogate pair on the values minus 0x10000.
       
   629 	// b0-b9 is the low order value, b10-b19 is the high order value.
       
   630 	// b19....b10 b9....b0
       
   631 	//   high       low
       
   632 	//
       
   633 	// Character values.
       
   634 	// Basic			0x0-0xFFFF
       
   635 	// Supplimentary	0x10000-0x10FFFF
       
   636 	// so 0x10000 >> 10 = 0x43FF
       
   637 
       
   638 	// To add the surrogate to the high order:
       
   639 	//
       
   640 	// ((utf32-0x10000)>>10)+0xD800
       
   641 	// = (utf32>>10)-(0x10000>>10)+0xD800
       
   642 	// = (utf32>>10)+(0xD800-0x40)
       
   643 	// = (utf32>>10)+0xD7C0
       
   644 
       
   645 	aUtf16Out[0] = static_cast<TText16>((aUtf32 >> 10)  + 0xD7C0);
       
   646 
       
   647 	// To add the surrogate to the low order:
       
   648 	//
       
   649 	// ((utf32-0x10000) & 0x3FF)+0xDC00
       
   650 	// = ((utf32 & 0x3FF) - (0x10000 & 0x3FF)) + 0xDC00
       
   651 	// = ((utf32 & 0x3FF) - (0)) + 0xDC00
       
   652 	// = (utf32 & 0x3FF) + 0xDC00
       
   653 
       
   654 	aUtf16Out[1] = static_cast<TText16>(0xDC00 | (aUtf32 & 0x3FF));
       
   655 
       
   656 	return aUtf16Out + 2;
       
   657 	}