changeset 1 0a7b44b10206
child 2 806186ab5e14
equal deleted inserted replaced
0:c55016431358 1:0a7b44b10206
     1 // Copyright (c) 1997-2009 Nokia Corporation and/or its subsidiary(-ies).
     2 // All rights reserved.
     3 // This component and the accompanying materials are made available
     4 // under the terms of the License "Symbian Foundation License v1.0"
     5 // which accompanies this distribution, and is available
     6 // at the URL "".
     7 //
     8 // Initial Contributors:
     9 // Nokia Corporation - initial contribution.
    10 //
    11 // Contributors:
    12 //
    13 // Description:
    14 //
    16 #include <e32std.h>
    17 #include <e32base.h>
    18 #include <utf.h>
    20 const TUint KNotInBase64Alphabet=KMaxTUint;
    22 enum TPanic
    23 	{
    24 	EPanicBad6BitNumber=1,
    25 	EPanicBadUtf7Pointers1,
    26 	EPanicBadUtf7Pointers2,
    27 	EPanicBadUtf7Pointers3,
    28 	EPanicBadUtf7Pointers4,
    29 	EPanicBadUtf7Pointers5,
    30 	EPanicBadUtf7Pointers6,
    31 	EPanicBadUtf7Pointers7,
    32 	EPanicBadUtf7Pointers8,
    33 	EPanicBadUtf7Pointers9,
    34 	EPanicBadUtf7Pointers10,
    35 	EPanicBadUtf7Pointers11,
    36 	EPanicNotInBase64Block,
    37 	EPanicBadUnicodePointers1,
    38 	EPanicBadUnicodePointers2,
    39 	EPanicBadUnicodePointers3,
    40 	EPanicBadUnicodePointers4,
    41 	EPanicBadUnicodePointers5,
    42 	EPanicBadUnicodePointers6,
    43 	EPanicBadUnicodePointers7,
    44 	EPanicBadUnicodePointers8,
    45 	EPanicBadUnicodePointers9,
    46 	EPanicBadUnicodePointers10,
    47 	EPanicBadBitBufferState1,
    48 	EPanicBadBitBufferState2,
    49 	EPanicBadBitBufferState3,
    50 	EPanicBadBitBufferState4,
    51 	EPanicBadBitBufferState5,
    52 	EPanicBadBitBufferState6,
    53 	EPanicBadBitBufferState7,
    54 	EPanicBadBitBufferState8,
    55 	EPanicBadBitBufferState9,
    56 	EPanicBadBitBufferState10,
    57 	EPanicBadBitBufferState11,
    58 	EPanicBadBitBufferState12,
    59 	EPanicBadBitBufferState13,
    60 	EPanicBadBitBufferState14,
    61 	EPanicBadBitBufferState15,
    62 	EPanicBadBitBufferState16,
    63 	EPanicBadBitBufferState17,
    64 	EPanicUnexpectedNumberOfLoopIterations,
    65 	EPanicInitialEscapeCharacterButNoBase64,
    66 	EPanicBase64SequenceDoesNotFallOnUnicodeCharacterBoundary,
    67 	EPanicBadUtf8Pointers1,
    68 	EPanicBadUtf8Pointers2,
    69 	EPanicBadUtf8Pointers3,
    70 	EPanicBadUtf8Pointers4,
    71 	EPanicBadUtf8Pointers5,
    72 	EPanicBadUtf8Pointers6,
    73 	EPanicBadUtf8Pointers7,
    74 	EPanicOutOfSyncUtf7Byte1,
    75 	EPanicOutOfSyncUtf7Byte2,
    76 	EPanicOutOfSyncBase64Decoding
    77 	};
    79 _LIT(KLitPanicText, "CHARCONV-UTF");
    81 LOCAL_C void Panic(TPanic aPanic)
    82 	{
    83 	User::Panic(KLitPanicText, aPanic);
    84 	}
    86 inline TUint EscapeCharacterForStartingBase64Block(TBool aIsImapUtf7) {return aIsImapUtf7? '&': '+';}
    88 LOCAL_C TUint Base64Decoding(TUint aMemberOfBase64Alphabet, TBool aIsImapUtf7)
    89 	{
    90 	if ((aMemberOfBase64Alphabet>='A') && (aMemberOfBase64Alphabet<='Z'))
    91 		{
    92 		return aMemberOfBase64Alphabet-'A';
    93 		}
    94 	if ((aMemberOfBase64Alphabet>='a') && (aMemberOfBase64Alphabet<='z'))
    95 		{
    96 		return aMemberOfBase64Alphabet-('a'-26);
    97 		}
    98 	if ((aMemberOfBase64Alphabet>='0') && (aMemberOfBase64Alphabet<='9'))
    99 		{
   100 		return aMemberOfBase64Alphabet+((26*2)-'0');
   101 		}
   102 	if (aMemberOfBase64Alphabet=='+')
   103 		{
   104 		return 62;
   105 		}
   106 	if (aMemberOfBase64Alphabet==STATIC_CAST(TUint, aIsImapUtf7? ',': '/'))
   107 		{
   108 		return 63;
   109 		}
   110 	return KNotInBase64Alphabet;
   111 	}
   113 LOCAL_C TUint Base64Encoding(TUint a6BitNumber, TBool aIsImapUtf7)
   114 	{
   115 	__ASSERT_DEBUG(a6BitNumber<64, Panic(EPanicBad6BitNumber));
   116 	if ((a6BitNumber==63) && aIsImapUtf7)
   117 		{
   118 		return ',';
   119 		}
   120 	static const TUint8 base64Alphabet[64]={'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/'};
   121 	return base64Alphabet[a6BitNumber];
   122 	}
   124 LOCAL_C TUint8* PointerToEscapeCharacterStartingBase64Block(TUint8* aPointerToUtf7Byte, const TUint8* aPointerToFirstUtf7Byte, TBool aIsImapUtf7)
   125 	{
   126 	__ASSERT_DEBUG(aPointerToUtf7Byte>=aPointerToFirstUtf7Byte, Panic(EPanicBadUtf7Pointers1));
   127 	TUint8* pointerToCandidateEscapeCharacter=NULL;
   128 	FOREVER
   129 		{
   130 		const TUint utf7Byte=*aPointerToUtf7Byte;
   131 		if (utf7Byte==EscapeCharacterForStartingBase64Block(aIsImapUtf7))
   132 			{
   133 			pointerToCandidateEscapeCharacter=aPointerToUtf7Byte;
   134 			}
   135 		else if (Base64Decoding(utf7Byte, aIsImapUtf7)==KNotInBase64Alphabet)
   136 			{
   137 			break;
   138 			}
   139 		__ASSERT_DEBUG(aPointerToUtf7Byte>=aPointerToFirstUtf7Byte, Panic(EPanicBadUtf7Pointers2));
   140 		if (aPointerToUtf7Byte<=aPointerToFirstUtf7Byte)
   141 			{
   142 			break;
   143 			}
   144 		--aPointerToUtf7Byte;
   145 		}
   146 	__ASSERT_DEBUG(pointerToCandidateEscapeCharacter!=NULL, Panic(EPanicNotInBase64Block));
   147 	return pointerToCandidateEscapeCharacter;
   148 	}
   150 LOCAL_C TBool EncodeInUtf7Directly(TUint aUnicodeCharacter, TBool aIsImapUtf7, TBool aEncodeOptionalDirectCharactersInBase64)
   151 	{
   152 	if (aIsImapUtf7)
   153 		{
   154 		return (aUnicodeCharacter>=0x0020) && (aUnicodeCharacter<=0x007e);
   155 		}
   156 	if ((aUnicodeCharacter>=0x0021) && (aUnicodeCharacter<=0x007d))
   157 		{
   158 		if (aEncodeOptionalDirectCharactersInBase64)
   159 			{
   160 			return (((aUnicodeCharacter>=0x0041) && (aUnicodeCharacter<=0x005a)) ||
   161 					((aUnicodeCharacter>=0x0061) && (aUnicodeCharacter<=0x007a)) ||
   162 					((aUnicodeCharacter>=0x0027) && (aUnicodeCharacter<=0x0029)) ||
   163 					((aUnicodeCharacter>=0x002b) && (aUnicodeCharacter<=0x003a)) ||
   164 					(aUnicodeCharacter==0x003f));
   165 			}
   166 		return aUnicodeCharacter!=0x005c;
   167 		}
   168 	return (aUnicodeCharacter==0x0020) || (aUnicodeCharacter==0x0009) || (aUnicodeCharacter==0x000d) || (aUnicodeCharacter==0x000a);
   169 	}
   171 inline TBool BitBufferContainsNonZeroBits(TUint aBitBuffer, TInt aNumberOfBitsInBuffer)
   172 	{
   173 	return (aBitBuffer&((1<<aNumberOfBitsInBuffer)-1))!=0;
   174 	}
   178 /**  Converts Unicode text into UTF-7 encoding. The fucntion leaves with
   179 KErrCorrupt if the input string is corrupt.
   181 @param aUnicode A UCS-2 encoded input string.
   182 @param aEncodeOptionalDirectCharactersInBase64  If ETrue then
   183 characters from UTF-7 set O (optional direct characters) are encoded in
   184 Modified Base64. If EFalse the characters are encoded directly,
   185 as their ASCII equivalents.
   186 @return A descriptor containing the UTF-7 encoded output string. */
   187 EXPORT_C HBufC8* CnvUtfConverter::ConvertFromUnicodeToUtf7L(
   188 										const TDesC16& aUnicode,
   189 										TBool aEncodeOptionalDirectCharactersInBase64)
   190 	{
   191 	// If aUnicode is  Null string, return an empty HBufC
   192 	if (aUnicode.Length() == 0)
   193 		{
   194 		HBufC8* hBuf8 = HBufC8::NewL(1);
   195 		return hBuf8;
   196 		}
   198 	// Otherwise, convert and store result in a buffer, reallocating that buffer if needed.
   199 	TInt length = aUnicode.Length();
   200 	const TInt bufsize = 100;
   202 	TPtrC16 unicode (aUnicode);
   203 	TBuf8<bufsize> buf;
   204 	HBufC8* hBuf8 = HBufC8::NewLC(length);
   205 	TPtr8 utf7 = hBuf8->Des();
   207 	FOREVER
   208 		{
   209 		TInt unconverted = ConvertFromUnicodeToUtf7(buf, unicode, aEncodeOptionalDirectCharactersInBase64);
   210 		if( unconverted == EErrorIllFormedInput || unconverted < 0)
   211 			User::Leave(KErrCorrupt);
   213 		if (utf7.Length() + buf.Length() > utf7.MaxLength())
   214 			{
   215 			// Reallocate the hBuf8
   216 			hBuf8 = hBuf8->ReAllocL(utf7.Length() + buf.Length());
   217 			CleanupStack::Pop();
   218 			CleanupStack::PushL(hBuf8);
   219 			utf7.Set(hBuf8->Des());
   220 			}
   221 		utf7.Append(buf);
   222 		if (unconverted ==0)
   223 			break;
   224 		unicode.Set(unicode.Right(unconverted));
   225 		}
   226 	CleanupStack::Pop();
   227 	return hBuf8;
   229 	}
   231 /** Converts Unicode text into UTF-7 encoding.
   233 @param aUtf7 On return, contains the UTF-7 encoded output string.
   234 @param aUnicode A UCS-2 encoded input string.
   235 @param aEncodeOptionalDirectCharactersInBase64 If ETrue then characters from
   236 UTF-7 set O (optional direct characters) are encoded in Modified Base64. If
   237 EFalse the characters are encoded directly, as their ASCII equivalents.
   238 @return The number of unconverted characters left at the end of the input
   239 descriptor, or one of the error values defined in TError. */
   240 EXPORT_C TInt CnvUtfConverter::ConvertFromUnicodeToUtf7(
   241 										TDes8& aUtf7,
   242 										const TDesC16& aUnicode,
   243 										TBool aEncodeOptionalDirectCharactersInBase64)
   244 	{
   245 	return ConvertFromUnicodeToUtf7(aUtf7, aUnicode, EFalse, aEncodeOptionalDirectCharactersInBase64);
   246 	}
   248 TInt CnvUtfConverter::ConvertFromUnicodeToUtf7(TDes8& aUtf7,
   249 											   const TDesC16& aUnicode,
   250 											   TBool aIsImapUtf7,
   251 											   TBool aEncodeOptionalDirectCharactersInBase64)
   252 	{
       	// Encodes as much of aUnicode as fits into aUtf7 and returns the number of
       	// 16-bit input units left unconverted. aIsImapUtf7 selects the escape
       	// character that opens a base-64 block ('&' instead of '+') and is passed
       	// on to the base-64 helpers and to EncodeInUtf7Directly().
       	//
       	// Characters for which EncodeInUtf7Directly() returns true are copied as
       	// single bytes; every other character is fed 16 bits at a time into a bit
       	// buffer that is drained 6 bits at a time as base-64 characters. An open
       	// base-64 block is always closed with '-' before the function returns; if
       	// there is no room for that, output and input are rewound (see the second
       	// half of the function).
       	//
       	// Empty input: the output length is set to zero and 0 is returned.
   253 	if (aUnicode.Length()==0)
   254 		{
   255 		aUtf7.SetLength(0);
   256 		return 0;
   257 		}
       	// No output space at all: everything is reported as unconverted. Note that
       	// the length of aUtf7 is not modified on this path.
   258 	if (aUtf7.MaxLength()==0)
   259 		{
   260 		return aUnicode.Length();
   261 		}
   262 	const TUint escapeCharacterForStartingBase64Block=EscapeCharacterForStartingBase64Block(aIsImapUtf7);
       	// Both cursors address the element most recently written/consumed, which
       	// is why they start one element before the beginning of their buffers.
   263 	TUint8* pointerToPreviousUtf7Byte=CONST_CAST(TUint8*, aUtf7.Ptr()-1);
   264 	const TUint8* const pointerToLastUtf7Byte=pointerToPreviousUtf7Byte+aUtf7.MaxLength();
   265 	const TUint16* pointerToPreviousUnicodeCharacter=aUnicode.Ptr()-1;
   266 	const TUint16* const pointerToLastUnicodeCharacter=pointerToPreviousUnicodeCharacter+aUnicode.Length();
       	// The top bit of bitBuffer is a flag meaning "a base-64 block is currently
       	// open"; the low bits hold input bits that have not yet been emitted, and
       	// numberOfBitsInBuffer says how many of them there are.
   267 	const TUint KIsInBase64Block=0x80000000u;
   268 	TUint bitBuffer=0;
   269 	TInt numberOfBitsInBuffer=0;
   270 	FOREVER
   271 		{
   272 		__ASSERT_DEBUG(pointerToPreviousUtf7Byte<=pointerToLastUtf7Byte, Panic(EPanicBadUtf7Pointers3));
   273 		__ASSERT_DEBUG(pointerToPreviousUnicodeCharacter<=pointerToLastUnicodeCharacter, Panic(EPanicBadUnicodePointers1));
       		// At the end of the input 0 is used as a placeholder; it is never written
       		// because the end-of-input test below breaks out of the loop first.
   274 		TUint currentUnicodeCharacter=(pointerToPreviousUnicodeCharacter==pointerToLastUnicodeCharacter)? 0: *(pointerToPreviousUnicodeCharacter+1);
       		// First branch: end of input, or a character that is written directly.
       		// Either way any open base-64 block has to be closed first.
   275 		if ((pointerToPreviousUnicodeCharacter==pointerToLastUnicodeCharacter) || EncodeInUtf7Directly(currentUnicodeCharacter, aIsImapUtf7, aEncodeOptionalDirectCharactersInBase64))
   276 			{
   277 			__ASSERT_DEBUG((bitBuffer&KIsInBase64Block) || (numberOfBitsInBuffer==0), Panic(EPanicBadBitBufferState1));
   278 			__ASSERT_DEBUG((numberOfBitsInBuffer==0) || (numberOfBitsInBuffer==2) || (numberOfBitsInBuffer==4), Panic(EPanicBadBitBufferState2));
   279 			if (bitBuffer&KIsInBase64Block)
   280 				{
   281 				if (numberOfBitsInBuffer!=0)
   282 					{
   283 					if (pointerToLastUtf7Byte-pointerToPreviousUtf7Byte<2) // make sure there is enough space for the trailing '-' as well as the remains of the bitBuffer as the KIsInBase64Block flag is about to turned off, thus the trailing '-' may never get written
   284 						{
   285 						break;
   286 						}
       					// flush the left-over bits, padded on the right with zero bits to make up 6
   287 					++pointerToPreviousUtf7Byte;
   288 					*pointerToPreviousUtf7Byte=STATIC_CAST(TUint8, Base64Encoding((bitBuffer<<(6-numberOfBitsInBuffer))&0x3f, aIsImapUtf7));
   289 					}
   290 				else
   291 					{
   292 					if (pointerToPreviousUtf7Byte==pointerToLastUtf7Byte)
   293 						{
   294 						break;
   295 						}
   296 					}
       				// terminate the base-64 block and clear the "in block" flag
   297 				++pointerToPreviousUtf7Byte;
   298 				*pointerToPreviousUtf7Byte='-';
   299 				bitBuffer=0;
   300 				numberOfBitsInBuffer=0;
   301 				}
   302 			__ASSERT_DEBUG(pointerToPreviousUnicodeCharacter<=pointerToLastUnicodeCharacter, Panic(EPanicBadUnicodePointers2));
   303 			if (pointerToPreviousUnicodeCharacter>=pointerToLastUnicodeCharacter)
   304 				{
   305 				break;
   306 				}
   307 			__ASSERT_DEBUG(pointerToPreviousUtf7Byte<=pointerToLastUtf7Byte, Panic(EPanicBadUtf7Pointers4));
       			// the escape character itself is written as the escape character followed by '-', so it needs two bytes
   308 			if (pointerToLastUtf7Byte-pointerToPreviousUtf7Byte<((currentUnicodeCharacter==escapeCharacterForStartingBase64Block)? 2: 1))
   309 				{
   310 				break;
   311 				}
   312 			++pointerToPreviousUtf7Byte;
   313 			*pointerToPreviousUtf7Byte=STATIC_CAST(TUint8, currentUnicodeCharacter);
   314 			++pointerToPreviousUnicodeCharacter;
   315 			if (currentUnicodeCharacter==escapeCharacterForStartingBase64Block)
   316 				{
   317 				++pointerToPreviousUtf7Byte;
   318 				*pointerToPreviousUtf7Byte='-';
   319 				}
   320 			}
       		// Second branch: the character has to be base-64 encoded.
   321 		else
   322 			{
   323 			{
   324 			TInt numberOfUtf7BytesRequired=(numberOfBitsInBuffer+16)/6; // "(numberOfBitsInBuffer+16)/6" is the number of iterations that will happen in the while loop below
   325 			if (~bitBuffer&KIsInBase64Block)
   326 				{
   327 				++numberOfUtf7BytesRequired; // for the initial escapeCharacterForStartingBase64Block
   328 				}
   329 			if (pointerToLastUtf7Byte-pointerToPreviousUtf7Byte<numberOfUtf7BytesRequired)
   330 				{
   331 				break;
   332 				}
   333 			}
       			// not yet in a base-64 block, so open one
   334 			if (~bitBuffer&KIsInBase64Block)
   335 				{
   336 				__ASSERT_DEBUG(pointerToPreviousUtf7Byte<pointerToLastUtf7Byte, Panic(EPanicBadUtf7Pointers5));
   337 				++pointerToPreviousUtf7Byte;
   338 				*pointerToPreviousUtf7Byte=STATIC_CAST(TUint8, escapeCharacterForStartingBase64Block);
   339 				}
       			// append the 16 bits of this character below the bits already pending
       			// (this shifts the KIsInBase64Block flag out; it is set again below)
   340 			bitBuffer<<=16;
   341 			bitBuffer|=currentUnicodeCharacter;
   342 			numberOfBitsInBuffer+=16;
   343 			++pointerToPreviousUnicodeCharacter;
   344 			__ASSERT_DEBUG(numberOfBitsInBuffer<=20, Panic(EPanicBadBitBufferState3));
       			// emit every complete group of 6 bits, most significant group first
   345 			while (numberOfBitsInBuffer>=6)
   346 				{
   347 				numberOfBitsInBuffer-=6;
   348 				__ASSERT_DEBUG(pointerToPreviousUtf7Byte<pointerToLastUtf7Byte, Panic(EPanicBadUtf7Pointers6));
   349 				++pointerToPreviousUtf7Byte;
   350 				*pointerToPreviousUtf7Byte=STATIC_CAST(TUint8, Base64Encoding((bitBuffer>>numberOfBitsInBuffer)&0x3f, aIsImapUtf7));
   351 				}
   352 			bitBuffer&=((1<<numberOfBitsInBuffer)-1); // zero all the consumed bits - not strictly necessary but it leaves the buffer in a cleaner state
   353 			bitBuffer|=KIsInBase64Block;
   354 			}
   355 		}
       	// The main loop has stopped, either at the end of the input or because the
       	// output is full. If a base-64 block is still open it is closed here.
   356 	__ASSERT_DEBUG((bitBuffer&KIsInBase64Block) || (numberOfBitsInBuffer==0), Panic(EPanicBadBitBufferState4));
   357 	__ASSERT_DEBUG((numberOfBitsInBuffer==0) || (numberOfBitsInBuffer==2) || (numberOfBitsInBuffer==4), Panic(EPanicBadBitBufferState5));
   358 	if (bitBuffer&KIsInBase64Block)
   359 		{
   360 #if defined(_DEBUG)
   361 		TInt numberOfLoopIterations=1;
   362 #endif
   363 		FOREVER // there should never be more than 2 iterations of this loop - the first "if" should always succeed the second time if it doesn't succeed the first time
   364 			{
   365 			__ASSERT_DEBUG(pointerToPreviousUtf7Byte<=pointerToLastUtf7Byte, Panic(EPanicBadUtf7Pointers7));
   366 			__ASSERT_DEBUG((numberOfBitsInBuffer==0) || (numberOfBitsInBuffer==2) || (numberOfBitsInBuffer==4), Panic(EPanicBadBitBufferState6));
   367 			__ASSERT_DEBUG(numberOfLoopIterations<=2, Panic(EPanicUnexpectedNumberOfLoopIterations));
   368 #if defined(_DEBUG)
   369 			++numberOfLoopIterations;
   370 #endif
   371 			if (pointerToLastUtf7Byte-pointerToPreviousUtf7Byte>=((numberOfBitsInBuffer==0)? 1: 2)) // if there's room to finish off the base-64 sequence by (i) flushing the bit-buffer and (ii) appending the trailing '-'
   372 				{
   373 				if (numberOfBitsInBuffer!=0)
   374 					{
   375 					__ASSERT_DEBUG(pointerToPreviousUtf7Byte<pointerToLastUtf7Byte, Panic(EPanicBadUtf7Pointers8));
   376 					++pointerToPreviousUtf7Byte;
   377 					*pointerToPreviousUtf7Byte=STATIC_CAST(TUint8, Base64Encoding((bitBuffer<<(6-numberOfBitsInBuffer))&0x3f, aIsImapUtf7));
   378 					}
   379 				__ASSERT_DEBUG(pointerToPreviousUtf7Byte<pointerToLastUtf7Byte, Panic(EPanicBadUtf7Pointers9));
   380 				++pointerToPreviousUtf7Byte;
   381 				*pointerToPreviousUtf7Byte='-';
   382 				break;
   383 				}
   384 			// it is now necessary to move back pointerToPreviousUtf7Byte so that the base-64 sequence can be terminated - note it must be terminated on a Unicode character boundary hence the reason why pointerToPreviousUnicodeCharacter may be moved back too
   385 			TUint8* pointerToEscapeCharacterStartingBase64Block=PointerToEscapeCharacterStartingBase64Block(pointerToPreviousUtf7Byte, aUtf7.Ptr(), aIsImapUtf7);
   386 			const TInt oldNumberOfBase64Characters=pointerToPreviousUtf7Byte-pointerToEscapeCharacterStartingBase64Block;
   387 			__ASSERT_DEBUG(oldNumberOfBase64Characters>0, Panic(EPanicInitialEscapeCharacterButNoBase64));
   388 			__ASSERT_DEBUG(((oldNumberOfBase64Characters*6)+numberOfBitsInBuffer)%16==0, Panic(EPanicBase64SequenceDoesNotFallOnUnicodeCharacterBoundary));
   389 			pointerToPreviousUnicodeCharacter-=((oldNumberOfBase64Characters*6)+numberOfBitsInBuffer)/16; // move back pointerToPreviousUnicodeCharacter to before the equivalent of the base-64 sequence
   390 			pointerToPreviousUtf7Byte=pointerToEscapeCharacterStartingBase64Block;
   391 			__ASSERT_DEBUG(*pointerToPreviousUtf7Byte==escapeCharacterForStartingBase64Block, Panic(EPanicBadUtf7Pointers10));
   392 			if (oldNumberOfBase64Characters<4) // if the new base-64 sequence will be so short that it won't even be able to contain the UTF-7 encoding of a single Unicode character
   393 				{
   394 				--pointerToPreviousUtf7Byte; // move back pointerToPreviousUtf7Byte to before the escapeCharacterForStartingBase64Block
   395 				break;
   396 				}
       			// Work out how many whole Unicode characters fit in one base-64 character
       			// fewer than before (leaving room for the '-'), then move both cursors
       			// forward again to the end of that shorter sequence.
   397 			const TInt newNumberOfUnicodeCharacters=((oldNumberOfBase64Characters-1)*3)/8;
   398 			pointerToPreviousUnicodeCharacter+=newNumberOfUnicodeCharacters;
   399 			pointerToPreviousUtf7Byte+=((newNumberOfUnicodeCharacters*8)+2)/3;
       			// the last base-64 character that is kept may carry bits of a Unicode
       			// character that has been dropped; those low bits are cleared
   400 			const TInt numberOfBitsToBeZeroedInLastBase64Character=(newNumberOfUnicodeCharacters%3)*2;
   401 			if (numberOfBitsToBeZeroedInLastBase64Character!=0)
   402 				{
   403 				*pointerToPreviousUtf7Byte=STATIC_CAST(TUint8, Base64Encoding(Base64Decoding(*pointerToPreviousUtf7Byte, aIsImapUtf7)&0x3f&~((1<<numberOfBitsToBeZeroedInLastBase64Character)-1), aIsImapUtf7));
   404 				}
       			// go round again with an empty bit buffer so that the first "if" writes the '-'
   405 			bitBuffer=KIsInBase64Block;
   406 			numberOfBitsInBuffer=0;
   407 			}
   408 		}
       	// both cursors address the last element used, hence the "+1" for the length
       	// and the plain difference for the number of unconverted characters
   409 	aUtf7.SetLength((pointerToPreviousUtf7Byte-aUtf7.Ptr())+1);
   410 	return pointerToLastUnicodeCharacter-pointerToPreviousUnicodeCharacter;
   411 	}
   415 /** Converts Unicode text into UTF-8 encoding.
   417 @param aUtf8 On return, contains the UTF-8 encoded output string.
   418 @param aUnicode The Unicode-encoded input string.
   419 @return The number of unconverted characters left at the end of the input
   420 descriptor, or one of the error values defined in TError. */
   421 EXPORT_C TInt CnvUtfConverter::ConvertFromUnicodeToUtf8(TDes8& aUtf8, const TDesC16& aUnicode)
   422 	{
   423 	return ConvertFromUnicodeToUtf8(aUtf8, aUnicode, EFalse);
   424 	}
   427 /**  Converts Unicode text into UTF-8 encoding.
   429 The variant of UTF-8 used internally by Java differs slightly from
   430 standard UTF-8. The TBool argument controls the UTF-8
   431 variant generated by this function. This function leaves with a
   432 KErrCorrupt if the input string is corrupt.
   434 @param aUnicode A UCS-2 encoded input string.
   435 @return A pointer to an HBufC8 containing the converted UTF8. */
   436 EXPORT_C HBufC8* CnvUtfConverter::ConvertFromUnicodeToUtf8L(const TDesC16& aUnicode)
   437  	{
   438 	// If aUnicode is  Null string, return an empty HBufC
   439 	if (aUnicode.Length() == 0)
   440 		{
   441 		HBufC8* hBuf8 = HBufC8::NewL(1);
   442 		return hBuf8;
   443 		}
   445 	// Otherwise, convert and store result in a buffer, reallocating that buffer if needed.
   446 	const TInt length = aUnicode.Length();
   447 	const TInt bufsize = 100;
   449 	TPtrC16 unicode (aUnicode);
   450 	TBuf8<bufsize> buf;
   451 	HBufC8* hBuf8 = HBufC8::NewLC(length);
   452 	TPtr8 utf8 = hBuf8->Des();
   454 	FOREVER
   455 		{
   456 		TInt unconverted = ConvertFromUnicodeToUtf8(buf, unicode);
   457 		if( unconverted == EErrorIllFormedInput || unconverted < 0)
   458 			User::Leave(KErrCorrupt);
   460 		if (utf8.Length() + buf.Length() > utf8.MaxLength())
   461 			{
   462 			// Reallocate the hBuf8
   463 			hBuf8 = hBuf8->ReAllocL(utf8.Length() + buf.Length());
   464 			CleanupStack::Pop();
   465 			CleanupStack::PushL(hBuf8);
   466 			utf8.Set(hBuf8->Des());
   467 			}
   468 		utf8.Append(buf);
   469 		if (unconverted ==0)
   470 			break;
   471 		unicode.Set(unicode.Right(unconverted));
   472 		}
   473 	CleanupStack::Pop();
   474 	return hBuf8;
   475 	}
   477 /** Converts Unicode text into UTF-8 encoding.
   479 Surrogate pairs can be input which will result in a valid 4 byte UTF-8 value.
   481 The variant of UTF-8 used internally by Java differs slightly from standard
   482 UTF-8. The TBool argument controls the UTF-8 variant generated by this function.
   484 @param aUtf8 On return, contains the UTF-8 encoded output string.
   485 @param aUnicode A UCS-2 encoded input string.
   486 @param aGenerateJavaConformantUtf8 EFalse for orthodox UTF-8. ETrue for Java
   487 UTF-8. The default is EFalse.
   488 @return The number of unconverted characters left at the end of the input descriptor,
   489 or one of the error values defined in TError. */
   490 TInt CnvUtfConverter::ConvertFromUnicodeToUtf8(TDes8& aUtf8,
   491 											   const TDesC16& aUnicode,
   492 											   TBool aGenerateJavaConformantUtf8)
   493 	{
   494 	if (aUnicode.Length() == 0)
   495 		{
   496 		aUtf8.SetLength(0);
   497 		return 0;
   498 		}
   499 	if (aUtf8.MaxLength() == 0)
   500 		{
   501 		return aUnicode.Length();
   502 		}
   504 	TUint8* pUtf8 = CONST_CAST(TUint8*, aUtf8.Ptr());
   505 	const TUint8* pointerToLastUtf8Byte = pUtf8 + (aUtf8.MaxLength() - 1);
   506 	TBool inputIsTruncated = EFalse;
   507 	const TUint16* pUnicode = aUnicode.Ptr();
   508 	const TUint16* pointerToLastUnicodeCharacter = pUnicode + (aUnicode.Length() - 1);
   510 	FOREVER
   511 		{
   512 		__ASSERT_DEBUG(pUtf8 <= pointerToLastUtf8Byte, Panic(EPanicBadUtf8Pointers1));
   513 		__ASSERT_DEBUG(pUnicode <= pointerToLastUnicodeCharacter, Panic(EPanicBadUnicodePointers3));
   515 		if (pUnicode[0] < 0x80)
   516 			{
   517 			// ascii - 1 byte
   519 			// internally java is different since the \x0000 character is
   520 			// translated into \xC0 \x80.
   522 			if ((aGenerateJavaConformantUtf8) && (pUnicode[0] == 0x0000))
   523 				{
   524 				if (pUtf8 == pointerToLastUtf8Byte)
   525 					{
   526 					pUtf8--;
   527 					pUnicode--;
   528 					break;
   529 					}
   530 				*pUtf8++ = STATIC_CAST(TUint8, 0xc0);
   531 				*pUtf8   = STATIC_CAST(TUint8, 0x80);
   532 				}
   533 			else
   534 				{
   535 				*pUtf8 = STATIC_CAST(TUint8, pUnicode[0]);
   536 				}
   537 			}
   538 		else if (pUnicode[0] < 0x800)
   539 			{
   540 			// U+0080..U+07FF - 2 bytes
   542 			if (pUtf8 == pointerToLastUtf8Byte)
   543 				{
   544 				pUtf8--;
   545 				pUnicode--;
   546 				break;
   547 				}
   549 			*pUtf8++ = STATIC_CAST(TUint8, 0xc0|(pUnicode[0]>>6));
   550 			*pUtf8   = STATIC_CAST(TUint8, 0x80|(pUnicode[0]&0x3f));
   552 			}
   554 		// check to see if we have a surrogate in the stream, surrogates encode code points outside
   555 		// the BMP and are 4 utf-8 chars, otherwise what we have here is 3 utf-8 chars.
   557 		else if (((pUnicode[0] & 0xfc00) == 0xd800) && !aGenerateJavaConformantUtf8)
   558 			{
   559 			// surrogate pair - 4 bytes in utf-8
   560 			// U+10000..U+10FFFF
   562 			__ASSERT_DEBUG(pUtf8 <= pointerToLastUtf8Byte, Panic(EPanicBadUtf8Pointers2));
   563 			// is there enough space to hold the character
   564 			if ((pointerToLastUtf8Byte - pUtf8) < 3)
   565 				{
   566 				pUtf8--;
   567 				pUnicode--;
   568 				break;  // no go to the exit condition
   569 				}
   571 			__ASSERT_DEBUG(pUnicode <= pointerToLastUnicodeCharacter, Panic(EPanicBadUnicodePointers4));
   572 			if (pUnicode >= pointerToLastUnicodeCharacter)
   573 				{
   574 				pUtf8--;
   575 				pUnicode--;
   576 				inputIsTruncated = ETrue;
   577 				break; // middle of a surrogate pair. go to end condition
   578 				}
   580 			if ((pUnicode[1] & 0xfc00) != 0xdc00)
   581 				{
   582 				return EErrorIllFormedInput;
   583 				}
   585 			// convert utf-16 surrogate to utf-32
   586 			TUint ch = ((pUnicode[0] - 0xD800) << 10 | (pUnicode[1] - 0xDC00)) + 0x10000;
   588 			// convert utf-32 to utf-8
   589             *pUtf8++ = STATIC_CAST(TUint8,0xf0 | (ch >> 18));
   590             *pUtf8++ = STATIC_CAST(TUint8,0x80 | ((ch >> 12) & 0x3f));
   591             *pUtf8++ = STATIC_CAST(TUint8,0x80 | ((ch >> 6) & 0x3f));
   592             *pUtf8   = STATIC_CAST(TUint8,0x80 | (ch & 0x3f));
   594             // we consumed 2 utf-16 values, move this pointer
   595 			pUnicode++;
   596 			}
   597 		else
   598 			{
   599 			// 3 byte - utf-8, U+800..U+FFFF rest of BMP.
   601 			if (pointerToLastUtf8Byte - pUtf8 < 2)
   602 				{
   603 				pUtf8--;
   604 				pUnicode--;
   605 				break;
   606 				}
   607 			*pUtf8++ = STATIC_CAST(TUint8, 0xe0|(pUnicode[0]>>12));
   608 			*pUtf8++ = STATIC_CAST(TUint8, 0x80|((pUnicode[0]>>6)&0x3f));
   609 			*pUtf8   = STATIC_CAST(TUint8, 0x80|(pUnicode[0]&0x3f));
   610 			}
   612 		if ((pUnicode == pointerToLastUnicodeCharacter) || (pUtf8 == pointerToLastUtf8Byte))
   613 			{
   614 			break;
   615 			}
   617 		pUtf8++;
   618 		pUnicode++;
   620 		}
   622 	if ((pUnicode < aUnicode.Ptr()) && inputIsTruncated)
   623 		{
   624 		return EErrorIllFormedInput;
   625 		}
   627 	aUtf8.SetLength((pUtf8 - aUtf8.Ptr())+1);
   628 	return pointerToLastUnicodeCharacter-pUnicode;
   629 	}
   633 /**  Converts text encoded using the Unicode transformation format UTF-7
   634 into the Unicode UCS-2 character set.
   636 @param aUtf7 The UTF-7 encoded input string.
   637 @return A pointer to an HBufC16 containing the converted Unicode string */
   638 EXPORT_C HBufC16* CnvUtfConverter::ConvertToUnicodeFromUtf7L(const TDesC8& aUtf7)
   639 	{
   640 		// If aUtf8 is an empty string return
   641 	if (aUtf7.Length()==0)
   642 		{
   643 		HBufC16* hBuf = HBufC16::NewL(1);
   644 		return hBuf;
   645 		}
   647 	// else convert aUtf8 to Unicode storing the result in a buffer, reallocating
   648 	// it when needed.
   649 	TInt length = aUtf7.Length();
   650 	const TInt bufsize = 100;
   651 	TInt state = KStateDefault;
   653 	TPtrC8 utf7 (aUtf7);
   654 	TBuf<bufsize> buf;
   655 	HBufC16* hBuf = HBufC16::NewLC(length);
   656 	TPtr unicode = hBuf->Des();
   658 	FOREVER
   659 		{
   660 		TInt unconverted = ConvertToUnicodeFromUtf7(buf, utf7, state);
   661 		if( unconverted == EErrorIllFormedInput || unconverted < 0)
   662 			User::Leave(KErrCorrupt);
   664 		if (unicode.Length() + buf.Length() > unicode.MaxLength())
   665 			{
   666 			// Reallocate hBuf
   667 			hBuf = hBuf->ReAllocL(unicode.Length() + buf.Length());
   668 			CleanupStack::Pop();
   669 			CleanupStack::PushL(hBuf);
   670 			unicode.Set(hBuf->Des());
   671 			}
   672 		unicode.Append(buf);
   673 		if (unconverted ==0)
   674 			break;
   675 		utf7.Set(utf7.Right(unconverted));
   676 		}
   677 	CleanupStack::Pop();
   678 	return hBuf;
   679 	}
   683 /** Converts text encoded using the Unicode transformation format UTF-7 into the
   684 Unicode UCS-2 character set.
   686 If the conversion is achieved using a series of calls to this function, where
   687 each call starts off where the previous call reached in the input descriptor,
   688 the state of the conversion is stored. The initial value of the state variable
   689 should be set as KStateDefault when the conversion is started, and afterwards
   690 simply passed unchanged into each function call.
   692 @param aUnicode On return, contains the Unicode encoded output string.
   693 @param aUtf7 The UTF-7 encoded input string.
   694 @param aState For the first call of the function set to KStateDefault. For
   695 subsequent calls, pass in the variable unchanged.
   696 @return The number of unconverted bytes left at the end of the input descriptor,
   697 or one of the error values defined in TError. */
   698 EXPORT_C TInt CnvUtfConverter::ConvertToUnicodeFromUtf7(TDes16& aUnicode,
   699 														const TDesC8& aUtf7,
   700 														TInt& aState)
   701 	{
   702 	return ConvertToUnicodeFromUtf7(aUnicode, aUtf7, EFalse, aState);
   703 	}
   705 TInt CnvUtfConverter::ConvertToUnicodeFromUtf7(TDes16& aUnicode,
   706 											   const TDesC8& aUtf7,
   707 											   TBool aIsImapUtf7,
   708 											   TInt& aState)
   709 	{
   710 	if (aUtf7.Length()==0)
   711 		{
   712 		aUnicode.SetLength(0);
   713 		return 0;
   714 		}
   715 	if (aUnicode.MaxLength()==0)
   716 		{
   717 		return aUtf7.Length();
   718 		}
   719 	const TUint escapeCharacterForStartingBase64Block=EscapeCharacterForStartingBase64Block(aIsImapUtf7);
   720 	TUint16* pointerToPreviousUnicodeCharacter=CONST_CAST(TUint16*, aUnicode.Ptr()-1);
   721 	const TUint16* pointerToLastUnicodeCharacter=pointerToPreviousUnicodeCharacter+aUnicode.MaxLength();
   722 	const TUint8* pointerToCurrentUtf7Byte=aUtf7.Ptr();
   723 	const TUint8* pointerToLastUtf7Byte=pointerToCurrentUtf7Byte+(aUtf7.Length()-1);
   724 	TUint currentUtf7Byte=*pointerToCurrentUtf7Byte;
   725 	const TUint KIsInBase64Block=0x80000000u;
   726 	TUint bitBuffer=STATIC_CAST(TUint, aState);
   727 	TInt numberOfBitsInBuffer=((bitBuffer&0xf0)>>4);
   728 	bitBuffer&=~0xf0; // turn off the bits that stored numberOfBitsInBuffer
   729 	if (bitBuffer&KIsInBase64Block)
   730 		{
   731 		__ASSERT_ALWAYS((numberOfBitsInBuffer==0) || (numberOfBitsInBuffer==2) || (numberOfBitsInBuffer==4) || ((numberOfBitsInBuffer<16) && (numberOfBitsInBuffer%2==0) && !BitBufferContainsNonZeroBits(bitBuffer, numberOfBitsInBuffer)), Panic(EPanicBadBitBufferState7));
   732 		__ASSERT_ALWAYS((bitBuffer&~(KIsInBase64Block|0x0000000f))==0, Panic(EPanicBadBitBufferState8));
   733 		}
   734 	else
   735 		{
   736 		__ASSERT_ALWAYS(bitBuffer==0, Panic(EPanicBadBitBufferState9));
   737 		__ASSERT_ALWAYS(numberOfBitsInBuffer==0, Panic(EPanicBadBitBufferState10));
   738 		}
   739 	aState=KStateDefault;
   740 	if (bitBuffer&KIsInBase64Block)
   741 		{
   742 		currentUtf7Byte=Base64Decoding(currentUtf7Byte, aIsImapUtf7);
   743 		}
   744 	TBool inputIsTruncated=EFalse;
   745 	FOREVER
   746 		{
   747 		__ASSERT_DEBUG(pointerToPreviousUnicodeCharacter<pointerToLastUnicodeCharacter, Panic(EPanicBadUnicodePointers5));
   748 		__ASSERT_DEBUG(pointerToCurrentUtf7Byte<=pointerToLastUtf7Byte, Panic(EPanicBadUtf7Pointers11));
   749 		__ASSERT_DEBUG((bitBuffer&KIsInBase64Block) || (currentUtf7Byte==*pointerToCurrentUtf7Byte), Panic(EPanicOutOfSyncUtf7Byte1));
   750 		__ASSERT_DEBUG((~bitBuffer&KIsInBase64Block) || (currentUtf7Byte==Base64Decoding(*pointerToCurrentUtf7Byte, aIsImapUtf7)), Panic(EPanicOutOfSyncUtf7Byte2));
   751 		__ASSERT_DEBUG((bitBuffer&KIsInBase64Block) || ((bitBuffer==0) && (numberOfBitsInBuffer==0)), Panic(EPanicBadBitBufferState11));
   752 		if ((~bitBuffer&KIsInBase64Block) && (currentUtf7Byte==escapeCharacterForStartingBase64Block))
   753 			{
   754 			if (pointerToCurrentUtf7Byte==pointerToLastUtf7Byte)
   755 				{
   756 				--pointerToCurrentUtf7Byte;
   757 				inputIsTruncated=ETrue;
   758 				goto end;
   759 				}
   760 			++pointerToCurrentUtf7Byte;
   761 			currentUtf7Byte=*pointerToCurrentUtf7Byte;
   762 			if (currentUtf7Byte=='-')
   763 				{
   764 				currentUtf7Byte=escapeCharacterForStartingBase64Block;
   765 				}
   766 			else
   767 				{
   768 				currentUtf7Byte=Base64Decoding(currentUtf7Byte, aIsImapUtf7);
   769 				if (currentUtf7Byte==KNotInBase64Alphabet)
   770 					{
   771 					return EErrorIllFormedInput;
   772 					}
   773 				bitBuffer=KIsInBase64Block;
   774 				}
   775 			}
   776 		if (bitBuffer&KIsInBase64Block)
   777 			{
   778 			FOREVER
   779 				{
   780 				__ASSERT_DEBUG(currentUtf7Byte==Base64Decoding(*pointerToCurrentUtf7Byte, aIsImapUtf7), Panic(EPanicOutOfSyncBase64Decoding));
   781 				__ASSERT_DEBUG((numberOfBitsInBuffer<16) || (BitBufferContainsNonZeroBits(bitBuffer, numberOfBitsInBuffer-16) && (numberOfBitsInBuffer<16+6)), Panic(EPanicBadBitBufferState12));
   782 				if (currentUtf7Byte==KNotInBase64Alphabet)
   783 					{
   784 					if (BitBufferContainsNonZeroBits(bitBuffer, numberOfBitsInBuffer))
   785 						{
   786 						return EErrorIllFormedInput;
   787 						}
   788 					bitBuffer=0;
   789 					numberOfBitsInBuffer=0;
   790 					currentUtf7Byte=*pointerToCurrentUtf7Byte;
   791 					if (currentUtf7Byte=='-')
   792 						{
   793 						if (pointerToCurrentUtf7Byte==pointerToLastUtf7Byte)
   794 							{
   795 							goto end;
   796 							}
   797 						++pointerToCurrentUtf7Byte;
   798 						currentUtf7Byte=*pointerToCurrentUtf7Byte;
   799 						}
   800 					break;
   801 					}
   802 				bitBuffer<<=6;
   803 				bitBuffer|=currentUtf7Byte;
   804 				bitBuffer|=KIsInBase64Block;
   805 				numberOfBitsInBuffer+=6;
   806 				// only flush the buffer if it contains a whole Unicode character and the remainder is either all zero-bits (hence would be a legal point to end the base-64 sequence) or at least 6 bits long (therefore would leave at least one UTF-7 byte unconverted at the end of the input descriptor)
   807 				if ((numberOfBitsInBuffer>=16+6) || ((numberOfBitsInBuffer>=16) && !BitBufferContainsNonZeroBits(bitBuffer, numberOfBitsInBuffer-16)))
   808 					{
   809 					numberOfBitsInBuffer-=16;
   810 					__ASSERT_DEBUG(pointerToPreviousUnicodeCharacter<pointerToLastUnicodeCharacter, Panic(EPanicBadUnicodePointers6));
   811 					++pointerToPreviousUnicodeCharacter;
   812 					*pointerToPreviousUnicodeCharacter=STATIC_CAST(TUint16, bitBuffer>>numberOfBitsInBuffer);
   813 					bitBuffer&=((1<<numberOfBitsInBuffer)-1); // zero all the consumed bits - must be done as bitBuffer is stored along with numberOfBitsInBuffer in aState if the output descriptor runs out of space or if the input descriptor was truncated
   814 					bitBuffer|=KIsInBase64Block; // turn it back on as the line above turned it off
   815 					if (pointerToPreviousUnicodeCharacter==pointerToLastUnicodeCharacter)
   816 						{
   817 						goto end;
   818 						}
   819 					}
   820 				if (pointerToCurrentUtf7Byte==pointerToLastUtf7Byte)
   821 					{
   822 					inputIsTruncated=ETrue;
   823 					goto end;
   824 					}
   825 				++pointerToCurrentUtf7Byte;
   826 				currentUtf7Byte=Base64Decoding(*pointerToCurrentUtf7Byte, aIsImapUtf7);
   827 				}
   828 			}
   829 		else
   830 			{
   831 			__ASSERT_DEBUG(pointerToPreviousUnicodeCharacter<pointerToLastUnicodeCharacter, Panic(EPanicBadUnicodePointers7));
   832 			++pointerToPreviousUnicodeCharacter;
   833 			*pointerToPreviousUnicodeCharacter=STATIC_CAST(TUint16, currentUtf7Byte);
   834 			if ((pointerToPreviousUnicodeCharacter==pointerToLastUnicodeCharacter) || (pointerToCurrentUtf7Byte==pointerToLastUtf7Byte))
   835 				{
   836 				goto end;
   837 				}
   838 			++pointerToCurrentUtf7Byte;
   839 			currentUtf7Byte=*pointerToCurrentUtf7Byte;
   840 			}
   841 		}
   842 end:
   843 	if (bitBuffer&KIsInBase64Block)
   844 		{
   845 		__ASSERT_DEBUG((numberOfBitsInBuffer<16) || (BitBufferContainsNonZeroBits(bitBuffer, numberOfBitsInBuffer-16) && (numberOfBitsInBuffer<16+6)), Panic(EPanicBadBitBufferState13));
   846 		if (BitBufferContainsNonZeroBits(bitBuffer, numberOfBitsInBuffer))
   847 			{
   848 			// rewind how far we've got in the UTF-7 descriptor to indicate to the user (by returning a value greater than zero) that not all of the input could be converted as it ended with a truncated base-64 sequence
   849 			__ASSERT_DEBUG(numberOfBitsInBuffer>=6, Panic(EPanicBadBitBufferState14));
   850 			pointerToCurrentUtf7Byte-=numberOfBitsInBuffer/6;
   851 			const TInt newNumberOfBitsInBuffer=numberOfBitsInBuffer%6;
   852 			bitBuffer&=~KIsInBase64Block; // temporarily turn off the KIsInBase64Block for the right-shift
   853 			bitBuffer>>=(numberOfBitsInBuffer-newNumberOfBitsInBuffer);
   854 			bitBuffer|=KIsInBase64Block; // must be turned back on again as the bit-buffer is packed into aState
   855 			numberOfBitsInBuffer=newNumberOfBitsInBuffer;
   856 			__ASSERT_DEBUG((numberOfBitsInBuffer==0) || (numberOfBitsInBuffer==2) || (numberOfBitsInBuffer==4), Panic(EPanicBadBitBufferState15));
   857 			}
   858 		__ASSERT_DEBUG((numberOfBitsInBuffer<16) && (numberOfBitsInBuffer%2==0), Panic(EPanicBadBitBufferState16));
   859 		aState=STATIC_CAST(TInt, bitBuffer);
   860 		aState|=(numberOfBitsInBuffer<<4);
   861 		__ASSERT_DEBUG(aState&KIsInBase64Block, Panic(EPanicBadBitBufferState17));
   862 		bitBuffer=0;
   863 		numberOfBitsInBuffer=0;
   864 		}
   865 	if ((pointerToCurrentUtf7Byte<aUtf7.Ptr()) && inputIsTruncated)
   866 		{
   867 		return EErrorIllFormedInput;
   868 		}
   869 	aUnicode.SetLength((pointerToPreviousUnicodeCharacter+1)-aUnicode.Ptr());
   870 	return pointerToLastUtf7Byte-pointerToCurrentUtf7Byte;
   871 	}
   875 /** Converts text encoded using the Unicode transformation format UTF-8
   876 into the Unicode UCS-2 character set. This function leaves with an
   877 error code of the input string is corrupted.
   879 @param aUtf8 The UTF-8 encoded input string
   880 @return A pointer to an HBufC16 with the converted Unicode string. */
   881 EXPORT_C HBufC16* CnvUtfConverter::ConvertToUnicodeFromUtf8L(const TDesC8& aUtf8)
   882  	{
   883 	// If aUtf8 is an empty string return
   884 	if (aUtf8.Length()==0)
   885 		{
   886 		HBufC16* hBuf = HBufC16::NewL(1);
   887 		return hBuf;
   888 		}
   890 	// else convert aUtf8 to Unicode storing the result in a buffer, reallocating
   891 	// it when needed.
   892 	TInt length = aUtf8.Length();
   893 	const TInt bufsize = 100;
   895 	TPtrC8 utf8 (aUtf8);
   896 	TBuf<bufsize> buf;
   897 	HBufC16* hBuf = HBufC16::NewLC(length);
   898 	TPtr unicode = hBuf->Des();
   900 	FOREVER
   901 		{
   902 		TInt unconverted = ConvertToUnicodeFromUtf8(buf, utf8);
   903 		if( unconverted == EErrorIllFormedInput || unconverted < 0)
   904 			User::Leave(KErrCorrupt);
   906 		if (unicode.Length() + buf.Length() > unicode.MaxLength())
   907 			{
   908 			// Reallocate hBuf
   909 			hBuf = hBuf->ReAllocL(unicode.Length() + buf.Length());
   910 			CleanupStack::Pop();
   911 			CleanupStack::PushL(hBuf);
   912 			unicode.Set(hBuf->Des());
   913 			}
   914 		unicode.Append(buf);
   915 		if (unconverted ==0)
   916 			break;
   917 		utf8.Set(utf8.Right(unconverted));
   918 		}
   919 	CleanupStack::Pop();
   920 	return hBuf;
   921 	}
   923 /** Converts text encoded using the Unicode transformation format UTF-8 into the
   924 Unicode UCS-2 character set.
   926 @param aUnicode On return, contains the Unicode encoded output string.
   927 @param aUtf8 The UTF-8 encoded input string
   928 @return The number of unconverted bytes left at the end of the input descriptor,
   929 or one of the error values defined in TError. */
   930 EXPORT_C TInt CnvUtfConverter::ConvertToUnicodeFromUtf8(TDes16& aUnicode, const TDesC8& aUtf8)
   931 	{
   932 	return ConvertToUnicodeFromUtf8(aUnicode, aUtf8, EFalse);
   933 	}
   935 static void UpdateUnconvertibleInfo(TInt& aNumberOfUnconvertibleCharacters,
   936 		TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter, TUint8 aIndex)
   937 	{
   938 	if (aNumberOfUnconvertibleCharacters<=0)
   939 		{
   940 		aIndexOfFirstByteOfFirstUnconvertibleCharacter = aIndex;
   941 		}
   942 	++aNumberOfUnconvertibleCharacters;
   943 	}
   945 /** Converts text encoded using the Unicode transformation format UTF-8 into the
   946 Unicode UCS-2 character set.
   948 @param aUnicode On return, contains the Unicode encoded output string.
   949 @param aUtf8 The UTF-8 encoded input string
   950 @param aGenerateJavaConformantUtf8 EFalse for orthodox UTF-8. ETrue for Java
   951 @return The number of unconverted bytes left at the end of the input descriptor,
   952 or one of the error values defined in TError. */
   953 TInt CnvUtfConverter::ConvertToUnicodeFromUtf8(TDes16& aUnicode, const TDesC8& aUtf8, TBool aGenerateJavaConformantUtf8)
   954 	{
   955 	TInt dummyUnconverted, dummyUnconvertedIndex;
   956 	return ConvertToUnicodeFromUtf8(aUnicode, aUtf8, aGenerateJavaConformantUtf8, dummyUnconverted, dummyUnconvertedIndex);
   957 	}
   959 /** Converts text encoded using the Unicode transformation format UTF-8 into the
   960 Unicode UCS-2 character set. Surrogate pairs can be created when a valid 4 byte UTF-8 is input. Ill-formed sequences are replaced by U+FFFD in the output rather than aborting the conversion.
   962 The variant of UTF-8 used internally by Java differs slightly from standard
   963 UTF-8. The TBool argument controls the UTF-8 variant accepted by this function: when it is ETrue the two-byte sequence 0xC0 0x80 is accepted and decoded as U+0000.
   965 @param aUnicode On return, contains the Unicode encoded output string.
   966 @param aUtf8 The UTF-8 encoded input string
   967 @param aGenerateJavaConformantUtf8 EFalse for orthodox UTF-8. ETrue for Java
   968 UTF-8. The default is EFalse.
   969 @param aNumberOfUnconvertibleCharacters On return, has been incremented once for
   970 every ill-formed sequence that was replaced by U+FFFD. It is never reset by this function, so the caller must initialise it.
   971 @param aIndexOfFirstByteOfFirstUnconvertibleCharacter On return, the index
   972 of the first byte of the first unconvertible character. For instance if the
   973 first character in the input descriptor (aUtf8) could not be converted,
   974 then this parameter is set to the first byte of that character, i.e. zero.
   975 It is only written while aNumberOfUnconvertibleCharacters is not yet positive, and it is left untouched if all the characters were converted; a caller that wants a negative value in that case must set one before the call.
   976 @return The number of unconverted bytes left at the end of the input descriptor,
   977 or one of the error values defined in TError (EErrorIllFormedInput when the input starts with a multi-byte sequence that runs past the end of the input). */
   979 /* of note: conformance.  Unicode standard 5.0 section 3.9, table 3-7
   980  * Well formed UTF-8 Byte Sequences, full table.
   981  * +----------------------------------------------------------------+
   982  * | Code Points        | 1st byte | 2nd byte | 3rd byte | 4th byte |
   983  * +--------------------+----------+----------+----------+----------+
   984  * | U+0000..U+007F     | 00..7F   |          |          |          |  1 byte, ascii
   985  * | U+0080..U+07FF     | C2..DF   | 80..BF   |          |          |  2 bytes, error if 1st < 0xC2
   986  * | U+0800..U+0FFF     | E0       | A0..BF   | 80..BF   |          |  3 bytes, 1st == 0xE0, error if 2nd < 0xA0
   987  * | U+1000..U+CFFF     | E1..EC   | 80..BF   | 80..BF   |          |  normal
   988  * | U+D000..U+D7FF     | ED       | 80..9F   | 80..BF   |          |  3 bytes, 1st == 0xED, error if 2nd > 0x9F
   989  * | U+E000..U+FFFF     | EE..EF   | 80..BF   | 80..BF   |          |  normal
   990  * | U+10000..U+3FFFF   | F0       | 90..BF   | 80..BF   | 80..BF   |  4 bytes, 1st == 0xf0, error if 2nd < 0x90
   991  * | U+40000..U+FFFFF   | F1..F3   | 80..BF   | 80..BF   | 80..BF   |  normal
   992  * | U+100000..U+10FFFF | F4       | 80..8F   | 80..BF   | 80..BF   |  4 bytes, 1st == 0xF4, error if 2nd > 0x8F
   993  * +--------------------+----------+----------+----------+----------+
   994  *
   995  * As a consequence of the well-formedness conditions specified in table 3-7,
   996  * the following byte values are disallowed in UTF-8: C0-C1, F5-FF.
   997  */
   998 TInt CnvUtfConverter::ConvertToUnicodeFromUtf8(TDes16& aUnicode, const TDesC8& aUtf8, TBool aGenerateJavaConformantUtf8,
   999 		TInt& aNumberOfUnconvertibleCharacters, TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter)
  1000 	{
  1001 	aUnicode.SetLength(0);
  1003 	if ((aUtf8.Length() == 0) || (aUnicode.MaxLength() == 0))
  1004 		{
  1005 		return aUtf8.Length(); // nothing can be converted: the whole input is reported as unconverted
  1006 		}
  1008 	TUint16*           pUnicode = CONST_CAST(TUint16*, aUnicode.Ptr()); // next output slot to write
  1009 	const TUint16* pLastUnicode = pUnicode + (aUnicode.MaxLength() - 1); // last writable output slot
  1010 	const TUint8*         pUtf8 = aUtf8.Ptr(); // first byte of the sequence being decoded
  1011 	const TUint8*     pLastUtf8 = pUtf8 + (aUtf8.Length() - 1); // last byte of the input
  1012 	const TUint16 replacementcharacter = 0xFFFD; // doubles as the "sequence was rejected" marker below
  1013 	TUint currentUnicodeCharacter;
  1014 	TInt sequenceLength;
  1017 	FOREVER
  1018 		{
  1019 		TBool illFormed=EFalse; // set when a multi-byte sequence is rejected after its lead byte was accepted
  1021 		__ASSERT_DEBUG(pUnicode <= pLastUnicode, Panic(EPanicBadUnicodePointers8));
  1022 		__ASSERT_DEBUG(pUtf8 <= pLastUtf8, Panic(EPanicBadUtf8Pointers3));
  1024 		sequenceLength = 1;
  1026 		// ascii - optimisation (i.e. it isn't a sequence)
  1027 		if (pUtf8[0] < 0x80)
  1028 			{
  1029 			currentUnicodeCharacter = pUtf8[0];
  1030 			}
  1031 		else
  1032 			{
  1033 			// see if well formed utf-8, use table above for reference
  1034 			if ((pUtf8[0] >= 0xc2) && (pUtf8[0] <= 0xdf))
  1035 				{
  1036 				// 0xc0-0xc1 are not valid bytes, which is why this range starts at 0xc2
  1037 				sequenceLength = 2;
  1038 				}
  1039 			else if ((pUtf8[0] & 0xf0) == 0xe0)
  1040 				{
  1041 				sequenceLength = 3;
  1042 				}
  1043 			else if ((pUtf8[0] >= 0xf0) && (pUtf8[0] < 0xf5))
  1044 				{
  1045 				// 0xf5-0xff, are not valid bytes
  1046 				sequenceLength = 4;
  1047 				}
  1048 			else if ((pUtf8[0] == 0xc0) && aGenerateJavaConformantUtf8)
  1049 				{
  1050 				if ((pUtf8 == pLastUtf8) || (pUtf8[1] == 0x80))
  1051 					{
  1052 					// either we've split the 0xc0 0x80 (i.e. 0xc0 is
  1053 					// the last character in the string) or we've
  1054 					// discovered a valid 0xc0 0x80 sequence.
  1055 					sequenceLength = 2;
  1056 					}
  1057 				}
  1059 			/* checking to see if we got a valid sequence */
  1060 			if (sequenceLength == 1)
  1061 				{
  1062 				// bad value in the leading byte: 0x80-0xc1 (unless accepted as Java 0xc0 above) or 0xf5-0xff
  1063 				currentUnicodeCharacter = replacementcharacter;
  1064 				UpdateUnconvertibleInfo(aNumberOfUnconvertibleCharacters,
  1065 						aIndexOfFirstByteOfFirstUnconvertibleCharacter,	pUtf8-aUtf8.Ptr());
  1066 				}
  1067 			else
  1068 				{
  1069 				// this is a check to see if the sequence goes beyond the input
  1070 				// stream.  if at least one character has already been output this
  1071 				// isn't an error (the bytes are reported back as unconverted), otherwise it is.
  1072 				if ((pUtf8 + sequenceLength - 1) >  pLastUtf8)
  1073 					{
  1074 					// check to see if this sequence was the first character
  1075 					if ((pUnicode - aUnicode.Ptr()) == 0)
  1076 						{
  1077 						return EErrorIllFormedInput;
  1078 						}
  1079 					break;
  1080 					}
  1082 				currentUnicodeCharacter = pUtf8[0] & (0x7F>>sequenceLength); // keep only the payload bits of the lead byte
  1084 				/* check the trailing bytes, they should begin with 10 */
  1085 				TUint i = 1;
  1087 				do
  1088 					{
  1089 					if ((pUtf8[i] & 0xc0) == 0x80)
  1090 						{
  1091 						// add the trailing 6 bits to the current unicode char
  1092 						currentUnicodeCharacter = (currentUnicodeCharacter <<6 ) | (pUtf8[i] & 0x3F);
  1093 						}
  1094 					else
  1095 						{
  1096 						// ill formed character (doesn't have a lead 10)
  1097 						currentUnicodeCharacter = replacementcharacter;
  1098 						UpdateUnconvertibleInfo(aNumberOfUnconvertibleCharacters,
  1099 								aIndexOfFirstByteOfFirstUnconvertibleCharacter,	pUtf8-aUtf8.Ptr());
  1100 						illFormed=ETrue;
  1101 						break;
  1102 						}
  1103 					i++;
  1104 					}
  1105 				while (i < (unsigned)sequenceLength);
  1106 				}
  1108 			/* conformance check.  bits of above table for reference.
  1109 			 * +----------------------------------------------------------------+
  1110 			 * | Code Points        | 1st byte | 2nd byte | 3rd byte | 4th byte |
  1111 			 * +--------------------+----------+----------+----------+----------+
  1112 			 * | U+0800..U+0FFF     | E0       | A0..BF   | 80..BF   |          |  3 bytes, 1st == 0xE0, 2nd < 0xA0
  1113 			 * | U+D000..U+D7FF     | ED       | 80..9F   | 80..BF   |          |  3 bytes, 1st == 0xED, 2nd > 0x9F
  1114 			 * | U+10000..U+3FFFF   | F0       | 90..BF   | 80..BF   | 80..BF   |  4 bytes, 1st == 0xf0, 2nd < 0x90
  1115 			 * | U+100000..U+10FFFF | F4       | 80..8F   | 80..BF   | 80..BF   |  4 bytes, 1st == 0xF4, 2nd > 0x8F
  1116 			 * +--------------------+----------+----------+----------+----------+
  1117 			 */
  1119 			if (currentUnicodeCharacter != replacementcharacter) // skipped once the sequence has been rejected, so each sequence is counted at most once
  1120 				{
  1121 				if (sequenceLength == 3)
  1122 					{
  1123 					if ((pUtf8[0] == 0xE0) && (pUtf8[1] < 0xA0))
  1124 						{
  1125 						currentUnicodeCharacter = replacementcharacter;
  1126 						UpdateUnconvertibleInfo(aNumberOfUnconvertibleCharacters,
  1127 								aIndexOfFirstByteOfFirstUnconvertibleCharacter,	pUtf8-aUtf8.Ptr());
  1128 						illFormed=ETrue;
  1129 						}
  1130 					else if ((pUtf8[0] == 0xED) && (pUtf8[1] > 0x9F))
  1131 						{
  1132 						currentUnicodeCharacter = replacementcharacter;
  1133 						UpdateUnconvertibleInfo(aNumberOfUnconvertibleCharacters,
  1134 								aIndexOfFirstByteOfFirstUnconvertibleCharacter,	pUtf8-aUtf8.Ptr());
  1135 						illFormed=ETrue;
  1136 						}
  1137 					}
  1138 				else if (sequenceLength == 4)
  1139 					{
  1140 					if ((pUtf8[0] == 0xF0) && (pUtf8[1] < 0x90))
  1141 						{
  1142 						currentUnicodeCharacter = replacementcharacter;
  1143 						UpdateUnconvertibleInfo(aNumberOfUnconvertibleCharacters,
  1144 								aIndexOfFirstByteOfFirstUnconvertibleCharacter,	pUtf8-aUtf8.Ptr());
  1145 						illFormed=ETrue;
  1146 						}
  1147 					else if ((pUtf8[0] == 0xF4) && (pUtf8[1] > 0x8F))
  1148 						{
  1149 						currentUnicodeCharacter = replacementcharacter;
  1150 						UpdateUnconvertibleInfo(aNumberOfUnconvertibleCharacters,
  1151 								aIndexOfFirstByteOfFirstUnconvertibleCharacter,	pUtf8-aUtf8.Ptr());
  1152 						illFormed=ETrue;
  1153 						}
  1154 					}
  1157 				/* last conformance check - Unicode 5.0 section 3.9 D92 Because surrogate code points
  1158 				 * are not Unicode scalar values, any UTF-8 byte sequence that would map to code
  1159 				 * points D800..DFFF is ill formed */
  1161 				if ((currentUnicodeCharacter >= 0xD800) && (currentUnicodeCharacter <= 0xDFFF))
  1162 					{
  1163 					currentUnicodeCharacter = replacementcharacter;
  1164 					UpdateUnconvertibleInfo(aNumberOfUnconvertibleCharacters,
  1165 							aIndexOfFirstByteOfFirstUnconvertibleCharacter,	pUtf8-aUtf8.Ptr());
  1166 					illFormed=ETrue;
  1167 					}
  1168 				}
  1169 				// end conformance check
  1170 			}
  1172 		// would this character generate a surrogate pair in UTF-16?
  1173 		if (currentUnicodeCharacter > 0xFFFF)
  1174 			{
  1175 			// is there enough space to hold a surrogate pair in the output?
  1176 			if (pUnicode >= pLastUnicode)
  1177 				{
  1178 				break; // no, end processing.
  1179 				}
  1181 			TUint surrogate = (currentUnicodeCharacter>>10) + 0xD7C0; // high surrogate: 0xD7C0 == 0xD800 - (0x10000>>10)
  1182 			*pUnicode++ = STATIC_CAST(TUint16, surrogate);
  1184 			surrogate = (currentUnicodeCharacter & 0x3FF) + 0xDC00; // low surrogate carries the bottom 10 bits
  1185 			*pUnicode++ = STATIC_CAST(TUint16, surrogate);
  1186 			}
  1187 		else
  1188 			{
  1189 			*pUnicode++ = STATIC_CAST(TUint16, currentUnicodeCharacter);
  1190 			}
  1192 		// move the input pointer
  1193 		if (currentUnicodeCharacter != replacementcharacter)
  1194 			{
  1195 			pUtf8 += sequenceLength;
  1196 			}
  1197 		else if(illFormed == EFalse) // U+FFFD that was either validly encoded in the input or stands for a single bad lead byte (sequenceLength is 1 then)
  1198 			{
  1199 			pUtf8 += (sequenceLength);
  1200 			}
  1201 		else
  1202 			{
  1203 			// we had a character we didn't recognize (i.e. it was invalid)
  1204 			// so move to the next byte in the input; the bytes after the lead byte are decoded again from scratch
  1205 			pUtf8++;
  1206 			}
  1208 		if ((pUtf8 > pLastUtf8) || (pUnicode > pLastUnicode))
  1209 			{
  1210 			break;  // we've either reached the end of the input or the end of output
  1211 			}
  1212 		}
  1214 	aUnicode.SetLength(pUnicode - aUnicode.Ptr());
  1215 	return (pLastUtf8 - pUtf8 + 1); // bytes from pUtf8 to the end of the input were not consumed
  1216 	}
  1218 /** Given a sample text this function attempts to determine whether or not
  1219  *  the same text is encoded using the UTF-8 standard encoding scheme.
  1221 @param TInt a confidence level, given at certain value.  if the given sample
  1222 			is UTF-8 this value will not be changed (unless > 100) then its
  1223 			set to 100.  Otherwise if the same isn't UTF-8, its set to 0.
  1224 @param TDesC8 sample text.
  1225 UTF-8. The default is EFalse.
  1226 @return void
  1227  */
  1229 /* of note: conformance.  Unicode standard 5.0 section 3.9, table 3-7
  1230  * Well formed UTF-8 Byte Sequences, full table.
  1231  * +----------------------------------------------------------------+
  1232  * | Code Points        | 1st byte | 2nd byte | 3rd byte | 4th byte |
  1233  * +--------------------+----------+----------+----------+----------+
  1234  * | U+0000..U+007F     | 00..7D   |          |          |          |  1 byte, ascii
  1235  * | U+0080..U+07FF     | C2..DF   | 80..BF   |          |          |  2 bytes, error if 1st < 0xC2
  1236  * | U+0800..U+0FFF     | E0       | A0..BF   | 80..BF   |          |  3 bytes, 1st == 0xE0, error if 2nd < 0xA0
  1237  * | U+1000..U+CFFF     | E1..EC   | 80..BF   | 80..BF   |          |  normal
  1238  * | U+D000..U+D7FF     | ED       | 80..9F   | 80..BF   |          |  3 bytes, 1st == 0xED, error if 2nd > 0x9F
  1239  * | U+E000..U+FFFF     | EE..EF   | 80..BF   | 80..BF   |          |  normal
  1240  * | U+10000..U+3FFFF   | F0       | 90..BF   | 80..BF   | 80..BF   |  4 bytes, 1st == 0xf0, error if 2nd < 0x90
  1241  * | U+40000..U+FFFFF   | F1..F3   | 80..BF   | 80..BF   | 80..BF   |  normal
  1242  * | U+100000..U+10FFFF | F4       | 80..8F   | 80..BF   | 80..BF   |  4 bytes, 1st == 0xF4, error if 2nd > 0x8F
  1243  * +--------------------+----------+----------+----------+----------+
  1244  *
  1245  * As a consequence of the well-formedness conditions specified in table 3-7,
  1246  * the following byte values are disallowed in UTF-8: C0-C1, F5-FF.
  1247  *
  1248  * Code Rules:
  1249  *   R1: If the string contains any non-UTF-8 characters the returned confidence
  1250  *       is 0.  Valid UTF-8 combinations are listed in the above table.
  1251  *   R2: Otherwise if the string starts with a UTF-8 BOM (byte order mark) in
  1252  *       the (see ) the returned confidence is 95.
  1253  *   R3: Otherwise the confidence returned is based upon the sample string
  1254  *       length.
  1255  *   R4: If the sample string is under 75 characters, the confidence is set to
  1256  *       75.
  1257  */
  1258 GLREF_C void IsCharacterSetUTF8(TInt& aConfidenceLevel, const TDesC8& aSample)
  1259 	{
  1261 	TInt sampleLength = aSample.Length();
  1263 	if (sampleLength == 0)
  1264 		{
  1265 		aConfidenceLevel = 89;
  1266 		return;
  1267 		}
  1268 	TInt bytesRemaining  = 0;
  1269 	TInt sequenceLength  = 0;
  1271 	aConfidenceLevel = sampleLength;
  1273 	const TUint8* buffer = &aSample[0];
  1275 	if (sampleLength < 95)
  1276 		{
  1277 		// check for the BOM
  1278 		if ((sampleLength >= 3) &&
  1279 			((buffer[0] == 0xEF) &&
  1280 			 (buffer[1] == 0xBB) &&
  1281 			 (buffer[2] == 0xBF))
  1282 			)
  1283 			{
  1284 			aConfidenceLevel = 95;
  1285 			}
  1286 		else if (sampleLength < 75)
  1287 			{
  1288 			aConfidenceLevel = 75;
  1289 			}
  1290 		}
  1292 	for (TInt index = 0;index != sampleLength;index++)
  1293 		{
  1295 		if (bytesRemaining > 0)
  1296 			{
  1297 			// bytesRemaining > 0, means that a byte representing the start of a
  1298 			// multibyte sequence was encountered and the bytesRemaining is the
  1299 			// number of bytes to follow.
  1301 			if ((buffer[index] & 0xc0) == 0x80)
  1302 				{
  1303 				// need to check for ill-formed sequences -- all are in the 2nd byte
  1305 				if ((sequenceLength == 3) && (bytesRemaining == 2))
  1306 					{
  1307 					if ((buffer[index - 1] == 0xe0) && (buffer[index] < 0xa0))
  1308 						{
  1309 						aConfidenceLevel = 0;
  1310 						break;
  1311 						}
  1312 					else if ((buffer[index - 1] == 0xed) && (buffer[index] > 0x9f))
  1313 						{
  1314 						aConfidenceLevel = 0;
  1315 						break;
  1316 						}
  1317 					}
  1318 				else if ((sequenceLength == 4) && (bytesRemaining == 3))
  1319 					{
  1320 					if ((buffer[index - 1] == 0xf0) && (buffer[index] < 0x90))
  1321 						{
  1322 						aConfidenceLevel = 0;
  1323 						break;
  1324 						}
  1325 					else if ((buffer[index - 1] == 0xf4) && (buffer[index] > 0x8f))
  1326 						{
  1327 						aConfidenceLevel = 0;
  1328 						break;
  1329 						}
  1330 					}
  1332 				--bytesRemaining;
  1333 				continue;
  1334 				}
  1335 			else
  1336 				{
  1337 				aConfidenceLevel = 0;
  1338 				break;
  1339 				}
  1340 			}
  1342 		if (bytesRemaining == 0)
  1343 			{
  1344 			if (buffer[index] < 0x80)
  1345 				{
  1346 				// The value of aSample[index] is in the range 0x00-0x7f
  1347 				//UTF8 maintains ASCII transparency. So it's a valid
  1348 				//UTF8. Do nothing, check next value.
  1349 				continue;
  1350 				}
  1351 			else if ((buffer[index] >= 0xc2) && (buffer[index] < 0xe0))
  1352 				{
  1353 				// valid start of a 2 byte sequence (see conformance note)
  1354 				sequenceLength = 2;
  1355 				bytesRemaining = 1;
  1356 				}
  1357 			else if ((buffer[index] & 0xf0) == 0xe0)
  1358 				{
  1359 				// valid start of a 3 byte sequence
  1360 				sequenceLength = 3;
  1361 				bytesRemaining = 2;
  1362 				}
  1363 			else if ((buffer[index] >= 0xf0) && (buffer[index] < 0xf5))
  1364 				{
  1365 				// valid start of a 4 byte sequence (see conformance note)
  1366 				sequenceLength = 4;
  1367 				bytesRemaining = 3;
  1368 				}
  1369 			else
  1370 				{
  1371 				// wasn't anything expected so must be an illegal/irregular UTF8 coded value
  1372 				aConfidenceLevel = 0;
  1373 				break;
  1374 				}
  1375 			}
  1376 		} // for
  1378 	aConfidenceLevel = (aConfidenceLevel > 0)? ((aConfidenceLevel > 100)? 100: aConfidenceLevel): 0;
  1379 	}
  1381 GLREF_C void IsCharacterSetUTF7(TInt& aConfidenceLevel, const TDesC8& aSample)
  1382 	{
  1383 	TInt sampleLength = aSample.Length();
  1384 	aConfidenceLevel = 70;
  1385 	for (TInt i=0; i<sampleLength; ++i)
  1386 		{
  1387 		// UTF-7 value ranges only 7 bits
  1388 		if((aSample[i]&0x80)!=0x00)
  1389 			{
  1390 			aConfidenceLevel= 0;
  1391 			break;
  1392 			}
  1394 		// there is no "~" in UTF-7 encoding. So if find either, it's not UTF-7
  1395 		else if (char(aSample[i])=='~')
  1396 			{
  1397 			aConfidenceLevel = 0;
  1398 			break;
  1399 			}
  1401 		// The SMS7Bit escape char value is 0x1b. Reduce confidence if it follows the following format
  1402 		else if ( (aSample[i]==0x1b) && (i <sampleLength-1) )
  1403 			{
  1404 			static const TInt smsExtensionTable[11] =
  1405 				{0x0a, 0x14, 0x1b, 0x28, 0x29, 0x2f, 0x3c, 0x3d, 0x3e, 0x40, 0x65};
  1406 			TInt increment1 = i+1;
  1407 			if (increment1>= sampleLength)
  1408 				break;
  1409 			for (TInt j=0; j < 11; ++j)
  1410 				{
  1411 				if (aSample[increment1] == smsExtensionTable[j])
  1412 					{
  1413 					aConfidenceLevel-=10;
  1414 					}
  1415 				}
  1416 			}
  1417 		// The UTF-7 escape char is 0x2b. The values that follow the escape sequence
  1418 		// the values following the escape char value must belong to the modified base64
  1419 		// or '-' else it is an ill-formed sequence, so probably not UTF-7
  1420 		else if ( (aSample[i]==0x2b)  && (i <sampleLength-1) )
  1421 			{
  1422 			TInt increment1 = i+1;
  1423 			if ((aSample[increment1] == 0x2b) || (aSample[increment1] == 0x2d) || (aSample[increment1] == 0x2f) ||
  1424 				((aSample[increment1] >= 0x41) && (aSample[increment1] <= 0x5a)) ||
  1425 				((aSample[increment1] >= 0x61) && (aSample[increment1] <= 0x7a)))
  1426 				{
  1427 				aConfidenceLevel+=5;
  1428 				}
  1429 			else
  1430 				{
  1431 				aConfidenceLevel-=15;
  1432 				}
  1433 			i++; // should this be here or up in the if loop ??
  1434 			}
  1435 		} //for
  1436 	aConfidenceLevel =(aConfidenceLevel >0)? ((aConfidenceLevel > 100)? 100: aConfidenceLevel): 0;
  1437 	}