securityanddataprivacytools/securitytools/certapp/store--/utf.cpp
author Santosh V Patil <santosh.v.patil@nokia.com>
Fri, 13 Nov 2009 10:07:38 +0530
changeset 22 1afc808f187d
parent 8 35751d3474b7
permissions -rw-r--r--
Merged in the 2009wk45 code drop
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
8
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
     1
/*
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
     2
* Copyright (c) 1997-2009 Nokia Corporation and/or its subsidiary(-ies).
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
     3
* All rights reserved.
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
     4
* This component and the accompanying materials are made available
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
     5
* under the terms of the License "Eclipse Public License v1.0"
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
     6
* which accompanies this distribution, and is available
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
     7
* at the URL "http://www.eclipse.org/legal/epl-v10.html".
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
     8
*
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
     9
* Initial Contributors:
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
    10
* Nokia Corporation - initial contribution.
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
    11
*
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
    12
* Contributors:
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
    13
*
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
    14
* Description: 
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
    15
*
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
    16
*/
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
    17
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
    18
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
    19
#include <e32std.h>
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
    20
#include <e32base.h>
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
    21
#include <utf.h>
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
    22
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
    23
#define STATIC_CAST(t,v) static_cast<t>(v)
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
    24
#define CONST_CAST(t,v) const_cast<t>(v)
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
    25
#define FOREVER for(;;)
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
    26
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
    27
const TUint KNotInBase64Alphabet=KMaxTUint;
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
    28
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
    29
enum TPanic
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
    30
	{
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
    31
	EPanicBad6BitNumber=1,
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
    32
	EPanicBadUtf7Pointers1,
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
    33
	EPanicBadUtf7Pointers2,
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
    34
	EPanicBadUtf7Pointers3,
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
    35
	EPanicBadUtf7Pointers4,
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
    36
	EPanicBadUtf7Pointers5,
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
    37
	EPanicBadUtf7Pointers6,
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
    38
	EPanicBadUtf7Pointers7,
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
    39
	EPanicBadUtf7Pointers8,
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
    40
	EPanicBadUtf7Pointers9,
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
    41
	EPanicBadUtf7Pointers10,
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
    42
	EPanicBadUtf7Pointers11,
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
    43
	EPanicNotInBase64Block,
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
    44
	EPanicBadUnicodePointers1,
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
    45
	EPanicBadUnicodePointers2,
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
    46
	EPanicBadUnicodePointers3,
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
    47
	EPanicBadUnicodePointers4,
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
    48
	EPanicBadUnicodePointers5,
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
    49
	EPanicBadUnicodePointers6,
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
    50
	EPanicBadUnicodePointers7,
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
    51
	EPanicBadUnicodePointers8,
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
    52
	EPanicBadUnicodePointers9,
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
    53
	EPanicBadUnicodePointers10,
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
    54
	EPanicBadBitBufferState1,
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
    55
	EPanicBadBitBufferState2,
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
    56
	EPanicBadBitBufferState3,
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
    57
	EPanicBadBitBufferState4,
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
    58
	EPanicBadBitBufferState5,
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
    59
	EPanicBadBitBufferState6,
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
    60
	EPanicBadBitBufferState7,
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
    61
	EPanicBadBitBufferState8,
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
    62
	EPanicBadBitBufferState9,
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
    63
	EPanicBadBitBufferState10,
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
    64
	EPanicBadBitBufferState11,
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
    65
	EPanicBadBitBufferState12,
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
    66
	EPanicBadBitBufferState13,
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
    67
	EPanicBadBitBufferState14,
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
    68
	EPanicBadBitBufferState15,
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
    69
	EPanicBadBitBufferState16,
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
    70
	EPanicBadBitBufferState17,
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
    71
	EPanicUnexpectedNumberOfLoopIterations,
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
    72
	EPanicInitialEscapeCharacterButNoBase64,
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
    73
	EPanicBase64SequenceDoesNotFallOnUnicodeCharacterBoundary,
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
    74
	EPanicBadUtf8Pointers1,
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
    75
	EPanicBadUtf8Pointers2,
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
    76
	EPanicBadUtf8Pointers3,
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
    77
	EPanicBadUtf8Pointers4,
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
    78
	EPanicBadUtf8Pointers5,
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
    79
	EPanicBadUtf8Pointers6,
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
    80
	EPanicBadUtf8Pointers7,
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
    81
	EPanicOutOfSyncUtf7Byte1,
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
    82
	EPanicOutOfSyncUtf7Byte2,
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
    83
	EPanicOutOfSyncBase64Decoding
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
    84
	};
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
    85
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
    86
_LIT(KLitPanicText, "CHARCONV-UTF");
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
    87
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
    88
LOCAL_C void Panic(TPanic aPanic)
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
    89
	{
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
    90
	User::Panic(KLitPanicText, aPanic);
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
    91
	}
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
    92
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
    93
inline TUint EscapeCharacterForStartingBase64Block(TBool aIsImapUtf7) {return aIsImapUtf7? '&': '+';}
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
    94
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
    95
inline TBool BitBufferContainsNonZeroBits(TUint aBitBuffer, TInt aNumberOfBitsInBuffer)
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
    96
	{
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
    97
	return (aBitBuffer&((1<<aNumberOfBitsInBuffer)-1))!=0;
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
    98
	}
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
    99
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   100
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   101
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   102
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   103
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   104
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   105
 
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   106
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   107
/** Converts Unicode text into UTF-8 encoding.
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   108
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   109
@param aUtf8 On return, contains the UTF-8 encoded output string.
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   110
@param aUnicode The Unicode-encoded input string.
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   111
@return The number of unconverted characters left at the end of the input 
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   112
descriptor, or one of the error values defined in TError. */
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   113
EXPORT_C TInt CnvUtfConverter::ConvertFromUnicodeToUtf8(TDes8& aUtf8, const TDesC16& aUnicode)
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   114
	{
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   115
	return ConvertFromUnicodeToUtf8(aUtf8, aUnicode, EFalse);
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   116
	}
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   117
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   118
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   119
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   120
/** Converts Unicode text into UTF-8 encoding. 
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   121
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   122
Surrogate pairs can be input which will result in a valid 4 byte UTF-8 value.
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   123
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   124
The variant of UTF-8 used internally by Java differs slightly from standard 
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   125
UTF-8. The TBool argument controls the UTF-8 variant generated by this function.
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   126
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   127
@param aUtf8 On return, contains the UTF-8 encoded output string.
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   128
@param aUnicode A UCS-2 encoded input string.
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   129
@param aGenerateJavaConformantUtf8 EFalse for orthodox UTF-8. ETrue for Java 
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   130
UTF-8. The default is EFalse.
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   131
@return The number of unconverted characters left at the end of the input descriptor, 
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   132
or one of the error values defined in TError. */
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   133
TInt CnvUtfConverter::ConvertFromUnicodeToUtf8(TDes8& aUtf8, 
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   134
											   const TDesC16& aUnicode, 
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   135
											   TBool aGenerateJavaConformantUtf8)
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   136
	{
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   137
	if (aUnicode.Length() == 0)
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   138
		{
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   139
		aUtf8.SetLength(0);
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   140
		return 0;
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   141
		}
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   142
	if (aUtf8.MaxLength() == 0)
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   143
		{
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   144
		return aUnicode.Length();
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   145
		}
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   146
	
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   147
	TUint8* pUtf8 = CONST_CAST(TUint8*, aUtf8.Ptr());
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   148
	const TUint8* pointerToLastUtf8Byte = pUtf8 + (aUtf8.MaxLength() - 1);
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   149
	TBool inputIsTruncated = EFalse;
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   150
	const TUint16* pUnicode = aUnicode.Ptr();
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   151
	const TUint16* pointerToLastUnicodeCharacter = pUnicode + (aUnicode.Length() - 1);
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   152
	
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   153
	FOREVER
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   154
		{
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   155
		__ASSERT_DEBUG(pUtf8 <= pointerToLastUtf8Byte, Panic(EPanicBadUtf8Pointers1));
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   156
		__ASSERT_DEBUG(pUnicode <= pointerToLastUnicodeCharacter, Panic(EPanicBadUnicodePointers3));
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   157
	
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   158
		if (pUnicode[0] < 0x80)
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   159
			{
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   160
			// ascii - 1 byte
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   161
			
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   162
			// internally java is different since the \x0000 character is 
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   163
			// translated into \xC0 \x80.
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   164
			
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   165
			if ((aGenerateJavaConformantUtf8) && (pUnicode[0] == 0x0000))
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   166
				{
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   167
				if (pUtf8 == pointerToLastUtf8Byte)
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   168
					{
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   169
					pUtf8--;
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   170
					pUnicode--;
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   171
					break;			
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   172
					}
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   173
				*pUtf8++ = STATIC_CAST(TUint8, 0xc0);
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   174
				*pUtf8   = STATIC_CAST(TUint8, 0x80);	
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   175
				}
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   176
			else
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   177
				{
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   178
				*pUtf8 = STATIC_CAST(TUint8, pUnicode[0]);
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   179
				}
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   180
			}
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   181
		else if (pUnicode[0] < 0x800)
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   182
			{
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   183
			// U+0080..U+07FF - 2 bytes
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   184
			
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   185
			if (pUtf8 == pointerToLastUtf8Byte)
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   186
				{
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   187
				pUtf8--;
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   188
				pUnicode--;
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   189
				break;
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   190
				}
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   191
			
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   192
			*pUtf8++ = STATIC_CAST(TUint8, 0xc0|(pUnicode[0]>>6));
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   193
			*pUtf8   = STATIC_CAST(TUint8, 0x80|(pUnicode[0]&0x3f));
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   194
			
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   195
			}
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   196
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   197
		// check to see if we have a surrogate in the stream, surrogates encode code points outside
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   198
		// the BMP and are 4 utf-8 chars, otherwise what we have here is 3 utf-8 chars.
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   199
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   200
		else if (((pUnicode[0] & 0xfc00) == 0xd800) && !aGenerateJavaConformantUtf8)
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   201
			{
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   202
			// surrogate pair - 4 bytes in utf-8
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   203
			// U+10000..U+10FFFF
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   204
			
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   205
			__ASSERT_DEBUG(pUtf8 <= pointerToLastUtf8Byte, Panic(EPanicBadUtf8Pointers2));
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   206
			// is there enough space to hold the character
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   207
			if ((pointerToLastUtf8Byte - pUtf8) < 3)
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   208
				{
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   209
				pUtf8--;
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   210
				pUnicode--;
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   211
				break;  // no go to the exit condition
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   212
				}
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   213
			
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   214
			__ASSERT_DEBUG(pUnicode <= pointerToLastUnicodeCharacter, Panic(EPanicBadUnicodePointers4));
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   215
			if (pUnicode >= pointerToLastUnicodeCharacter)
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   216
				{
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   217
				pUtf8--;
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   218
				pUnicode--;
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   219
				inputIsTruncated = ETrue;
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   220
				break; // middle of a surrogate pair. go to end condition
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   221
				}
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   222
			
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   223
			if ((pUnicode[1] & 0xfc00) != 0xdc00)
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   224
				{
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   225
				return EErrorIllFormedInput;
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   226
				}
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   227
			
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   228
			// convert utf-16 surrogate to utf-32
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   229
			TUint ch = ((pUnicode[0] - 0xD800) << 10 | (pUnicode[1] - 0xDC00)) + 0x10000;
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   230
			
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   231
			// convert utf-32 to utf-8
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   232
            *pUtf8++ = STATIC_CAST(TUint8,0xf0 | (ch >> 18));   
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   233
            *pUtf8++ = STATIC_CAST(TUint8,0x80 | ((ch >> 12) & 0x3f));
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   234
            *pUtf8++ = STATIC_CAST(TUint8,0x80 | ((ch >> 6) & 0x3f));
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   235
            *pUtf8   = STATIC_CAST(TUint8,0x80 | (ch & 0x3f));
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   236
			
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   237
            // we consumed 2 utf-16 values, move this pointer
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   238
			pUnicode++;
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   239
			}		
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   240
		else
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   241
			{
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   242
			// 3 byte - utf-8, U+800..U+FFFF rest of BMP.
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   243
			
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   244
			if (pointerToLastUtf8Byte - pUtf8 < 2)
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   245
				{
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   246
				pUtf8--;
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   247
				pUnicode--;
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   248
				break;
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   249
				}
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   250
			*pUtf8++ = STATIC_CAST(TUint8, 0xe0|(pUnicode[0]>>12));
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   251
			*pUtf8++ = STATIC_CAST(TUint8, 0x80|((pUnicode[0]>>6)&0x3f));
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   252
			*pUtf8   = STATIC_CAST(TUint8, 0x80|(pUnicode[0]&0x3f));
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   253
			}
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   254
		
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   255
		if ((pUnicode == pointerToLastUnicodeCharacter) || (pUtf8 == pointerToLastUtf8Byte))
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   256
			{
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   257
			break;
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   258
			}
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   259
		
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   260
		pUtf8++;
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   261
		pUnicode++;
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   262
		
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   263
		}
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   264
	
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   265
	if ((pUnicode < aUnicode.Ptr()) && inputIsTruncated)
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   266
		{
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   267
		return EErrorIllFormedInput;
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   268
		}
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   269
	
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   270
	aUtf8.SetLength((pUtf8 - aUtf8.Ptr())+1);
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   271
	return pointerToLastUnicodeCharacter-pUnicode;
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   272
	}
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   273
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   274
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   275
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   276
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   277
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   278
 
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   279
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   280
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   281
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   282
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   283
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   284
/** Converts text encoded using the Unicode transformation format UTF-8 into the 
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   285
Unicode UCS-2 character set.
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   286
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   287
@param aUnicode On return, contains the Unicode encoded output string.
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   288
@param aUtf8 The UTF-8 encoded input string
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   289
@return The number of unconverted bytes left at the end of the input descriptor, 
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   290
or one of the error values defined in TError. */
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   291
EXPORT_C TInt CnvUtfConverter::ConvertToUnicodeFromUtf8(TDes16& aUnicode, const TDesC8& aUtf8)
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   292
	{
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   293
	return ConvertToUnicodeFromUtf8(aUnicode, aUtf8, EFalse);
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   294
	}
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   295
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   296
static void UpdateUnconvertibleInfo(TInt& aNumberOfUnconvertibleCharacters,
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   297
		TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter, TUint8 aIndex)
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   298
	{
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   299
	if (aNumberOfUnconvertibleCharacters<=0)
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   300
		{
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   301
		aIndexOfFirstByteOfFirstUnconvertibleCharacter = aIndex;
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   302
		}
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   303
	++aNumberOfUnconvertibleCharacters;
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   304
	}
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   305
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   306
/** Converts text encoded using the Unicode transformation format UTF-8 into the 
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   307
Unicode UCS-2 character set.
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   308
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   309
@param aUnicode On return, contains the Unicode encoded output string.
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   310
@param aUtf8 The UTF-8 encoded input string
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   311
@param aGenerateJavaConformantUtf8 EFalse for orthodox UTF-8. ETrue for Java 
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   312
@return The number of unconverted bytes left at the end of the input descriptor, 
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   313
or one of the error values defined in TError. */
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   314
TInt CnvUtfConverter::ConvertToUnicodeFromUtf8(TDes16& aUnicode, const TDesC8& aUtf8, TBool aGenerateJavaConformantUtf8)
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   315
	{
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   316
	TInt dummyUnconverted, dummyUnconvertedIndex;
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   317
	return ConvertToUnicodeFromUtf8(aUnicode, aUtf8, aGenerateJavaConformantUtf8, dummyUnconverted, dummyUnconvertedIndex);
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   318
	}
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   319
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   320
/** Converts text encoded using the Unicode transformation format UTF-8 into the 
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   321
Unicode UCS-2 character set. Surrogate pairs can be created when a valid 4 byte UTF-8 is input.
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   322
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   323
The variant of UTF-8 used internally by Java differs slightly from standard 
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   324
UTF-8. The TBool argument controls the UTF-8 variant generated by this function.
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   325
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   326
@param aUnicode On return, contains the Unicode encoded output string.
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   327
@param aUtf8 The UTF-8 encoded input string
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   328
@param aGenerateJavaConformantUtf8 EFalse for orthodox UTF-8. ETrue for Java 
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   329
UTF-8. The default is EFalse.
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   330
@param aNumberOfUnconvertibleCharacters On return, contains the number of bytes 
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   331
which were not converted.
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   332
@param aIndexOfFirstByteOfFirstUnconvertibleCharacter On return, the index 
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   333
of the first byte of the first unconvertible character. For instance if the 
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   334
first character in the input descriptor (aForeign) could not be converted, 
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   335
then this parameter is set to the first byte of that character, i.e. zero. 
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   336
A negative value is returned if all the characters were converted.
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   337
@return The number of unconverted bytes left at the end of the input descriptor, 
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   338
or one of the error values defined in TError. */
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   339
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   340
/* of note: conformance.  Unicode standard 5.0 section 3.9, table 3-7
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   341
 * Well formed UTF-8 Byte Sequences, full table.
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   342
 * +----------------------------------------------------------------+
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   343
 * | Code Points        | 1st byte | 2nd byte | 3rd byte | 4th byte |
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   344
 * +--------------------+----------+----------+----------+----------+
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   345
 * | U+0000..U+007F     | 00..7D   |          |          |          |  1 byte, ascii
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   346
 * | U+0080..U+07FF     | C2..DF   | 80..BF   |          |          |  2 bytes, error if 1st < 0xC2 
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   347
 * | U+0800..U+0FFF     | E0       | A0..BF   | 80..BF   |          |  3 bytes, 1st == 0xE0, error if 2nd < 0xA0
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   348
 * | U+1000..U+CFFF     | E1..EC   | 80..BF   | 80..BF   |          |  normal
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   349
 * | U+D000..U+D7FF     | ED       | 80..9F   | 80..BF   |          |  3 bytes, 1st == 0xED, error if 2nd > 0x9F
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   350
 * | U+E000..U+FFFF     | EE..EF   | 80..BF   | 80..BF   |          |  normal
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   351
 * | U+10000..U+3FFFF   | F0       | 90..BF   | 80..BF   | 80..BF   |  4 bytes, 1st == 0xf0, error if 2nd < 0x90
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   352
 * | U+40000..U+FFFFF   | F1..F3   | 80..BF   | 80..BF   | 80..BF   |  normal
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   353
 * | U+100000..U+10FFFF | F4       | 80..8F   | 80..BF   | 80..BF   |  4 bytes, 1st == 0xF4, error if 2nd > 0x8F
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   354
 * +--------------------+----------+----------+----------+----------+
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   355
 * 
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   356
 * As a consequence of the well-formedness conditions specified in table 3-7,
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   357
 * the following byte values are disallowed in UTF-8: C0-C1, F5-FF.
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   358
 */
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   359
TInt CnvUtfConverter::ConvertToUnicodeFromUtf8(TDes16& aUnicode, const TDesC8& aUtf8, TBool aGenerateJavaConformantUtf8,
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   360
		TInt& aNumberOfUnconvertibleCharacters, TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter)
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   361
	{	
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   362
	aUnicode.SetLength(0);
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   363
	
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   364
	if ((aUtf8.Length() == 0) || (aUnicode.MaxLength() == 0))
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   365
		{
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   366
		return aUtf8.Length();
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   367
		}
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   368
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   369
	TUint16*           pUnicode = CONST_CAST(TUint16*, aUnicode.Ptr());
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   370
	const TUint16* pLastUnicode = pUnicode + (aUnicode.MaxLength() - 1);
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   371
	const TUint8*         pUtf8 = aUtf8.Ptr();   
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   372
	const TUint8*     pLastUtf8 = pUtf8 + (aUtf8.Length() - 1);
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   373
	const TUint16 replacementcharacter = 0xFFFD;
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   374
	TUint currentUnicodeCharacter;
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   375
	TUint sequenceLength;
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   376
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   377
	
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   378
	FOREVER
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   379
		{
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   380
		TBool illFormed=EFalse;
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   381
		
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   382
		__ASSERT_DEBUG(pUnicode <= pLastUnicode, Panic(EPanicBadUnicodePointers8));
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   383
		__ASSERT_DEBUG(pUtf8 <= pLastUtf8, Panic(EPanicBadUtf8Pointers3));
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   384
		
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   385
		sequenceLength = 1;
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   386
		
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   387
		// ascii - optimisation (i.e. it isn't a sequence)
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   388
		if (pUtf8[0] < 0x80)
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   389
			{
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   390
			currentUnicodeCharacter = pUtf8[0];
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   391
			}
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   392
		else
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   393
			{
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   394
			// see if well formed utf-8, use table above for reference	
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   395
			if ((pUtf8[0] >= 0xc2) && (pUtf8[0] <= 0xdf))
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   396
				{
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   397
				// 0xc1-0xc2 are not valid bytes
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   398
				sequenceLength = 2;
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   399
				}
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   400
			else if ((pUtf8[0] & 0xf0) == 0xe0)
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   401
				{
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   402
				sequenceLength = 3;
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   403
				}
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   404
			else if ((pUtf8[0] >= 0xf0) && (pUtf8[0] < 0xf5))
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   405
				{
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   406
				// 0xf5-0xff, are not valid bytes
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   407
				sequenceLength = 4;
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   408
				}
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   409
			else if ((pUtf8[0] == 0xc0) && aGenerateJavaConformantUtf8)
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   410
				{
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   411
				if ((pUtf8 == pLastUtf8) || (pUtf8[1] == 0x80))
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   412
					{
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   413
					// either we've split the 0xc0 0x80 (i.e. 0xc0 is
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   414
					// the last character in the string) or we've
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   415
					// discovered a valid 0xc0 0x80 sequence.  
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   416
					sequenceLength = 2;
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   417
					}
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   418
				}
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   419
			
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   420
			/* checking to see if we got a valid sequence */
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   421
			if (sequenceLength == 1)
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   422
				{
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   423
				// bad value in the leading byte, 0xc0-0xc1,0x5f-0xff for example
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   424
				currentUnicodeCharacter = replacementcharacter;
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   425
				UpdateUnconvertibleInfo(aNumberOfUnconvertibleCharacters,
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   426
						aIndexOfFirstByteOfFirstUnconvertibleCharacter,	pUtf8-aUtf8.Ptr());
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   427
				}
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   428
			else
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   429
				{
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   430
				// this is a check to see if the sequence goes beyond the input 
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   431
				// stream.  if its not the first and only character in the input
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   432
				// stream this isn't an error, otherwise it is.
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   433
				if ((pUtf8 + sequenceLength - 1) >  pLastUtf8)
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   434
					{
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   435
					// check to see if this sequence was the first character
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   436
					if ((pUnicode - aUnicode.Ptr()) == 0)
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   437
						{
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   438
						return EErrorIllFormedInput;
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   439
						}
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   440
					break;
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   441
					}			
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   442
				
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   443
				currentUnicodeCharacter = pUtf8[0] & (0x7F>>sequenceLength);
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   444
			
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   445
				/* check the trailing bytes, they should begin with 10 */
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   446
				TUint i = 1;
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   447
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   448
				do
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   449
					{
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   450
					if ((pUtf8[i] & 0xc0) == 0x80)
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   451
						{
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   452
						// add the trailing 6 bits to the current unicode char
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   453
						currentUnicodeCharacter = (currentUnicodeCharacter <<6 ) | (pUtf8[i] & 0x3F);
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   454
						}
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   455
					else
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   456
						{
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   457
						// ill formed character (doesn't have a lead 10)
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   458
						currentUnicodeCharacter = replacementcharacter;
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   459
						UpdateUnconvertibleInfo(aNumberOfUnconvertibleCharacters,
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   460
								aIndexOfFirstByteOfFirstUnconvertibleCharacter,	pUtf8-aUtf8.Ptr());
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   461
						illFormed=ETrue;
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   462
						break; 
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   463
						}
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   464
					i++;
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   465
					}
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   466
				while (i < sequenceLength);
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   467
				}
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   468
				
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   469
			/* conformance check.  bits of above table for reference.
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   470
			 * +----------------------------------------------------------------+
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   471
			 * | Code Points        | 1st byte | 2nd byte | 3rd byte | 4th byte |
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   472
			 * +--------------------+----------+----------+----------+----------+
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   473
			 * | U+0800..U+0FFF     | E0       | A0..BF   | 80..BF   |          |  3 bytes, 1st == 0xE0, 2nd < 0xA0
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   474
			 * | U+D000..U+D7FF     | ED       | 80..9F   | 80..BF   |          |  3 bytes, 1st == 0xED, 2nd > 0x9F
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   475
			 * | U+10000..U+3FFFF   | F0       | 90..BF   | 80..BF   | 80..BF   |  4 bytes, 1st == 0xf0, 2nd < 0x90
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   476
			 * | U+100000..U+10FFFF | F4       | 80..8F   | 80..BF   | 80..BF   |  4 bytes, 1st == 0xF4, 2nd > 0x8F
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   477
			 * +--------------------+----------+----------+----------+----------+
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   478
			 */
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   479
			
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   480
			if (currentUnicodeCharacter != replacementcharacter)
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   481
				{
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   482
				if (sequenceLength == 3)
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   483
					{
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   484
					if ((pUtf8[0] == 0xE0) && (pUtf8[1] < 0xA0))
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   485
						{
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   486
						currentUnicodeCharacter = replacementcharacter;
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   487
						UpdateUnconvertibleInfo(aNumberOfUnconvertibleCharacters,
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   488
								aIndexOfFirstByteOfFirstUnconvertibleCharacter,	pUtf8-aUtf8.Ptr());
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   489
						illFormed=ETrue;
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   490
						}
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   491
					else if ((pUtf8[0] == 0xED) && (pUtf8[1] > 0x9F))
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   492
						{
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   493
						currentUnicodeCharacter = replacementcharacter;
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   494
						UpdateUnconvertibleInfo(aNumberOfUnconvertibleCharacters,
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   495
								aIndexOfFirstByteOfFirstUnconvertibleCharacter,	pUtf8-aUtf8.Ptr());
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   496
						illFormed=ETrue;
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   497
						}
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   498
					}
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   499
				else if (sequenceLength == 4)
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   500
					{
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   501
					if ((pUtf8[0] == 0xF0) && (pUtf8[1] < 0x90))
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   502
						{
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   503
						currentUnicodeCharacter = replacementcharacter;
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   504
						UpdateUnconvertibleInfo(aNumberOfUnconvertibleCharacters,
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   505
								aIndexOfFirstByteOfFirstUnconvertibleCharacter,	pUtf8-aUtf8.Ptr());
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   506
						illFormed=ETrue;
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   507
						}
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   508
					else if ((pUtf8[0] == 0xF4) && (pUtf8[1] > 0x8F))
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   509
						{
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   510
						currentUnicodeCharacter = replacementcharacter;
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   511
						UpdateUnconvertibleInfo(aNumberOfUnconvertibleCharacters,
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   512
								aIndexOfFirstByteOfFirstUnconvertibleCharacter,	pUtf8-aUtf8.Ptr());
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   513
						illFormed=ETrue;
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   514
						}
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   515
					}
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   516
				
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   517
				
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   518
				/* last conformance check - Unicode 5.0 section 3.9 D92 Because surrogate code points
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   519
				 * are not Unicode scalar values, any UTF-8 byte sequence that would map to code 
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   520
				 * points D800..DFFF is ill formed */
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   521
				
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   522
				if ((currentUnicodeCharacter >= 0xD800) && (currentUnicodeCharacter <= 0xDFFF))
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   523
					{
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   524
					currentUnicodeCharacter = replacementcharacter;
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   525
					UpdateUnconvertibleInfo(aNumberOfUnconvertibleCharacters,
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   526
							aIndexOfFirstByteOfFirstUnconvertibleCharacter,	pUtf8-aUtf8.Ptr());
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   527
					illFormed=ETrue;
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   528
					}	
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   529
				}
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   530
				// end conformance check
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   531
			}
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   532
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   533
		// would this character generate a surrogate pair in UTF-16?
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   534
		if (currentUnicodeCharacter > 0xFFFF)
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   535
			{
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   536
			// is there enough space to hold a surrogate pair in the output?
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   537
			if (pUnicode >= pLastUnicode)
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   538
				{
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   539
				break; // no, end processing.
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   540
				}
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   541
			
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   542
			TUint surrogate = (currentUnicodeCharacter>>10) + 0xD7C0;
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   543
			*pUnicode++ = STATIC_CAST(TUint16, surrogate);
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   544
					
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   545
			surrogate = (currentUnicodeCharacter & 0x3FF) + 0xDC00;
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   546
			*pUnicode++ = STATIC_CAST(TUint16, surrogate);			
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   547
			}
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   548
		else
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   549
			{
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   550
			*pUnicode++ = STATIC_CAST(TUint16, currentUnicodeCharacter);
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   551
			}
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   552
		
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   553
		// move the input pointer
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   554
		if (currentUnicodeCharacter != replacementcharacter)
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   555
			{
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   556
			pUtf8 += sequenceLength;
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   557
			}
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   558
		else if(illFormed == EFalse)
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   559
			{
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   560
			pUtf8 += (sequenceLength);
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   561
			}
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   562
		else
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   563
			{
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   564
			// we had a character we didn't recognize (i.e. it was invalid)
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   565
			// so move to the next character in the input
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   566
			pUtf8++;
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   567
			}
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   568
		
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   569
		if ((pUtf8 > pLastUtf8) || (pUnicode > pLastUnicode))
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   570
			{ 
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   571
			break;  // we've either reached the end of the input or the end of output
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   572
			}
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   573
		}
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   574
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   575
	aUnicode.SetLength(pUnicode - aUnicode.Ptr());
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   576
	return (pLastUtf8 - pUtf8 + 1);
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   577
	}
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   578
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   579
/** Given a sample text this function attempts to determine whether or not
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   580
 *  the same text is encoded using the UTF-8 standard encoding scheme.
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   581
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   582
@param TInt a confidence level, given at certain value.  if the given sample
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   583
			is UTF-8 this value will not be changed (unless > 100) then its
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   584
			set to 100.  Otherwise if the same isn't UTF-8, its set to 0.
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   585
@param TDesC8 sample text.
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   586
UTF-8. The default is EFalse.
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   587
@return void
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   588
 */
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   589
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   590
/* of note: conformance.  Unicode standard 5.0 section 3.9, table 3-7
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   591
 * Well formed UTF-8 Byte Sequences, full table.
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   592
 * +----------------------------------------------------------------+
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   593
 * | Code Points        | 1st byte | 2nd byte | 3rd byte | 4th byte |
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   594
 * +--------------------+----------+----------+----------+----------+
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   595
 * | U+0000..U+007F     | 00..7D   |          |          |          |  1 byte, ascii
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   596
 * | U+0080..U+07FF     | C2..DF   | 80..BF   |          |          |  2 bytes, error if 1st < 0xC2 
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   597
 * | U+0800..U+0FFF     | E0       | A0..BF   | 80..BF   |          |  3 bytes, 1st == 0xE0, error if 2nd < 0xA0
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   598
 * | U+1000..U+CFFF     | E1..EC   | 80..BF   | 80..BF   |          |  normal
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   599
 * | U+D000..U+D7FF     | ED       | 80..9F   | 80..BF   |          |  3 bytes, 1st == 0xED, error if 2nd > 0x9F
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   600
 * | U+E000..U+FFFF     | EE..EF   | 80..BF   | 80..BF   |          |  normal
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   601
 * | U+10000..U+3FFFF   | F0       | 90..BF   | 80..BF   | 80..BF   |  4 bytes, 1st == 0xf0, error if 2nd < 0x90
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   602
 * | U+40000..U+FFFFF   | F1..F3   | 80..BF   | 80..BF   | 80..BF   |  normal
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   603
 * | U+100000..U+10FFFF | F4       | 80..8F   | 80..BF   | 80..BF   |  4 bytes, 1st == 0xF4, error if 2nd > 0x8F
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   604
 * +--------------------+----------+----------+----------+----------+
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   605
 * 
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   606
 * As a consequence of the well-formedness conditions specified in table 3-7,
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   607
 * the following byte values are disallowed in UTF-8: C0-C1, F5-FF.
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   608
 * 
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   609
 * Code Rules:
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   610
 *   R1: If the string contains any non-UTF-8 characters the returned confidence
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   611
 *       is 0.  Valid UTF-8 combinations are listed in the above table.
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   612
 *   R2: Otherwise if the string starts with a UTF-8 BOM (byte order mark) in  
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   613
 *       the (see ) the returned confidence is 95.
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   614
 *   R3: Otherwise the confidence returned is based upon the sample string 
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   615
 *       length.
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   616
 *   R4: If the sample string is under 75 characters, the confidence is set to 
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   617
 *       75.
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   618
 */
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   619
void IsCharacterSetUTF8(TInt& aConfidenceLevel, const TDesC8& aSample)
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   620
	{
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   621
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   622
	TInt sampleLength = aSample.Length();
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   623
	
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   624
	if (sampleLength == 0)
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   625
		{
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   626
		aConfidenceLevel = 89;
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   627
		return;
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   628
		}
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   629
	TInt bytesRemaining  = 0;
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   630
	TUint sequenceLength  = 0;
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   631
	
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   632
	aConfidenceLevel = sampleLength;
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   633
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   634
	const TUint8* buffer = &aSample[0];
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   635
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   636
	if (sampleLength < 95)
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   637
		{
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   638
		// check for the BOM
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   639
		if ((sampleLength >= 3) && 
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   640
			((buffer[0] == 0xEF) &&
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   641
			 (buffer[1] == 0xBB) &&
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   642
			 (buffer[2] == 0xBF)) 
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   643
			) 
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   644
			{
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   645
			aConfidenceLevel = 95;
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   646
			}
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   647
		else if (sampleLength < 75)
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   648
			{
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   649
			aConfidenceLevel = 75;
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   650
			}
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   651
		}
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   652
	
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   653
	for (TInt index = 0;index != sampleLength;index++)
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   654
		{
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   655
		
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   656
		if (bytesRemaining > 0)
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   657
			{
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   658
			// bytesRemaining > 0, means that a byte representing the start of a 
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   659
			// multibyte sequence was encountered and the bytesRemaining is the 
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   660
			// number of bytes to follow. 
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   661
			
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   662
			if ((buffer[index] & 0xc0) == 0x80) 
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   663
				{
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   664
				// need to check for ill-formed sequences -- all are in the 2nd byte
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   665
				
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   666
				if ((sequenceLength == 3) && (bytesRemaining == 2))
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   667
					{
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   668
					if ((buffer[index - 1] == 0xe0) && (buffer[index] < 0xa0))
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   669
						{
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   670
						aConfidenceLevel = 0;
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   671
						break;
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   672
						}
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   673
					else if ((buffer[index - 1] == 0xed) && (buffer[index] > 0x9f))
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   674
						{
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   675
						aConfidenceLevel = 0;
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   676
						break;
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   677
						}
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   678
					}
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   679
				else if ((sequenceLength == 4) && (bytesRemaining == 3))
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   680
					{
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   681
					if ((buffer[index - 1] == 0xf0) && (buffer[index] < 0x90))
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   682
						{
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   683
						aConfidenceLevel = 0;
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   684
						break;
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   685
						}
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   686
					else if ((buffer[index - 1] == 0xf4) && (buffer[index] > 0x8f))
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   687
						{
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   688
						aConfidenceLevel = 0;
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   689
						break;
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   690
						}
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   691
					}
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   692
				
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   693
				--bytesRemaining;
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   694
				continue;
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   695
				}
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   696
			else
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   697
				{
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   698
				aConfidenceLevel = 0;
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   699
				break;
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   700
				}
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   701
			}
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   702
		
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   703
		if (bytesRemaining == 0)
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   704
			{
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   705
			if (buffer[index] < 0x80)
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   706
				{
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   707
				// The value of aSample[index] is in the range 0x00-0x7f
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   708
				//UTF8 maintains ASCII transparency. So it's a valid
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   709
				//UTF8. Do nothing, check next value.
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   710
				continue;
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   711
				}
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   712
			else if ((buffer[index] >= 0xc2) && (buffer[index] < 0xe0))
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   713
				{
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   714
				// valid start of a 2 byte sequence (see conformance note)
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   715
				sequenceLength = 2;
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   716
				bytesRemaining = 1;
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   717
				}
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   718
			else if ((buffer[index] & 0xf0) == 0xe0)
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   719
				{
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   720
				// valid start of a 3 byte sequence
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   721
				sequenceLength = 3;
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   722
				bytesRemaining = 2;
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   723
				}
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   724
			else if ((buffer[index] >= 0xf0) && (buffer[index] < 0xf5))
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   725
				{
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   726
				// valid start of a 4 byte sequence (see conformance note)
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   727
				sequenceLength = 4;
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   728
				bytesRemaining = 3;
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   729
				}	
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   730
			else
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   731
				{
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   732
				// wasn't anything expected so must be an illegal/irregular UTF8 coded value
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   733
				aConfidenceLevel = 0;
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   734
				break;
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   735
				}
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   736
			}
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   737
		} // for 
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   738
	
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   739
	aConfidenceLevel = (aConfidenceLevel > 0)? ((aConfidenceLevel > 100)? 100: aConfidenceLevel): 0;
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   740
	}
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   741
35751d3474b7 Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   742
// End of file