author | Santosh V Patil <santosh.v.patil@nokia.com> |
Fri, 13 Nov 2009 10:07:38 +0530 | |
changeset 22 | 1afc808f187d |
parent 8 | 35751d3474b7 |
permissions | -rw-r--r-- |
8
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
1 |
/* |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
2 |
* Copyright (c) 1997-2009 Nokia Corporation and/or its subsidiary(-ies). |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
3 |
* All rights reserved. |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
4 |
* This component and the accompanying materials are made available |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
5 |
* under the terms of the License "Eclipse Public License v1.0" |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
6 |
* which accompanies this distribution, and is available |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
7 |
* at the URL "http://www.eclipse.org/legal/epl-v10.html". |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
8 |
* |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
9 |
* Initial Contributors: |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
10 |
* Nokia Corporation - initial contribution. |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
11 |
* |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
12 |
* Contributors: |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
13 |
* |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
14 |
* Description: |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
15 |
* |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
16 |
*/ |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
17 |
|
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
18 |
|
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
19 |
#include <e32std.h> |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
20 |
#include <e32base.h> |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
21 |
#include <utf.h> |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
22 |
|
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
23 |
#define STATIC_CAST(t,v) static_cast<t>(v) |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
24 |
#define CONST_CAST(t,v) const_cast<t>(v) |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
25 |
#define FOREVER for(;;) |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
26 |
|
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
27 |
const TUint KNotInBase64Alphabet=KMaxTUint; |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
28 |
|
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
29 |
enum TPanic |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
30 |
{ |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
31 |
EPanicBad6BitNumber=1, |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
32 |
EPanicBadUtf7Pointers1, |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
33 |
EPanicBadUtf7Pointers2, |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
34 |
EPanicBadUtf7Pointers3, |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
35 |
EPanicBadUtf7Pointers4, |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
36 |
EPanicBadUtf7Pointers5, |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
37 |
EPanicBadUtf7Pointers6, |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
38 |
EPanicBadUtf7Pointers7, |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
39 |
EPanicBadUtf7Pointers8, |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
40 |
EPanicBadUtf7Pointers9, |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
41 |
EPanicBadUtf7Pointers10, |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
42 |
EPanicBadUtf7Pointers11, |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
43 |
EPanicNotInBase64Block, |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
44 |
EPanicBadUnicodePointers1, |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
45 |
EPanicBadUnicodePointers2, |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
46 |
EPanicBadUnicodePointers3, |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
47 |
EPanicBadUnicodePointers4, |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
48 |
EPanicBadUnicodePointers5, |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
49 |
EPanicBadUnicodePointers6, |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
50 |
EPanicBadUnicodePointers7, |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
51 |
EPanicBadUnicodePointers8, |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
52 |
EPanicBadUnicodePointers9, |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
53 |
EPanicBadUnicodePointers10, |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
54 |
EPanicBadBitBufferState1, |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
55 |
EPanicBadBitBufferState2, |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
56 |
EPanicBadBitBufferState3, |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
57 |
EPanicBadBitBufferState4, |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
58 |
EPanicBadBitBufferState5, |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
59 |
EPanicBadBitBufferState6, |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
60 |
EPanicBadBitBufferState7, |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
61 |
EPanicBadBitBufferState8, |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
62 |
EPanicBadBitBufferState9, |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
63 |
EPanicBadBitBufferState10, |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
64 |
EPanicBadBitBufferState11, |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
65 |
EPanicBadBitBufferState12, |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
66 |
EPanicBadBitBufferState13, |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
67 |
EPanicBadBitBufferState14, |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
68 |
EPanicBadBitBufferState15, |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
69 |
EPanicBadBitBufferState16, |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
70 |
EPanicBadBitBufferState17, |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
71 |
EPanicUnexpectedNumberOfLoopIterations, |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
72 |
EPanicInitialEscapeCharacterButNoBase64, |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
73 |
EPanicBase64SequenceDoesNotFallOnUnicodeCharacterBoundary, |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
74 |
EPanicBadUtf8Pointers1, |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
75 |
EPanicBadUtf8Pointers2, |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
76 |
EPanicBadUtf8Pointers3, |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
77 |
EPanicBadUtf8Pointers4, |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
78 |
EPanicBadUtf8Pointers5, |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
79 |
EPanicBadUtf8Pointers6, |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
80 |
EPanicBadUtf8Pointers7, |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
81 |
EPanicOutOfSyncUtf7Byte1, |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
82 |
EPanicOutOfSyncUtf7Byte2, |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
83 |
EPanicOutOfSyncBase64Decoding |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
84 |
}; |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
85 |
|
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
86 |
_LIT(KLitPanicText, "CHARCONV-UTF"); |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
87 |
|
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
88 |
LOCAL_C void Panic(TPanic aPanic) |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
89 |
{ |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
90 |
User::Panic(KLitPanicText, aPanic); |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
91 |
} |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
92 |
|
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
93 |
inline TUint EscapeCharacterForStartingBase64Block(TBool aIsImapUtf7) {return aIsImapUtf7? '&': '+';} |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
94 |
|
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
95 |
inline TBool BitBufferContainsNonZeroBits(TUint aBitBuffer, TInt aNumberOfBitsInBuffer) |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
96 |
{ |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
97 |
return (aBitBuffer&((1<<aNumberOfBitsInBuffer)-1))!=0; |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
98 |
} |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
99 |
|
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
100 |
|
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
101 |
|
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
102 |
|
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
103 |
|
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
104 |
|
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
105 |
|
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
106 |
|
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
107 |
/** Converts Unicode text into UTF-8 encoding. |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
108 |
|
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
109 |
@param aUtf8 On return, contains the UTF-8 encoded output string. |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
110 |
@param aUnicode The Unicode-encoded input string. |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
111 |
@return The number of unconverted characters left at the end of the input |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
112 |
descriptor, or one of the error values defined in TError. */ |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
113 |
EXPORT_C TInt CnvUtfConverter::ConvertFromUnicodeToUtf8(TDes8& aUtf8, const TDesC16& aUnicode) |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
114 |
{ |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
115 |
return ConvertFromUnicodeToUtf8(aUtf8, aUnicode, EFalse); |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
116 |
} |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
117 |
|
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
118 |
|
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
119 |
|
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
120 |
/** Converts Unicode text into UTF-8 encoding. |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
121 |
|
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
122 |
Surrogate pairs can be input which will result in a valid 4 byte UTF-8 value. |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
123 |
|
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
124 |
The variant of UTF-8 used internally by Java differs slightly from standard |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
125 |
UTF-8. The TBool argument controls the UTF-8 variant generated by this function. |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
126 |
|
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
127 |
@param aUtf8 On return, contains the UTF-8 encoded output string. |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
128 |
@param aUnicode A UCS-2 encoded input string. |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
129 |
@param aGenerateJavaConformantUtf8 EFalse for orthodox UTF-8. ETrue for Java |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
130 |
UTF-8. The default is EFalse. |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
131 |
@return The number of unconverted characters left at the end of the input descriptor, |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
132 |
or one of the error values defined in TError. */ |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
133 |
TInt CnvUtfConverter::ConvertFromUnicodeToUtf8(TDes8& aUtf8, |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
134 |
const TDesC16& aUnicode, |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
135 |
TBool aGenerateJavaConformantUtf8) |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
136 |
{ |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
137 |
if (aUnicode.Length() == 0) |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
138 |
{ |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
139 |
aUtf8.SetLength(0); |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
140 |
return 0; |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
141 |
} |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
142 |
if (aUtf8.MaxLength() == 0) |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
143 |
{ |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
144 |
return aUnicode.Length(); |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
145 |
} |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
146 |
|
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
147 |
TUint8* pUtf8 = CONST_CAST(TUint8*, aUtf8.Ptr()); |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
148 |
const TUint8* pointerToLastUtf8Byte = pUtf8 + (aUtf8.MaxLength() - 1); |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
149 |
TBool inputIsTruncated = EFalse; |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
150 |
const TUint16* pUnicode = aUnicode.Ptr(); |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
151 |
const TUint16* pointerToLastUnicodeCharacter = pUnicode + (aUnicode.Length() - 1); |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
152 |
|
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
153 |
FOREVER |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
154 |
{ |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
155 |
__ASSERT_DEBUG(pUtf8 <= pointerToLastUtf8Byte, Panic(EPanicBadUtf8Pointers1)); |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
156 |
__ASSERT_DEBUG(pUnicode <= pointerToLastUnicodeCharacter, Panic(EPanicBadUnicodePointers3)); |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
157 |
|
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
158 |
if (pUnicode[0] < 0x80) |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
159 |
{ |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
160 |
// ascii - 1 byte |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
161 |
|
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
162 |
// internally java is different since the \x0000 character is |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
163 |
// translated into \xC0 \x80. |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
164 |
|
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
165 |
if ((aGenerateJavaConformantUtf8) && (pUnicode[0] == 0x0000)) |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
166 |
{ |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
167 |
if (pUtf8 == pointerToLastUtf8Byte) |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
168 |
{ |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
169 |
pUtf8--; |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
170 |
pUnicode--; |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
171 |
break; |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
172 |
} |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
173 |
*pUtf8++ = STATIC_CAST(TUint8, 0xc0); |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
174 |
*pUtf8 = STATIC_CAST(TUint8, 0x80); |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
175 |
} |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
176 |
else |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
177 |
{ |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
178 |
*pUtf8 = STATIC_CAST(TUint8, pUnicode[0]); |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
179 |
} |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
180 |
} |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
181 |
else if (pUnicode[0] < 0x800) |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
182 |
{ |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
183 |
// U+0080..U+07FF - 2 bytes |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
184 |
|
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
185 |
if (pUtf8 == pointerToLastUtf8Byte) |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
186 |
{ |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
187 |
pUtf8--; |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
188 |
pUnicode--; |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
189 |
break; |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
190 |
} |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
191 |
|
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
192 |
*pUtf8++ = STATIC_CAST(TUint8, 0xc0|(pUnicode[0]>>6)); |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
193 |
*pUtf8 = STATIC_CAST(TUint8, 0x80|(pUnicode[0]&0x3f)); |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
194 |
|
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
195 |
} |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
196 |
|
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
197 |
// check to see if we have a surrogate in the stream, surrogates encode code points outside |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
198 |
// the BMP and are 4 utf-8 chars, otherwise what we have here is 3 utf-8 chars. |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
199 |
|
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
200 |
else if (((pUnicode[0] & 0xfc00) == 0xd800) && !aGenerateJavaConformantUtf8) |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
201 |
{ |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
202 |
// surrogate pair - 4 bytes in utf-8 |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
203 |
// U+10000..U+10FFFF |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
204 |
|
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
205 |
__ASSERT_DEBUG(pUtf8 <= pointerToLastUtf8Byte, Panic(EPanicBadUtf8Pointers2)); |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
206 |
// is there enough space to hold the character |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
207 |
if ((pointerToLastUtf8Byte - pUtf8) < 3) |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
208 |
{ |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
209 |
pUtf8--; |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
210 |
pUnicode--; |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
211 |
break; // no go to the exit condition |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
212 |
} |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
213 |
|
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
214 |
__ASSERT_DEBUG(pUnicode <= pointerToLastUnicodeCharacter, Panic(EPanicBadUnicodePointers4)); |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
215 |
if (pUnicode >= pointerToLastUnicodeCharacter) |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
216 |
{ |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
217 |
pUtf8--; |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
218 |
pUnicode--; |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
219 |
inputIsTruncated = ETrue; |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
220 |
break; // middle of a surrogate pair. go to end condition |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
221 |
} |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
222 |
|
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
223 |
if ((pUnicode[1] & 0xfc00) != 0xdc00) |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
224 |
{ |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
225 |
return EErrorIllFormedInput; |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
226 |
} |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
227 |
|
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
228 |
// convert utf-16 surrogate to utf-32 |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
229 |
TUint ch = ((pUnicode[0] - 0xD800) << 10 | (pUnicode[1] - 0xDC00)) + 0x10000; |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
230 |
|
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
231 |
// convert utf-32 to utf-8 |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
232 |
*pUtf8++ = STATIC_CAST(TUint8,0xf0 | (ch >> 18)); |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
233 |
*pUtf8++ = STATIC_CAST(TUint8,0x80 | ((ch >> 12) & 0x3f)); |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
234 |
*pUtf8++ = STATIC_CAST(TUint8,0x80 | ((ch >> 6) & 0x3f)); |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
235 |
*pUtf8 = STATIC_CAST(TUint8,0x80 | (ch & 0x3f)); |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
236 |
|
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
237 |
// we consumed 2 utf-16 values, move this pointer |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
238 |
pUnicode++; |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
239 |
} |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
240 |
else |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
241 |
{ |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
242 |
// 3 byte - utf-8, U+800..U+FFFF rest of BMP. |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
243 |
|
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
244 |
if (pointerToLastUtf8Byte - pUtf8 < 2) |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
245 |
{ |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
246 |
pUtf8--; |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
247 |
pUnicode--; |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
248 |
break; |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
249 |
} |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
250 |
*pUtf8++ = STATIC_CAST(TUint8, 0xe0|(pUnicode[0]>>12)); |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
251 |
*pUtf8++ = STATIC_CAST(TUint8, 0x80|((pUnicode[0]>>6)&0x3f)); |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
252 |
*pUtf8 = STATIC_CAST(TUint8, 0x80|(pUnicode[0]&0x3f)); |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
253 |
} |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
254 |
|
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
255 |
if ((pUnicode == pointerToLastUnicodeCharacter) || (pUtf8 == pointerToLastUtf8Byte)) |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
256 |
{ |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
257 |
break; |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
258 |
} |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
259 |
|
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
260 |
pUtf8++; |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
261 |
pUnicode++; |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
262 |
|
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
263 |
} |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
264 |
|
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
265 |
if ((pUnicode < aUnicode.Ptr()) && inputIsTruncated) |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
266 |
{ |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
267 |
return EErrorIllFormedInput; |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
268 |
} |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
269 |
|
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
270 |
aUtf8.SetLength((pUtf8 - aUtf8.Ptr())+1); |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
271 |
return pointerToLastUnicodeCharacter-pUnicode; |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
272 |
} |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
273 |
|
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
274 |
|
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
275 |
|
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
276 |
|
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
277 |
|
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
278 |
|
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
279 |
|
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
280 |
|
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
281 |
|
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
282 |
|
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
283 |
|
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
284 |
/** Converts text encoded using the Unicode transformation format UTF-8 into the |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
285 |
Unicode UCS-2 character set. |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
286 |
|
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
287 |
@param aUnicode On return, contains the Unicode encoded output string. |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
288 |
@param aUtf8 The UTF-8 encoded input string |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
289 |
@return The number of unconverted bytes left at the end of the input descriptor, |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
290 |
or one of the error values defined in TError. */ |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
291 |
EXPORT_C TInt CnvUtfConverter::ConvertToUnicodeFromUtf8(TDes16& aUnicode, const TDesC8& aUtf8) |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
292 |
{ |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
293 |
return ConvertToUnicodeFromUtf8(aUnicode, aUtf8, EFalse); |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
294 |
} |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
295 |
|
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
296 |
static void UpdateUnconvertibleInfo(TInt& aNumberOfUnconvertibleCharacters, |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
297 |
TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter, TUint8 aIndex) |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
298 |
{ |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
299 |
if (aNumberOfUnconvertibleCharacters<=0) |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
300 |
{ |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
301 |
aIndexOfFirstByteOfFirstUnconvertibleCharacter = aIndex; |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
302 |
} |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
303 |
++aNumberOfUnconvertibleCharacters; |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
304 |
} |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
305 |
|
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
306 |
/** Converts text encoded using the Unicode transformation format UTF-8 into the |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
307 |
Unicode UCS-2 character set. |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
308 |
|
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
309 |
@param aUnicode On return, contains the Unicode encoded output string. |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
310 |
@param aUtf8 The UTF-8 encoded input string |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
311 |
@param aGenerateJavaConformantUtf8 EFalse for orthodox UTF-8. ETrue for Java |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
312 |
@return The number of unconverted bytes left at the end of the input descriptor, |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
313 |
or one of the error values defined in TError. */ |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
314 |
TInt CnvUtfConverter::ConvertToUnicodeFromUtf8(TDes16& aUnicode, const TDesC8& aUtf8, TBool aGenerateJavaConformantUtf8) |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
315 |
{ |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
316 |
TInt dummyUnconverted, dummyUnconvertedIndex; |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
317 |
return ConvertToUnicodeFromUtf8(aUnicode, aUtf8, aGenerateJavaConformantUtf8, dummyUnconverted, dummyUnconvertedIndex); |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
318 |
} |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
319 |
|
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
320 |
/** Converts text encoded using the Unicode transformation format UTF-8 into the |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
321 |
Unicode UCS-2 character set. Surrogate pairs can be created when a valid 4 byte UTF-8 is input. |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
322 |
|
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
323 |
The variant of UTF-8 used internally by Java differs slightly from standard |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
324 |
UTF-8. The TBool argument controls the UTF-8 variant generated by this function. |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
325 |
|
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
326 |
@param aUnicode On return, contains the Unicode encoded output string. |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
327 |
@param aUtf8 The UTF-8 encoded input string |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
328 |
@param aGenerateJavaConformantUtf8 EFalse for orthodox UTF-8. ETrue for Java |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
329 |
UTF-8. The default is EFalse. |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
330 |
@param aNumberOfUnconvertibleCharacters On return, contains the number of bytes |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
331 |
which were not converted. |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
332 |
@param aIndexOfFirstByteOfFirstUnconvertibleCharacter On return, the index |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
333 |
of the first byte of the first unconvertible character. For instance if the |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
334 |
first character in the input descriptor (aForeign) could not be converted, |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
335 |
then this parameter is set to the first byte of that character, i.e. zero. |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
336 |
A negative value is returned if all the characters were converted. |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
337 |
@return The number of unconverted bytes left at the end of the input descriptor, |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
338 |
or one of the error values defined in TError. */ |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
339 |
|
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
340 |
/* of note: conformance. Unicode standard 5.0 section 3.9, table 3-7 |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
341 |
* Well formed UTF-8 Byte Sequences, full table. |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
342 |
* +----------------------------------------------------------------+ |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
343 |
* | Code Points | 1st byte | 2nd byte | 3rd byte | 4th byte | |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
344 |
* +--------------------+----------+----------+----------+----------+ |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
345 |
* | U+0000..U+007F | 00..7D | | | | 1 byte, ascii |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
346 |
* | U+0080..U+07FF | C2..DF | 80..BF | | | 2 bytes, error if 1st < 0xC2 |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
347 |
* | U+0800..U+0FFF | E0 | A0..BF | 80..BF | | 3 bytes, 1st == 0xE0, error if 2nd < 0xA0 |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
348 |
* | U+1000..U+CFFF | E1..EC | 80..BF | 80..BF | | normal |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
349 |
* | U+D000..U+D7FF | ED | 80..9F | 80..BF | | 3 bytes, 1st == 0xED, error if 2nd > 0x9F |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
350 |
* | U+E000..U+FFFF | EE..EF | 80..BF | 80..BF | | normal |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
351 |
* | U+10000..U+3FFFF | F0 | 90..BF | 80..BF | 80..BF | 4 bytes, 1st == 0xf0, error if 2nd < 0x90 |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
352 |
* | U+40000..U+FFFFF | F1..F3 | 80..BF | 80..BF | 80..BF | normal |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
353 |
* | U+100000..U+10FFFF | F4 | 80..8F | 80..BF | 80..BF | 4 bytes, 1st == 0xF4, error if 2nd > 0x8F |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
354 |
* +--------------------+----------+----------+----------+----------+ |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
355 |
* |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
356 |
* As a consequence of the well-formedness conditions specified in table 3-7, |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
357 |
* the following byte values are disallowed in UTF-8: C0-C1, F5-FF. |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
358 |
*/ |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
359 |
TInt CnvUtfConverter::ConvertToUnicodeFromUtf8(TDes16& aUnicode, const TDesC8& aUtf8, TBool aGenerateJavaConformantUtf8, |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
360 |
TInt& aNumberOfUnconvertibleCharacters, TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter) |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
361 |
{ |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
362 |
aUnicode.SetLength(0); |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
363 |
|
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
364 |
if ((aUtf8.Length() == 0) || (aUnicode.MaxLength() == 0)) |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
365 |
{ |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
366 |
return aUtf8.Length(); |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
367 |
} |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
368 |
|
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
369 |
TUint16* pUnicode = CONST_CAST(TUint16*, aUnicode.Ptr()); |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
370 |
const TUint16* pLastUnicode = pUnicode + (aUnicode.MaxLength() - 1); |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
371 |
const TUint8* pUtf8 = aUtf8.Ptr(); |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
372 |
const TUint8* pLastUtf8 = pUtf8 + (aUtf8.Length() - 1); |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
373 |
const TUint16 replacementcharacter = 0xFFFD; |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
374 |
TUint currentUnicodeCharacter; |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
375 |
TUint sequenceLength; |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
376 |
|
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
377 |
|
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
378 |
FOREVER |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
379 |
{ |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
380 |
TBool illFormed=EFalse; |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
381 |
|
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
382 |
__ASSERT_DEBUG(pUnicode <= pLastUnicode, Panic(EPanicBadUnicodePointers8)); |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
383 |
__ASSERT_DEBUG(pUtf8 <= pLastUtf8, Panic(EPanicBadUtf8Pointers3)); |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
384 |
|
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
385 |
sequenceLength = 1; |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
386 |
|
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
387 |
// ascii - optimisation (i.e. it isn't a sequence) |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
388 |
if (pUtf8[0] < 0x80) |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
389 |
{ |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
390 |
currentUnicodeCharacter = pUtf8[0]; |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
391 |
} |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
392 |
else |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
393 |
{ |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
394 |
// see if well formed utf-8, use table above for reference |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
395 |
if ((pUtf8[0] >= 0xc2) && (pUtf8[0] <= 0xdf)) |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
396 |
{ |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
397 |
// 0xc1-0xc2 are not valid bytes |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
398 |
sequenceLength = 2; |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
399 |
} |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
400 |
else if ((pUtf8[0] & 0xf0) == 0xe0) |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
401 |
{ |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
402 |
sequenceLength = 3; |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
403 |
} |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
404 |
else if ((pUtf8[0] >= 0xf0) && (pUtf8[0] < 0xf5)) |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
405 |
{ |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
406 |
// 0xf5-0xff, are not valid bytes |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
407 |
sequenceLength = 4; |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
408 |
} |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
409 |
else if ((pUtf8[0] == 0xc0) && aGenerateJavaConformantUtf8) |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
410 |
{ |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
411 |
if ((pUtf8 == pLastUtf8) || (pUtf8[1] == 0x80)) |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
412 |
{ |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
413 |
// either we've split the 0xc0 0x80 (i.e. 0xc0 is |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
414 |
// the last character in the string) or we've |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
415 |
// discovered a valid 0xc0 0x80 sequence. |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
416 |
sequenceLength = 2; |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
417 |
} |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
418 |
} |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
419 |
|
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
420 |
/* checking to see if we got a valid sequence */ |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
421 |
if (sequenceLength == 1) |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
422 |
{ |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
423 |
// bad value in the leading byte, 0xc0-0xc1,0x5f-0xff for example |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
424 |
currentUnicodeCharacter = replacementcharacter; |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
425 |
UpdateUnconvertibleInfo(aNumberOfUnconvertibleCharacters, |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
426 |
aIndexOfFirstByteOfFirstUnconvertibleCharacter, pUtf8-aUtf8.Ptr()); |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
427 |
} |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
428 |
else |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
429 |
{ |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
430 |
// this is a check to see if the sequence goes beyond the input |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
431 |
// stream. if its not the first and only character in the input |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
432 |
// stream this isn't an error, otherwise it is. |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
433 |
if ((pUtf8 + sequenceLength - 1) > pLastUtf8) |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
434 |
{ |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
435 |
// check to see if this sequence was the first character |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
436 |
if ((pUnicode - aUnicode.Ptr()) == 0) |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
437 |
{ |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
438 |
return EErrorIllFormedInput; |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
439 |
} |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
440 |
break; |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
441 |
} |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
442 |
|
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
443 |
currentUnicodeCharacter = pUtf8[0] & (0x7F>>sequenceLength); |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
444 |
|
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
445 |
/* check the trailing bytes, they should begin with 10 */ |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
446 |
TUint i = 1; |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
447 |
|
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
448 |
do |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
449 |
{ |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
450 |
if ((pUtf8[i] & 0xc0) == 0x80) |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
451 |
{ |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
452 |
// add the trailing 6 bits to the current unicode char |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
453 |
currentUnicodeCharacter = (currentUnicodeCharacter <<6 ) | (pUtf8[i] & 0x3F); |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
454 |
} |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
455 |
else |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
456 |
{ |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
457 |
// ill formed character (doesn't have a lead 10) |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
458 |
currentUnicodeCharacter = replacementcharacter; |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
459 |
UpdateUnconvertibleInfo(aNumberOfUnconvertibleCharacters, |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
460 |
aIndexOfFirstByteOfFirstUnconvertibleCharacter, pUtf8-aUtf8.Ptr()); |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
461 |
illFormed=ETrue; |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
462 |
break; |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
463 |
} |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
464 |
i++; |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
465 |
} |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
466 |
while (i < sequenceLength); |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
467 |
} |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
468 |
|
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
469 |
/* conformance check. bits of above table for reference. |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
470 |
* +----------------------------------------------------------------+ |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
471 |
* | Code Points | 1st byte | 2nd byte | 3rd byte | 4th byte | |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
472 |
* +--------------------+----------+----------+----------+----------+ |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
473 |
* | U+0800..U+0FFF | E0 | A0..BF | 80..BF | | 3 bytes, 1st == 0xE0, 2nd < 0xA0 |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
474 |
* | U+D000..U+D7FF | ED | 80..9F | 80..BF | | 3 bytes, 1st == 0xED, 2nd > 0x9F |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
475 |
* | U+10000..U+3FFFF | F0 | 90..BF | 80..BF | 80..BF | 4 bytes, 1st == 0xf0, 2nd < 0x90 |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
476 |
* | U+100000..U+10FFFF | F4 | 80..8F | 80..BF | 80..BF | 4 bytes, 1st == 0xF4, 2nd > 0x8F |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
477 |
* +--------------------+----------+----------+----------+----------+ |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
478 |
*/ |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
479 |
|
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
480 |
if (currentUnicodeCharacter != replacementcharacter) |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
481 |
{ |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
482 |
if (sequenceLength == 3) |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
483 |
{ |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
484 |
if ((pUtf8[0] == 0xE0) && (pUtf8[1] < 0xA0)) |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
485 |
{ |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
486 |
currentUnicodeCharacter = replacementcharacter; |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
487 |
UpdateUnconvertibleInfo(aNumberOfUnconvertibleCharacters, |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
488 |
aIndexOfFirstByteOfFirstUnconvertibleCharacter, pUtf8-aUtf8.Ptr()); |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
489 |
illFormed=ETrue; |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
490 |
} |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
491 |
else if ((pUtf8[0] == 0xED) && (pUtf8[1] > 0x9F)) |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
492 |
{ |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
493 |
currentUnicodeCharacter = replacementcharacter; |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
494 |
UpdateUnconvertibleInfo(aNumberOfUnconvertibleCharacters, |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
495 |
aIndexOfFirstByteOfFirstUnconvertibleCharacter, pUtf8-aUtf8.Ptr()); |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
496 |
illFormed=ETrue; |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
497 |
} |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
498 |
} |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
499 |
else if (sequenceLength == 4) |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
500 |
{ |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
501 |
if ((pUtf8[0] == 0xF0) && (pUtf8[1] < 0x90)) |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
502 |
{ |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
503 |
currentUnicodeCharacter = replacementcharacter; |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
504 |
UpdateUnconvertibleInfo(aNumberOfUnconvertibleCharacters, |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
505 |
aIndexOfFirstByteOfFirstUnconvertibleCharacter, pUtf8-aUtf8.Ptr()); |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
506 |
illFormed=ETrue; |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
507 |
} |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
508 |
else if ((pUtf8[0] == 0xF4) && (pUtf8[1] > 0x8F)) |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
509 |
{ |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
510 |
currentUnicodeCharacter = replacementcharacter; |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
511 |
UpdateUnconvertibleInfo(aNumberOfUnconvertibleCharacters, |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
512 |
aIndexOfFirstByteOfFirstUnconvertibleCharacter, pUtf8-aUtf8.Ptr()); |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
513 |
illFormed=ETrue; |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
514 |
} |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
515 |
} |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
516 |
|
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
517 |
|
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
518 |
/* last conformance check - Unicode 5.0 section 3.9 D92 Because surrogate code points |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
519 |
* are not Unicode scalar values, any UTF-8 byte sequence that would map to code |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
520 |
* points D800..DFFF is ill formed */ |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
521 |
|
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
522 |
if ((currentUnicodeCharacter >= 0xD800) && (currentUnicodeCharacter <= 0xDFFF)) |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
523 |
{ |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
524 |
currentUnicodeCharacter = replacementcharacter; |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
525 |
UpdateUnconvertibleInfo(aNumberOfUnconvertibleCharacters, |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
526 |
aIndexOfFirstByteOfFirstUnconvertibleCharacter, pUtf8-aUtf8.Ptr()); |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
527 |
illFormed=ETrue; |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
528 |
} |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
529 |
} |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
530 |
// end conformance check |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
531 |
} |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
532 |
|
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
533 |
// would this character generate a surrogate pair in UTF-16? |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
534 |
if (currentUnicodeCharacter > 0xFFFF) |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
535 |
{ |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
536 |
// is there enough space to hold a surrogate pair in the output? |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
537 |
if (pUnicode >= pLastUnicode) |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
538 |
{ |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
539 |
break; // no, end processing. |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
540 |
} |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
541 |
|
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
542 |
TUint surrogate = (currentUnicodeCharacter>>10) + 0xD7C0; |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
543 |
*pUnicode++ = STATIC_CAST(TUint16, surrogate); |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
544 |
|
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
545 |
surrogate = (currentUnicodeCharacter & 0x3FF) + 0xDC00; |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
546 |
*pUnicode++ = STATIC_CAST(TUint16, surrogate); |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
547 |
} |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
548 |
else |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
549 |
{ |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
550 |
*pUnicode++ = STATIC_CAST(TUint16, currentUnicodeCharacter); |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
551 |
} |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
552 |
|
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
553 |
// move the input pointer |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
554 |
if (currentUnicodeCharacter != replacementcharacter) |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
555 |
{ |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
556 |
pUtf8 += sequenceLength; |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
557 |
} |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
558 |
else if(illFormed == EFalse) |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
559 |
{ |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
560 |
pUtf8 += (sequenceLength); |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
561 |
} |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
562 |
else |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
563 |
{ |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
564 |
// we had a character we didn't recognize (i.e. it was invalid) |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
565 |
// so move to the next character in the input |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
566 |
pUtf8++; |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
567 |
} |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
568 |
|
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
569 |
if ((pUtf8 > pLastUtf8) || (pUnicode > pLastUnicode)) |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
570 |
{ |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
571 |
break; // we've either reached the end of the input or the end of output |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
572 |
} |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
573 |
} |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
574 |
|
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
575 |
aUnicode.SetLength(pUnicode - aUnicode.Ptr()); |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
576 |
return (pLastUtf8 - pUtf8 + 1); |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
577 |
} |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
578 |
|
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
579 |
/** Given a sample text this function attempts to determine whether or not |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
580 |
* the same text is encoded using the UTF-8 standard encoding scheme. |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
581 |
|
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
582 |
@param TInt a confidence level, given at certain value. if the given sample |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
583 |
is UTF-8 this value will not be changed (unless > 100) then its |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
584 |
set to 100. Otherwise if the same isn't UTF-8, its set to 0. |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
585 |
@param TDesC8 sample text. |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
586 |
UTF-8. The default is EFalse. |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
587 |
@return void |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
588 |
*/ |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
589 |
|
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
590 |
/* of note: conformance. Unicode standard 5.0 section 3.9, table 3-7 |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
591 |
* Well formed UTF-8 Byte Sequences, full table. |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
592 |
* +----------------------------------------------------------------+ |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
593 |
* | Code Points | 1st byte | 2nd byte | 3rd byte | 4th byte | |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
594 |
* +--------------------+----------+----------+----------+----------+ |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
595 |
* | U+0000..U+007F | 00..7D | | | | 1 byte, ascii |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
596 |
* | U+0080..U+07FF | C2..DF | 80..BF | | | 2 bytes, error if 1st < 0xC2 |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
597 |
* | U+0800..U+0FFF | E0 | A0..BF | 80..BF | | 3 bytes, 1st == 0xE0, error if 2nd < 0xA0 |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
598 |
* | U+1000..U+CFFF | E1..EC | 80..BF | 80..BF | | normal |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
599 |
* | U+D000..U+D7FF | ED | 80..9F | 80..BF | | 3 bytes, 1st == 0xED, error if 2nd > 0x9F |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
600 |
* | U+E000..U+FFFF | EE..EF | 80..BF | 80..BF | | normal |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
601 |
* | U+10000..U+3FFFF | F0 | 90..BF | 80..BF | 80..BF | 4 bytes, 1st == 0xf0, error if 2nd < 0x90 |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
602 |
* | U+40000..U+FFFFF | F1..F3 | 80..BF | 80..BF | 80..BF | normal |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
603 |
* | U+100000..U+10FFFF | F4 | 80..8F | 80..BF | 80..BF | 4 bytes, 1st == 0xF4, error if 2nd > 0x8F |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
604 |
* +--------------------+----------+----------+----------+----------+ |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
605 |
* |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
606 |
* As a consequence of the well-formedness conditions specified in table 3-7, |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
607 |
* the following byte values are disallowed in UTF-8: C0-C1, F5-FF. |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
608 |
* |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
609 |
* Code Rules: |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
610 |
* R1: If the string contains any non-UTF-8 characters the returned confidence |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
611 |
* is 0. Valid UTF-8 combinations are listed in the above table. |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
612 |
* R2: Otherwise if the string starts with a UTF-8 BOM (byte order mark) in |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
613 |
* the (see ) the returned confidence is 95. |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
614 |
* R3: Otherwise the confidence returned is based upon the sample string |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
615 |
* length. |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
616 |
* R4: If the sample string is under 75 characters, the confidence is set to |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
617 |
* 75. |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
618 |
*/ |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
619 |
void IsCharacterSetUTF8(TInt& aConfidenceLevel, const TDesC8& aSample) |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
620 |
{ |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
621 |
|
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
622 |
TInt sampleLength = aSample.Length(); |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
623 |
|
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
624 |
if (sampleLength == 0) |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
625 |
{ |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
626 |
aConfidenceLevel = 89; |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
627 |
return; |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
628 |
} |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
629 |
TInt bytesRemaining = 0; |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
630 |
TUint sequenceLength = 0; |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
631 |
|
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
632 |
aConfidenceLevel = sampleLength; |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
633 |
|
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
634 |
const TUint8* buffer = &aSample[0]; |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
635 |
|
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
636 |
if (sampleLength < 95) |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
637 |
{ |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
638 |
// check for the BOM |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
639 |
if ((sampleLength >= 3) && |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
640 |
((buffer[0] == 0xEF) && |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
641 |
(buffer[1] == 0xBB) && |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
642 |
(buffer[2] == 0xBF)) |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
643 |
) |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
644 |
{ |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
645 |
aConfidenceLevel = 95; |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
646 |
} |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
647 |
else if (sampleLength < 75) |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
648 |
{ |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
649 |
aConfidenceLevel = 75; |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
650 |
} |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
651 |
} |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
652 |
|
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
653 |
for (TInt index = 0;index != sampleLength;index++) |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
654 |
{ |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
655 |
|
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
656 |
if (bytesRemaining > 0) |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
657 |
{ |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
658 |
// bytesRemaining > 0, means that a byte representing the start of a |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
659 |
// multibyte sequence was encountered and the bytesRemaining is the |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
660 |
// number of bytes to follow. |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
661 |
|
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
662 |
if ((buffer[index] & 0xc0) == 0x80) |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
663 |
{ |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
664 |
// need to check for ill-formed sequences -- all are in the 2nd byte |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
665 |
|
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
666 |
if ((sequenceLength == 3) && (bytesRemaining == 2)) |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
667 |
{ |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
668 |
if ((buffer[index - 1] == 0xe0) && (buffer[index] < 0xa0)) |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
669 |
{ |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
670 |
aConfidenceLevel = 0; |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
671 |
break; |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
672 |
} |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
673 |
else if ((buffer[index - 1] == 0xed) && (buffer[index] > 0x9f)) |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
674 |
{ |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
675 |
aConfidenceLevel = 0; |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
676 |
break; |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
677 |
} |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
678 |
} |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
679 |
else if ((sequenceLength == 4) && (bytesRemaining == 3)) |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
680 |
{ |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
681 |
if ((buffer[index - 1] == 0xf0) && (buffer[index] < 0x90)) |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
682 |
{ |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
683 |
aConfidenceLevel = 0; |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
684 |
break; |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
685 |
} |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
686 |
else if ((buffer[index - 1] == 0xf4) && (buffer[index] > 0x8f)) |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
687 |
{ |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
688 |
aConfidenceLevel = 0; |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
689 |
break; |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
690 |
} |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
691 |
} |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
692 |
|
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
693 |
--bytesRemaining; |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
694 |
continue; |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
695 |
} |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
696 |
else |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
697 |
{ |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
698 |
aConfidenceLevel = 0; |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
699 |
break; |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
700 |
} |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
701 |
} |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
702 |
|
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
703 |
if (bytesRemaining == 0) |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
704 |
{ |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
705 |
if (buffer[index] < 0x80) |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
706 |
{ |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
707 |
// The value of aSample[index] is in the range 0x00-0x7f |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
708 |
//UTF8 maintains ASCII transparency. So it's a valid |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
709 |
//UTF8. Do nothing, check next value. |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
710 |
continue; |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
711 |
} |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
712 |
else if ((buffer[index] >= 0xc2) && (buffer[index] < 0xe0)) |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
713 |
{ |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
714 |
// valid start of a 2 byte sequence (see conformance note) |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
715 |
sequenceLength = 2; |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
716 |
bytesRemaining = 1; |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
717 |
} |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
718 |
else if ((buffer[index] & 0xf0) == 0xe0) |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
719 |
{ |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
720 |
// valid start of a 3 byte sequence |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
721 |
sequenceLength = 3; |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
722 |
bytesRemaining = 2; |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
723 |
} |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
724 |
else if ((buffer[index] >= 0xf0) && (buffer[index] < 0xf5)) |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
725 |
{ |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
726 |
// valid start of a 4 byte sequence (see conformance note) |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
727 |
sequenceLength = 4; |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
728 |
bytesRemaining = 3; |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
729 |
} |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
730 |
else |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
731 |
{ |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
732 |
// wasn't anything expected so must be an illegal/irregular UTF8 coded value |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
733 |
aConfidenceLevel = 0; |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
734 |
break; |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
735 |
} |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
736 |
} |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
737 |
} // for |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
738 |
|
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
739 |
aConfidenceLevel = (aConfidenceLevel > 0)? ((aConfidenceLevel > 100)? 100: aConfidenceLevel): 0; |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
740 |
} |
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
741 |
|
35751d3474b7
Revision: 200935
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
742 |
// End of file |