|
/*
* Copyright (c) 1997-2009 Nokia Corporation and/or its subsidiary(-ies).
* All rights reserved.
* This component and the accompanying materials are made available
* under the terms of "Eclipse Public License v1.0"
* which accompanies this distribution, and is available
* at the URL "http://www.eclipse.org/legal/epl-v10.html".
*
* Initial Contributors:
* Nokia Corporation - initial contribution.
*
* Contributors:
*
* Description:
*
*/
|
17 |
|
18 |
|
19 #include <STDLIB.H> |
|
20 |
|
/* Error code returned by Utf8ToUnicode() when its input is ill-formed. */
const int KErrorIllFormedInput=-1;

/*
 * Utf8ToUnicode
 *
 * Converts the '\0'-terminated UTF-8 string aUtf8 into UTF-16 code units,
 * stored one per wchar_t element. Characters above U+FFFF are written as a
 * surrogate pair (two elements). The output is always '\0'-terminated.
 *
 * aUnicode  Destination buffer, or NULL to measure only. The function takes
 *           no buffer size, so the caller must supply room for the returned
 *           count plus one terminator.
 * aUtf8     '\0'-terminated UTF-8 input.
 *
 * Returns the number of wchar_t elements produced (or that would be produced
 * when aUnicode is NULL), excluding the terminator, or KErrorIllFormedInput
 * when the input contains:
 *   - a byte that cannot start a sequence (0x80..0xBF, 0xF8..0xFF),
 *   - a missing or invalid continuation byte (including a premature '\0'),
 *   - an over-long encoding, or
 *   - a 4-byte sequence that encodes a value above U+10FFFF.
 * The contents of aUnicode are unspecified after an error.
 *
 * Note: 3-byte sequences that encode values in 0xD800..0xDFFF are not
 * rejected; the value is stored as a single code unit.
 */
int Utf8ToUnicode(wchar_t* aUnicode, const char* aUtf8)
{
	/* Read bytes as unsigned so lead/continuation tests do not depend on
	   whether plain char is signed. */
	const unsigned char* in=(const unsigned char*)aUtf8;
	/* Count elements instead of advancing aUnicode: arithmetic on a NULL
	   pointer (measuring mode) is undefined behaviour. */
	size_t unitCount=0;

	while (*in!='\0')
	{
		const unsigned int leadByte=*in;
		unsigned long codePoint;
		unsigned long smallestAllowed; /* lowest value this length may encode */
		unsigned int continuationBytes;
		unsigned int i;

		if (leadByte<0x80)
		{
			codePoint=leadByte;
			smallestAllowed=0x0UL;
			continuationBytes=0;
		}
		else if ((leadByte&0xe0)==0xc0)
		{
			codePoint=leadByte&0x1f;
			smallestAllowed=0x80UL;
			continuationBytes=1;
		}
		else if ((leadByte&0xf0)==0xe0)
		{
			codePoint=leadByte&0x0f;
			smallestAllowed=0x800UL;
			continuationBytes=2;
		}
		else if ((leadByte&0xf8)==0xf0)
		{
			codePoint=leadByte&0x07;
			smallestAllowed=0x10000UL;
			continuationBytes=3;
		}
		else
		{
			return KErrorIllFormedInput;
		}
		++in;

		for (i=0; i<continuationBytes; ++i)
		{
			/* A '\0' fails this test too, so the loop never reads past
			   the end of the input string. */
			if ((*in&0xc0)!=0x80)
			{
				return KErrorIllFormedInput;
			}
			codePoint=(codePoint<<6)|(unsigned long)(*in&0x3f);
			++in;
		}

		/* Reject over-long forms and values beyond the Unicode range. */
		if (codePoint<smallestAllowed || codePoint>0x10ffffUL)
		{
			return KErrorIllFormedInput;
		}

		if (codePoint<0x10000UL)
		{
			if (aUnicode!=NULL)
			{
				aUnicode[unitCount]=(wchar_t)codePoint;
			}
			unitCount+=1;
		}
		else
		{
			/* Split into a high/low surrogate pair. */
			codePoint-=0x10000UL;
			if (aUnicode!=NULL)
			{
				aUnicode[unitCount]=(wchar_t)(0xd800UL|(codePoint>>10));
				aUnicode[unitCount+1]=(wchar_t)(0xdc00UL|(codePoint&0x3ffUL));
			}
			unitCount+=2;
		}
	}

	if (aUnicode!=NULL)
	{
		aUnicode[unitCount]=L'\0';
	}
	return (int)unitCount;
}
|
135 #include <STDIO.H> |
|
136 int UnicodeToUtf8(char* aUtf8, const wchar_t* aUnicode) |
|
137 // must '\0'-terminate the output |
|
138 { |
|
139 char* startOfUtf8=aUtf8; |
|
140 for (;;) |
|
141 { |
|
142 unsigned int currentUnicodeCharacter=*aUnicode; |
|
143 if (currentUnicodeCharacter=='\0') |
|
144 { |
|
145 break; |
|
146 } |
|
147 if ((currentUnicodeCharacter&0xff80)==0x0000) |
|
148 { |
|
149 if (startOfUtf8!=NULL) |
|
150 { |
|
151 *aUtf8=(char)currentUnicodeCharacter; |
|
152 } |
|
153 } |
|
154 else if ((currentUnicodeCharacter&0xf800)==0x0000) |
|
155 { |
|
156 if (startOfUtf8!=NULL) |
|
157 { |
|
158 *aUtf8=(char)(0xc0|(currentUnicodeCharacter>>6)); |
|
159 } |
|
160 ++aUtf8; |
|
161 if (startOfUtf8!=NULL) |
|
162 { |
|
163 *aUtf8=(char)(0x80|(currentUnicodeCharacter&0x3f)); |
|
164 } |
|
165 } |
|
166 else if ((currentUnicodeCharacter&0xfc00)==0xd800) |
|
167 { |
|
168 currentUnicodeCharacter+=0x0040; |
|
169 if (startOfUtf8!=NULL) |
|
170 { |
|
171 *aUtf8=(char)(0xf0|((currentUnicodeCharacter>>8)&0x07)); |
|
172 } |
|
173 ++aUtf8; |
|
174 if (startOfUtf8!=NULL) |
|
175 { |
|
176 *aUtf8=(char)(0x80|((currentUnicodeCharacter>>2)&0x3f)); |
|
177 } |
|
178 { |
|
179 unsigned int currentUtf8Byte=(0x80|((currentUnicodeCharacter&0x03)<<4)); |
|
180 ++aUnicode; |
|
181 currentUnicodeCharacter=*aUnicode; |
|
182 if ((currentUnicodeCharacter&0xfc00)!=0xdc00) |
|
183 { |
|
184 return KErrorIllFormedInput; |
|
185 } |
|
186 currentUtf8Byte|=((currentUnicodeCharacter>>6)&0x0f); |
|
187 ++aUtf8; |
|
188 if (startOfUtf8!=NULL) |
|
189 { |
|
190 *aUtf8=(char)currentUtf8Byte; |
|
191 } |
|
192 } |
|
193 ++aUtf8; |
|
194 if (startOfUtf8!=NULL) |
|
195 { |
|
196 *aUtf8=(char)(0x80|(currentUnicodeCharacter&0x3f)); |
|
197 } |
|
198 } |
|
199 else |
|
200 { |
|
201 if (startOfUtf8!=NULL) |
|
202 { |
|
203 *aUtf8=(char)(0xe0|(currentUnicodeCharacter>>12)); |
|
204 } |
|
205 ++aUtf8; |
|
206 if (startOfUtf8!=NULL) |
|
207 { |
|
208 *aUtf8=(char)(0x80|((currentUnicodeCharacter>>6)&0x3f)); |
|
209 } |
|
210 ++aUtf8; |
|
211 if (startOfUtf8!=NULL) |
|
212 { |
|
213 *aUtf8=(char)(0x80|(currentUnicodeCharacter&0x3f)); |
|
214 } |
|
215 } |
|
216 ++aUtf8; |
|
217 ++aUnicode; |
|
218 } |
|
219 if (startOfUtf8!=NULL) |
|
220 { |
|
221 *aUtf8='\0'; |
|
222 } |
|
223 return aUtf8-startOfUtf8; |
|
224 } |
|
225 |