|
1 /* GLIB - Library of useful routines for C programming |
|
2 * Copyright (C) 2001 Matthias Clasen <matthiasc@poet.de> |
|
3 * Portion Copyright © 2008-09 Nokia Corporation and/or its subsidiary(-ies). All rights reserved. |
|
4 * This library is free software; you can redistribute it and/or |
|
5 * modify it under the terms of the GNU Lesser General Public |
|
6 * License as published by the Free Software Foundation; either |
|
7 * version 2 of the License, or (at your option) any later version. |
|
8 * |
|
9 * This library is distributed in the hope that it will be useful, |
|
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
12 * Lesser General Public License for more details. |
|
13 * |
|
14 * You should have received a copy of the GNU Lesser General Public |
|
15 * License along with this library; if not, write to the |
|
16 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, |
|
17 * Boston, MA 02111-1307, USA. |
|
18 */ |
|
19 |
|
20 #include "glib.h" |
|
21 #ifdef __SYMBIAN32__ |
|
22 #include "mrt2_glib2_test.h" |
|
23 #endif /*__SYMBIAN32__*/ |
|
24 |
|
/*
 * UNICODE_VALID:
 * Evaluates to non-zero when Char passes all of the range checks below,
 * and to zero when any one of them rejects it:
 *   - Char is 0x110000 or larger;
 *   - Char lies in 0xD800..0xDFFF (the UTF-16 surrogate block);
 *   - Char lies in 0xFDD0..0xFDEF;
 *   - the low 16 bits of Char are 0xFFFE or 0xFFFF.
 *
 * Char is expanded several times, so pass an expression without side
 * effects.
 */
#define UNICODE_VALID(Char)                         \
  (!((Char) >= 0x110000 ||                          \
     (((Char) & 0xFFFFF800) == 0xD800) ||           \
     ((Char) >= 0xFDD0 && (Char) <= 0xFDEF) ||      \
     ((Char) & 0xFFFE) == 0xFFFE))
|
30 |
|
31 |
|
32 |
|
/* Set to TRUE by do_test() as soon as one case mismatches; main() turns it
 * into the process exit status. */
static gboolean any_failed = FALSE;
|
34 |
|
/*
 * Table of validation cases.  main() feeds each entry to do_test(), which
 * calls g_utf8_validate (text, max_len, &end) and compares the outcome with
 * the expectations stored here.  The table ends at the first entry whose
 * text is NULL.
 */
struct {
  const gchar *text;  /* input string passed to g_utf8_validate() */
  gint max_len;       /* max_len argument passed to g_utf8_validate() */
  gint offset;        /* expected value of (end - text) after the call */
  gboolean valid;     /* expected return value of g_utf8_validate() */
} test[] = {
  /* some tests to check max_len handling */
  /* length 1 */
  { "abcde", -1, 5, TRUE },
  { "abcde", 3, 3, TRUE },
  { "abcde", 5, 5, TRUE },
  { "abcde", 7, 5, FALSE },
  /* length 2 */
  { "\xc2\xa9\xc2\xa9\xc2\xa9", -1, 6, TRUE },
  { "\xc2\xa9\xc2\xa9\xc2\xa9", 1, 0, FALSE },
  { "\xc2\xa9\xc2\xa9\xc2\xa9", 2, 2, TRUE },
  { "\xc2\xa9\xc2\xa9\xc2\xa9", 3, 2, FALSE },
  { "\xc2\xa9\xc2\xa9\xc2\xa9", 4, 4, TRUE },
  { "\xc2\xa9\xc2\xa9\xc2\xa9", 5, 4, FALSE },
  { "\xc2\xa9\xc2\xa9\xc2\xa9", 6, 6, TRUE },
  { "\xc2\xa9\xc2\xa9\xc2\xa9", 7, 6, FALSE },
  /* length 3 */
  { "\xe2\x89\xa0\xe2\x89\xa0", -1, 6, TRUE },
  { "\xe2\x89\xa0\xe2\x89\xa0", 1, 0, FALSE },
  { "\xe2\x89\xa0\xe2\x89\xa0", 2, 0, FALSE },
  { "\xe2\x89\xa0\xe2\x89\xa0", 3, 3, TRUE },
  { "\xe2\x89\xa0\xe2\x89\xa0", 4, 3, FALSE },
  { "\xe2\x89\xa0\xe2\x89\xa0", 5, 3, FALSE },
  { "\xe2\x89\xa0\xe2\x89\xa0", 6, 6, TRUE },
  { "\xe2\x89\xa0\xe2\x89\xa0", 7, 6, FALSE },

  /* examples from http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt */
  /* greek 'kosme' */
  { "\xce\xba\xe1\xbd\xb9\xcf\x83\xce\xbc\xce\xb5", -1, 11, TRUE },
  /* first sequence of each length */
  { "\x00", -1, 0, TRUE },
  { "\xc2\x80", -1, 2, TRUE },
  { "\xe0\xa0\x80", -1, 3, TRUE },
  { "\xf0\x90\x80\x80", -1, 4, TRUE },
  { "\xf8\x88\x80\x80\x80", -1, 0, FALSE },
  { "\xfc\x84\x80\x80\x80\x80", -1, 0, FALSE },
  /* last sequence of each length */
  { "\x7f", -1, 1, TRUE },
  { "\xdf\xbf", -1, 2, TRUE },
  { "\xef\xbf\xbf", -1, 0, FALSE },
  { "\xf7\xbf\xbf\xbf", -1, 0, FALSE },
  { "\xfb\xbf\xbf\xbf\xbf", -1, 0, FALSE },
  { "\xfd\xbf\xbf\xbf\xbf\xbf", -1, 0, FALSE },
  /* other boundary conditions */
  { "\xed\x9f\xbf", -1, 3, TRUE },
  { "\xee\x80\x80", -1, 3, TRUE },
  { "\xef\xbf\xbd", -1, 3, TRUE },
  { "\xf4\x8f\xbf\xbf", -1, 0, FALSE },
  { "\xf4\x90\x80\x80", -1, 0, FALSE },
  /* malformed sequences */
  /* continuation bytes */
  { "\x80", -1, 0, FALSE },
  { "\xbf", -1, 0, FALSE },
  { "\x80\xbf", -1, 0, FALSE },
  { "\x80\xbf\x80", -1, 0, FALSE },
  { "\x80\xbf\x80\xbf", -1, 0, FALSE },
  { "\x80\xbf\x80\xbf\x80", -1, 0, FALSE },
  { "\x80\xbf\x80\xbf\x80\xbf", -1, 0, FALSE },
  { "\x80\xbf\x80\xbf\x80\xbf\x80", -1, 0, FALSE },

  /* all possible continuation bytes (0x80..0xbf), each on its own */
  { "\x80", -1, 0, FALSE },
  { "\x81", -1, 0, FALSE },
  { "\x82", -1, 0, FALSE },
  { "\x83", -1, 0, FALSE },
  { "\x84", -1, 0, FALSE },
  { "\x85", -1, 0, FALSE },
  { "\x86", -1, 0, FALSE },
  { "\x87", -1, 0, FALSE },
  { "\x88", -1, 0, FALSE },
  { "\x89", -1, 0, FALSE },
  { "\x8a", -1, 0, FALSE },
  { "\x8b", -1, 0, FALSE },
  { "\x8c", -1, 0, FALSE },
  { "\x8d", -1, 0, FALSE },
  { "\x8e", -1, 0, FALSE },
  { "\x8f", -1, 0, FALSE },
  { "\x90", -1, 0, FALSE },
  { "\x91", -1, 0, FALSE },
  { "\x92", -1, 0, FALSE },
  { "\x93", -1, 0, FALSE },
  { "\x94", -1, 0, FALSE },
  { "\x95", -1, 0, FALSE },
  { "\x96", -1, 0, FALSE },
  { "\x97", -1, 0, FALSE },
  { "\x98", -1, 0, FALSE },
  { "\x99", -1, 0, FALSE },
  { "\x9a", -1, 0, FALSE },
  { "\x9b", -1, 0, FALSE },
  { "\x9c", -1, 0, FALSE },
  { "\x9d", -1, 0, FALSE },
  { "\x9e", -1, 0, FALSE },
  { "\x9f", -1, 0, FALSE },
  { "\xa0", -1, 0, FALSE },
  { "\xa1", -1, 0, FALSE },
  { "\xa2", -1, 0, FALSE },
  { "\xa3", -1, 0, FALSE },
  { "\xa4", -1, 0, FALSE },
  { "\xa5", -1, 0, FALSE },
  { "\xa6", -1, 0, FALSE },
  { "\xa7", -1, 0, FALSE },
  { "\xa8", -1, 0, FALSE },
  { "\xa9", -1, 0, FALSE },
  { "\xaa", -1, 0, FALSE },
  { "\xab", -1, 0, FALSE },
  { "\xac", -1, 0, FALSE },
  { "\xad", -1, 0, FALSE },
  { "\xae", -1, 0, FALSE },
  { "\xaf", -1, 0, FALSE },
  { "\xb0", -1, 0, FALSE },
  { "\xb1", -1, 0, FALSE },
  { "\xb2", -1, 0, FALSE },
  { "\xb3", -1, 0, FALSE },
  { "\xb4", -1, 0, FALSE },
  { "\xb5", -1, 0, FALSE },
  { "\xb6", -1, 0, FALSE },
  { "\xb7", -1, 0, FALSE },
  { "\xb8", -1, 0, FALSE },
  { "\xb9", -1, 0, FALSE },
  { "\xba", -1, 0, FALSE },
  { "\xbb", -1, 0, FALSE },
  { "\xbc", -1, 0, FALSE },
  { "\xbd", -1, 0, FALSE },
  { "\xbe", -1, 0, FALSE },
  { "\xbf", -1, 0, FALSE },
  /* lone start characters (0xc0..0xfd), each followed by a space */
  { "\xc0\x20", -1, 0, FALSE },
  { "\xc1\x20", -1, 0, FALSE },
  { "\xc2\x20", -1, 0, FALSE },
  { "\xc3\x20", -1, 0, FALSE },
  { "\xc4\x20", -1, 0, FALSE },
  { "\xc5\x20", -1, 0, FALSE },
  { "\xc6\x20", -1, 0, FALSE },
  { "\xc7\x20", -1, 0, FALSE },
  { "\xc8\x20", -1, 0, FALSE },
  { "\xc9\x20", -1, 0, FALSE },
  { "\xca\x20", -1, 0, FALSE },
  { "\xcb\x20", -1, 0, FALSE },
  { "\xcc\x20", -1, 0, FALSE },
  { "\xcd\x20", -1, 0, FALSE },
  { "\xce\x20", -1, 0, FALSE },
  { "\xcf\x20", -1, 0, FALSE },
  { "\xd0\x20", -1, 0, FALSE },
  { "\xd1\x20", -1, 0, FALSE },
  { "\xd2\x20", -1, 0, FALSE },
  { "\xd3\x20", -1, 0, FALSE },
  { "\xd4\x20", -1, 0, FALSE },
  { "\xd5\x20", -1, 0, FALSE },
  { "\xd6\x20", -1, 0, FALSE },
  { "\xd7\x20", -1, 0, FALSE },
  { "\xd8\x20", -1, 0, FALSE },
  { "\xd9\x20", -1, 0, FALSE },
  { "\xda\x20", -1, 0, FALSE },
  { "\xdb\x20", -1, 0, FALSE },
  { "\xdc\x20", -1, 0, FALSE },
  { "\xdd\x20", -1, 0, FALSE },
  { "\xde\x20", -1, 0, FALSE },
  { "\xdf\x20", -1, 0, FALSE },
  { "\xe0\x20", -1, 0, FALSE },
  { "\xe1\x20", -1, 0, FALSE },
  { "\xe2\x20", -1, 0, FALSE },
  { "\xe3\x20", -1, 0, FALSE },
  { "\xe4\x20", -1, 0, FALSE },
  { "\xe5\x20", -1, 0, FALSE },
  { "\xe6\x20", -1, 0, FALSE },
  { "\xe7\x20", -1, 0, FALSE },
  { "\xe8\x20", -1, 0, FALSE },
  { "\xe9\x20", -1, 0, FALSE },
  { "\xea\x20", -1, 0, FALSE },
  { "\xeb\x20", -1, 0, FALSE },
  { "\xec\x20", -1, 0, FALSE },
  { "\xed\x20", -1, 0, FALSE },
  { "\xee\x20", -1, 0, FALSE },
  { "\xef\x20", -1, 0, FALSE },
  { "\xf0\x20", -1, 0, FALSE },
  { "\xf1\x20", -1, 0, FALSE },
  { "\xf2\x20", -1, 0, FALSE },
  { "\xf3\x20", -1, 0, FALSE },
  { "\xf4\x20", -1, 0, FALSE },
  { "\xf5\x20", -1, 0, FALSE },
  { "\xf6\x20", -1, 0, FALSE },
  { "\xf7\x20", -1, 0, FALSE },
  { "\xf8\x20", -1, 0, FALSE },
  { "\xf9\x20", -1, 0, FALSE },
  { "\xfa\x20", -1, 0, FALSE },
  { "\xfb\x20", -1, 0, FALSE },
  { "\xfc\x20", -1, 0, FALSE },
  { "\xfd\x20", -1, 0, FALSE },
  /* missing continuation bytes */
  { "\x20\xc0", -1, 1, FALSE },
  { "\x20\xe0\x80", -1, 1, FALSE },
  { "\x20\xf0\x80\x80", -1, 1, FALSE },
  { "\x20\xf8\x80\x80\x80", -1, 1, FALSE },
  { "\x20\xfc\x80\x80\x80\x80", -1, 1, FALSE },
  { "\x20\xdf", -1, 1, FALSE },
  { "\x20\xef\xbf", -1, 1, FALSE },
  { "\x20\xf7\xbf\xbf", -1, 1, FALSE },
  { "\x20\xfb\xbf\xbf\xbf", -1, 1, FALSE },
  { "\x20\xfd\xbf\xbf\xbf\xbf", -1, 1, FALSE },
  /* impossible bytes */
  { "\x20\xfe\x20", -1, 1, FALSE },
  { "\x20\xff\x20", -1, 1, FALSE },
  /* overlong sequences */
  { "\x20\xc0\xaf\x20", -1, 1, FALSE },
  { "\x20\xe0\x80\xaf\x20", -1, 1, FALSE },
  { "\x20\xf0\x80\x80\xaf\x20", -1, 1, FALSE },
  { "\x20\xf8\x80\x80\x80\xaf\x20", -1, 1, FALSE },
  { "\x20\xfc\x80\x80\x80\x80\xaf\x20", -1, 1, FALSE },
  { "\x20\xc1\xbf\x20", -1, 1, FALSE },
  { "\x20\xe0\x9f\xbf\x20", -1, 1, FALSE },
  { "\x20\xf0\x8f\xbf\xbf\x20", -1, 1, FALSE },
  { "\x20\xf8\x87\xbf\xbf\xbf\x20", -1, 1, FALSE },
  { "\x20\xfc\x83\xbf\xbf\xbf\xbf\x20", -1, 1, FALSE },
  { "\x20\xc0\x80\x20", -1, 1, FALSE },
  { "\x20\xe0\x80\x80\x20", -1, 1, FALSE },
  { "\x20\xf0\x80\x80\x80\x20", -1, 1, FALSE },
  { "\x20\xf8\x80\x80\x80\x80\x20", -1, 1, FALSE },
  { "\x20\xfc\x80\x80\x80\x80\x80\x20", -1, 1, FALSE },
  /* illegal code positions */
  { "\x20\xed\xa0\x80\x20", -1, 1, FALSE },
  { "\x20\xed\xad\xbf\x20", -1, 1, FALSE },
  { "\x20\xed\xae\x80\x20", -1, 1, FALSE },
  { "\x20\xed\xaf\xbf\x20", -1, 1, FALSE },
  { "\x20\xed\xb0\x80\x20", -1, 1, FALSE },
  { "\x20\xed\xbe\x80\x20", -1, 1, FALSE },
  { "\x20\xed\xbf\xbf\x20", -1, 1, FALSE },
  { "\x20\xed\xa0\x80\xed\xb0\x80\x20", -1, 1, FALSE },
  { "\x20\xed\xa0\x80\xed\xbf\xbf\x20", -1, 1, FALSE },
  { "\x20\xed\xad\xbf\xed\xb0\x80\x20", -1, 1, FALSE },
  { "\x20\xed\xad\xbf\xed\xbf\xbf\x20", -1, 1, FALSE },
  { "\x20\xed\xae\x80\xed\xb0\x80\x20", -1, 1, FALSE },
  { "\x20\xed\xae\x80\xed\xbf\xbf\x20", -1, 1, FALSE },
  { "\x20\xed\xaf\xbf\xed\xb0\x80\x20", -1, 1, FALSE },
  { "\x20\xed\xaf\xbf\xed\xbf\xbf\x20", -1, 1, FALSE },
  { "\x20\xef\xbf\xbe\x20", -1, 1, FALSE },
  { "\x20\xef\xbf\xbf\x20", -1, 1, FALSE },

  /* sentinel: the NULL text stops the loop in main() */
  { NULL, }
};
|
279 |
|
280 static void |
|
281 do_test (gint index, |
|
282 const gchar *text, |
|
283 gint max_len, |
|
284 gint offset, |
|
285 gboolean valid) |
|
286 { |
|
287 const gchar *end; |
|
288 gboolean result; |
|
289 |
|
290 result = g_utf8_validate (text, max_len, &end); |
|
291 |
|
292 if (result != valid || end - text != offset) |
|
293 { |
|
294 GString *str; |
|
295 const gchar *p; |
|
296 |
|
297 any_failed = TRUE; |
|
298 |
|
299 str = g_string_new (0); |
|
300 for (p = text; *p; p++) |
|
301 g_string_append_printf (str, "\\x%02hhx", *p); |
|
302 g_print ("%d: g_utf8_validate (\"%s\", %d) failed, " |
|
303 "expected %s %d, got %s %d\n", |
|
304 index, |
|
305 str->str, max_len, |
|
306 valid ? "TRUE" : "FALSE", offset, |
|
307 result ? "TRUE" : "FALSE", (gint) (end - text)); |
|
308 g_string_free (str, FALSE); |
|
309 } |
|
310 } |
|
311 |
|
312 int |
|
313 main (int argc, char *argv[]) |
|
314 { |
|
315 gint i; |
|
316 |
|
317 #ifdef __SYMBIAN32__ |
|
318 g_log_set_handler (NULL, G_LOG_FLAG_FATAL| G_LOG_FLAG_RECURSION | G_LOG_LEVEL_CRITICAL | G_LOG_LEVEL_WARNING | G_LOG_LEVEL_MESSAGE | G_LOG_LEVEL_INFO | G_LOG_LEVEL_DEBUG, &mrtLogHandler, NULL); |
|
319 g_set_print_handler(mrtPrintHandler); |
|
320 #endif /*__SYMBIAN32__*/ |
|
321 |
|
322 |
|
323 for (i = 0; test[i].text; i++) |
|
324 do_test (i, test[i].text, test[i].max_len, |
|
325 test[i].offset, test[i].valid); |
|
326 |
|
327 #ifdef __SYMBIAN32__ |
|
328 assert_failed = any_failed; |
|
329 testResultXml("utf8-validate"); |
|
330 #endif /* EMULATOR */ |
|
331 |
|
332 return any_failed ? 1 : 0; |
|
333 } |