|
/*
* Copyright (c) 2008 Nokia Corporation and/or its subsidiary(-ies).
* All rights reserved.
* This component and the accompanying materials are made available
* under the terms of "Eclipse Public License v1.0"
* which accompanies this distribution, and is available
* at the URL "http://www.eclipse.org/legal/epl-v10.html".
*
* Initial Contributors:
* Nokia Corporation - initial contribution.
*
* Contributors:
*
* Description:
*
*/
|
17 |
|
18 #undef G_DISABLE_ASSERT |
|
19 #undef G_LOG_DOMAIN |
|
20 |
|
21 #include <stdarg.h> |
|
22 #include <stdio.h> |
|
23 #include <stdlib.h> |
|
24 #include <string.h> |
|
25 #include <glib.h> |
|
26 |
|
27 #ifdef __SYMBIAN32__ |
|
28 #include "mrt2_glib2_test.h" |
|
29 #endif /*__SYMBIAN32__*/ |
|
30 static gint exit_status = 0; |
|
31 |
|
/*
 * croak:
 * @format: printf-style format string for the message.
 * @...: arguments consumed by @format.
 *
 * Writes the formatted message to stderr and terminates the process
 * with exit status 1.  This function does not return.
 *
 * The format string is only read, so it is taken as a pointer to const;
 * callers passing string literals or plain char * are unaffected.
 */
static void
croak (const char *format, ...)
{
  va_list args;

  va_start (args, format);
  vfprintf (stderr, format, args);
  va_end (args);

  exit (1);
}
|
43 |
|
44 static void |
|
45 fail (char *format, ...) |
|
46 { |
|
47 va_list va; |
|
48 |
|
49 va_start (va, format); |
|
50 vfprintf (stderr, format, va); |
|
51 va_end (va); |
|
52 |
|
53 exit_status |= 1; |
|
54 } |
|
55 |
|
/*
 * Expected classification of one test record.
 *
 * process() requires g_utf8_validate() to accept VALID input and to
 * reject every other classification; the additional checks performed
 * per value are noted below.
 */
typedef enum
{
  VALID      = 0, /* also round-tripped through UCS-4 and UTF-16 */
  INCOMPLETE = 1, /* conversion must report G_CONVERT_ERROR_PARTIAL_INPUT */
  NOTUNICODE = 2, /* still round-tripped through UCS-4 */
  OVERLONG   = 3, /* only the validation result is checked */
  MALFORMED  = 4  /* only the validation result is checked */
} Status;
|
64 |
|
65 static gboolean |
|
66 ucs4_equal (gunichar *a, gunichar *b) |
|
67 { |
|
68 while (*a && *b && (*a == *b)) |
|
69 { |
|
70 a++; |
|
71 b++; |
|
72 } |
|
73 |
|
74 return (*a == *b); |
|
75 } |
|
76 |
|
77 static gboolean |
|
78 utf16_equal (gunichar2 *a, gunichar2 *b) |
|
79 { |
|
80 while (*a && *b && (*a == *b)) |
|
81 { |
|
82 a++; |
|
83 b++; |
|
84 } |
|
85 |
|
86 return (*a == *b); |
|
87 } |
|
88 |
|
89 static gint |
|
90 utf16_count (gunichar2 *a) |
|
91 { |
|
92 gint result = 0; |
|
93 |
|
94 while (a[result]) |
|
95 result++; |
|
96 |
|
97 return result; |
|
98 } |
|
99 |
|
100 static void |
|
101 process (gint line, |
|
102 gchar *utf8, |
|
103 Status status, |
|
104 gunichar *ucs4, |
|
105 gint ucs4_len) |
|
106 { |
|
107 const gchar *end; |
|
108 gboolean is_valid = g_utf8_validate (utf8, -1, &end); |
|
109 GError *error = NULL; |
|
110 glong items_read, items_written; |
|
111 |
|
112 switch (status) |
|
113 { |
|
114 case VALID: |
|
115 if (!is_valid) |
|
116 { |
|
117 fail ("line %d: valid but g_utf8_validate returned FALSE\n", line); |
|
118 return; |
|
119 } |
|
120 break; |
|
121 case NOTUNICODE: |
|
122 case INCOMPLETE: |
|
123 case OVERLONG: |
|
124 case MALFORMED: |
|
125 if (is_valid) |
|
126 { |
|
127 fail ("line %d: invalid but g_utf8_validate returned TRUE\n", line); |
|
128 return; |
|
129 } |
|
130 break; |
|
131 } |
|
132 |
|
133 if (status == INCOMPLETE) |
|
134 { |
|
135 gunichar *ucs4_result; |
|
136 |
|
137 ucs4_result = g_utf8_to_ucs4 (utf8, -1, NULL, NULL, &error); |
|
138 |
|
139 if (!error || !g_error_matches (error, G_CONVERT_ERROR, G_CONVERT_ERROR_PARTIAL_INPUT)) |
|
140 { |
|
141 fail ("line %d: incomplete input not properly detected\n", line); |
|
142 return; |
|
143 } |
|
144 g_clear_error (&error); |
|
145 |
|
146 ucs4_result = g_utf8_to_ucs4 (utf8, -1, &items_read, NULL, &error); |
|
147 |
|
148 if (!ucs4_result || items_read == strlen (utf8)) |
|
149 { |
|
150 fail ("line %d: incomplete input not properly detected\n", line); |
|
151 return; |
|
152 } |
|
153 |
|
154 g_free (ucs4_result); |
|
155 } |
|
156 |
|
157 if (status == VALID || status == NOTUNICODE) |
|
158 { |
|
159 gunichar *ucs4_result; |
|
160 gchar *utf8_result; |
|
161 |
|
162 ucs4_result = g_utf8_to_ucs4 (utf8, -1, &items_read, &items_written, &error); |
|
163 if (!ucs4_result) |
|
164 { |
|
165 fail ("line %d: conversion to ucs4 failed: %s\n", line, error->message); |
|
166 return; |
|
167 } |
|
168 |
|
169 if (!ucs4_equal (ucs4_result, ucs4) || |
|
170 items_read != strlen (utf8) || |
|
171 items_written != ucs4_len) |
|
172 { |
|
173 fail ("line %d: results of conversion to ucs4 do not match expected.\n", line); |
|
174 return; |
|
175 } |
|
176 |
|
177 g_free (ucs4_result); |
|
178 |
|
179 ucs4_result = g_utf8_to_ucs4_fast (utf8, -1, &items_written); |
|
180 |
|
181 if (!ucs4_equal (ucs4_result, ucs4) || |
|
182 items_written != ucs4_len) |
|
183 { |
|
184 fail ("line %d: results of conversion to ucs4 do not match expected.\n", line); |
|
185 return; |
|
186 } |
|
187 |
|
188 utf8_result = g_ucs4_to_utf8 (ucs4_result, -1, &items_read, &items_written, &error); |
|
189 if (!utf8_result) |
|
190 { |
|
191 fail ("line %d: conversion back to utf8 failed: %s", line, error->message); |
|
192 return; |
|
193 } |
|
194 |
|
195 if (strcmp (utf8_result, utf8) != 0 || |
|
196 items_read != ucs4_len || |
|
197 items_written != strlen (utf8)) |
|
198 { |
|
199 fail ("line %d: conversion back to utf8 did not match original\n", line); |
|
200 return; |
|
201 } |
|
202 |
|
203 g_free (utf8_result); |
|
204 g_free (ucs4_result); |
|
205 } |
|
206 |
|
207 if (status == VALID) |
|
208 { |
|
209 gunichar2 *utf16_expected_tmp; |
|
210 gunichar2 *utf16_expected; |
|
211 gunichar2 *utf16_from_utf8; |
|
212 gunichar2 *utf16_from_ucs4; |
|
213 gunichar *ucs4_result; |
|
214 gsize bytes_written; |
|
215 gint n_chars; |
|
216 gchar *utf8_result; |
|
217 |
|
218 #if defined(G_PLATFORM_WIN32) || defined(__SYMBIAN32__) |
|
219 #define TARGET "UTF-16LE" |
|
220 #else |
|
221 #define TARGET "UTF-16" |
|
222 #endif |
|
223 |
|
224 if (!(utf16_expected_tmp = (gunichar2 *)g_convert (utf8, -1, TARGET, "UTF-8", |
|
225 NULL, &bytes_written, NULL))) |
|
226 { |
|
227 fail ("line %d: could not convert to UTF-16 via g_convert\n", line); |
|
228 return; |
|
229 } |
|
230 |
|
231 /* zero-terminate and remove BOM |
|
232 */ |
|
233 n_chars = bytes_written / 2; |
|
234 if (utf16_expected_tmp[0] == 0xfeff) /* BOM */ |
|
235 { |
|
236 n_chars--; |
|
237 utf16_expected = g_new (gunichar2, n_chars + 1); |
|
238 memcpy (utf16_expected, utf16_expected_tmp + 1, sizeof(gunichar2) * n_chars); |
|
239 } |
|
240 else if (utf16_expected_tmp[0] == 0xfffe) /* ANTI-BOM */ |
|
241 { |
|
242 fail ("line %d: conversion via iconv to \"UTF-16\" is not native-endian\n", line); |
|
243 return; |
|
244 } |
|
245 else |
|
246 { |
|
247 utf16_expected = g_new (gunichar2, n_chars + 1); |
|
248 memcpy (utf16_expected, utf16_expected_tmp, sizeof(gunichar2) * n_chars); |
|
249 } |
|
250 |
|
251 utf16_expected[n_chars] = '\0'; |
|
252 |
|
253 if (!(utf16_from_utf8 = g_utf8_to_utf16 (utf8, -1, &items_read, &items_written, &error))) |
|
254 { |
|
255 fail ("line %d: conversion to ucs16 failed: %s\n", line, error->message); |
|
256 return; |
|
257 } |
|
258 |
|
259 if (items_read != strlen (utf8) || |
|
260 utf16_count (utf16_from_utf8) != items_written) |
|
261 { |
|
262 fail ("line %d: length error in conversion to ucs16\n", line); |
|
263 return; |
|
264 } |
|
265 |
|
266 if (!(utf16_from_ucs4 = g_ucs4_to_utf16 (ucs4, -1, &items_read, &items_written, &error))) |
|
267 { |
|
268 fail ("line %d: conversion to ucs16 failed: %s\n", line, error->message); |
|
269 return; |
|
270 } |
|
271 |
|
272 if (items_read != ucs4_len || |
|
273 utf16_count (utf16_from_ucs4) != items_written) |
|
274 { |
|
275 fail ("line %d: length error in conversion to ucs16\n", line); |
|
276 return; |
|
277 } |
|
278 |
|
279 if (!utf16_equal (utf16_from_utf8, utf16_expected) || |
|
280 !utf16_equal (utf16_from_ucs4, utf16_expected)) |
|
281 { |
|
282 fail ("line %d: results of conversion to ucs16 do not match\n", line); |
|
283 return; |
|
284 } |
|
285 |
|
286 if (!(utf8_result = g_utf16_to_utf8 (utf16_from_utf8, -1, &items_read, &items_written, &error))) |
|
287 { |
|
288 fail ("line %d: conversion back to utf8 failed: %s\n", line, error->message); |
|
289 return; |
|
290 } |
|
291 |
|
292 if (items_read != utf16_count (utf16_from_utf8) || |
|
293 items_written != strlen (utf8)) |
|
294 { |
|
295 fail ("line %d: length error in conversion from ucs16 to utf8\n", line); |
|
296 return; |
|
297 } |
|
298 |
|
299 if (!(ucs4_result = g_utf16_to_ucs4 (utf16_from_ucs4, -1, &items_read, &items_written, &error))) |
|
300 { |
|
301 fail ("line %d: conversion back to utf8/ucs4 failed\n", line); |
|
302 return; |
|
303 } |
|
304 |
|
305 if (items_read != utf16_count (utf16_from_utf8) || |
|
306 items_written != ucs4_len) |
|
307 { |
|
308 fail ("line %d: length error in conversion from ucs16 to ucs4\n", line); |
|
309 return; |
|
310 } |
|
311 |
|
312 if (strcmp (utf8, utf8_result) != 0 || |
|
313 !ucs4_equal (ucs4, ucs4_result)) |
|
314 { |
|
315 fail ("line %d: conversion back to utf8/ucs4 did not match original\n", line); |
|
316 return; |
|
317 } |
|
318 |
|
319 g_free (utf16_expected_tmp); |
|
320 g_free (utf16_expected); |
|
321 g_free (utf16_from_utf8); |
|
322 g_free (utf16_from_ucs4); |
|
323 g_free (utf8_result); |
|
324 g_free (ucs4_result); |
|
325 } |
|
326 } |
|
327 |
|
328 int |
|
329 main (int argc, char **argv) |
|
330 { |
|
331 gchar *srcdir = getenv ("srcdir"); |
|
332 gchar *testfile; |
|
333 gchar *contents; |
|
334 GError *error = NULL; |
|
335 gchar *p, *end; |
|
336 char *tmp; |
|
337 gint state = 0; |
|
338 gint line = 1; |
|
339 gint start_line = 0; /* Quiet GCC */ |
|
340 gchar *utf8 = NULL; /* Quiet GCC */ |
|
341 GArray *ucs4; |
|
342 Status status = VALID; /* Quiet GCC */ |
|
343 |
|
344 #ifdef __SYMBIAN32__ |
|
345 |
|
346 g_log_set_handler (NULL, G_LOG_FLAG_FATAL| G_LOG_FLAG_RECURSION | G_LOG_LEVEL_CRITICAL | G_LOG_LEVEL_WARNING | G_LOG_LEVEL_MESSAGE | G_LOG_LEVEL_INFO | G_LOG_LEVEL_DEBUG, &mrtLogHandler, NULL); |
|
347 g_set_print_handler(mrtPrintHandler); |
|
348 #endif /*__SYMBIAN32__*/ |
|
349 if (!srcdir) |
|
350 srcdir = "c:"; |
|
351 |
|
352 testfile = g_strconcat (srcdir, G_DIR_SEPARATOR_S "utf8.txt", NULL); |
|
353 |
|
354 g_file_get_contents (testfile, &contents, NULL, &error); |
|
355 if (error) |
|
356 { |
|
357 croak ("Cannot open utf8.txt: %s", error->message); |
|
358 |
|
359 #ifdef __SYMBIAN32__ |
|
360 testResultXml("unicode-encoding"); |
|
361 #endif /* EMULATOR */ |
|
362 |
|
363 exit(1); |
|
364 } |
|
365 |
|
366 ucs4 = g_array_new (TRUE, FALSE, sizeof(gunichar)); |
|
367 |
|
368 p = contents; |
|
369 |
|
370 /* Loop over lines */ |
|
371 while (*p) |
|
372 { |
|
373 while (*p && (*p == ' ' || *p == '\t')) |
|
374 p++; |
|
375 |
|
376 end = p; |
|
377 while (*end && (*end != '\r' && *end != '\n')) |
|
378 end++; |
|
379 |
|
380 if (!*p || *p == '#' || *p == '\r' || *p == '\n') |
|
381 goto next_line; |
|
382 |
|
383 tmp = g_strstrip (g_strndup (p, end - p)); |
|
384 |
|
385 switch (state) |
|
386 { |
|
387 case 0: |
|
388 /* UTF-8 string */ |
|
389 start_line = line; |
|
390 utf8 = tmp; |
|
391 tmp = NULL; |
|
392 break; |
|
393 |
|
394 case 1: |
|
395 /* Status */ |
|
396 if (!strcmp (tmp, "VALID")) |
|
397 status = VALID; |
|
398 else if (!strcmp (tmp, "INCOMPLETE")) |
|
399 status = INCOMPLETE; |
|
400 else if (!strcmp (tmp, "NOTUNICODE")) |
|
401 status = NOTUNICODE; |
|
402 else if (!strcmp (tmp, "OVERLONG")) |
|
403 status = OVERLONG; |
|
404 else if (!strcmp (tmp, "MALFORMED")) |
|
405 status = MALFORMED; |
|
406 else |
|
407 croak ("Invalid status on line %d\n", line); |
|
408 |
|
409 if (status != VALID && status != NOTUNICODE) |
|
410 state++; /* No UCS-4 data */ |
|
411 |
|
412 break; |
|
413 |
|
414 case 2: |
|
415 /* UCS-4 version */ |
|
416 |
|
417 p = strtok (tmp, " \t"); |
|
418 while (p) |
|
419 { |
|
420 gchar *endptr; |
|
421 |
|
422 gunichar ch = strtoul (p, &endptr, 16); |
|
423 if (*endptr != '\0') |
|
424 croak ("Invalid UCS-4 character on line %d\n", line); |
|
425 |
|
426 g_array_append_val (ucs4, ch); |
|
427 |
|
428 p = strtok (NULL, " \t"); |
|
429 } |
|
430 |
|
431 break; |
|
432 } |
|
433 |
|
434 g_free (tmp); |
|
435 state = (state + 1) % 3; |
|
436 |
|
437 if (state == 0) |
|
438 { |
|
439 process (start_line, utf8, status, (gunichar *)ucs4->data, ucs4->len); |
|
440 g_array_set_size (ucs4, 0); |
|
441 g_free (utf8); |
|
442 } |
|
443 |
|
444 next_line: |
|
445 p = end; |
|
446 if (*p && *p == '\r') |
|
447 p++; |
|
448 if (*p && *p == '\n') |
|
449 p++; |
|
450 |
|
451 line++; |
|
452 } |
|
453 |
|
454 #ifdef __SYMBIAN32__ |
|
455 testResultXml("unicode-encoding"); |
|
456 #endif /* EMULATOR */ |
|
457 return exit_status; |
|
458 } |