|
1 /* |
|
2 * Copyright (c) 2010 Nokia Corporation and/or its subsidiary(-ies). |
|
3 * All rights reserved. |
|
4 * This component and the accompanying materials are made available |
|
5 * under the terms of "Eclipse Public License v1.0" |
|
6 * which accompanies this distribution, and is available |
|
7 * at the URL "http://www.eclipse.org/legal/epl-v10.html". |
|
8 * |
|
9 * Initial Contributors: |
|
10 * Nokia Corporation - initial contribution. |
|
11 * |
|
12 * Contributors: |
|
13 * |
|
14 * Description: |
|
15 * |
|
16 */ |
|
17 #undef G_DISABLE_ASSERT |
|
18 #undef G_LOG_DOMAIN |
|
19 |
|
20 #include <stdarg.h> |
|
21 #include <stdio.h> |
|
22 #include <stdlib.h> |
|
23 #include <string.h> |
|
24 #include <glib.h> |
|
25 |
|
26 #ifdef SYMBIAN |
|
27 #include "mrt2_glib2_test.h" |
|
28 #endif /*SYMBIAN*/ |
|
/* Process exit code returned by main(); fail() ORs 1 into it on each reported failure. */
29 static gint exit_status = 0; |
|
30 |
|
/*
 * croak:
 * @format: printf-style format string, followed by its arguments.
 *
 * Reports an unrecoverable problem: the formatted message is written to
 * stderr and the program terminates with exit status 1.  This function
 * does not return.
 */
static void
croak (char *format, ...)
{
  va_list args;

  va_start (args, format);
  vfprintf (stderr, format, args);
  va_end (args);

  exit (1);
}
|
42 |
|
43 static void |
|
44 fail (char *format, ...) |
|
45 { |
|
46 va_list va; |
|
47 |
|
48 va_start (va, format); |
|
49 vfprintf (stderr, format, va); |
|
50 va_end (va); |
|
51 |
|
52 exit_status |= 1; |
|
53 } |
|
54 |
|
/*
 * Expected classification of one UTF-8 test string, as named by the
 * status keyword in the test data file.
 */
typedef enum
{
  VALID      = 0,  /* must pass g_utf8_validate(); all round trips are run */
  INCOMPLETE = 1,  /* must fail validation and be reported as partial input */
  NOTUNICODE = 2,  /* must fail validation; UCS-4 round trip is still run */
  OVERLONG   = 3,  /* must fail validation; no conversions are attempted */
  MALFORMED  = 4   /* must fail validation; no conversions are attempted */
} Status;
|
63 |
|
64 static gboolean |
|
65 ucs4_equal (gunichar *a, gunichar *b) |
|
66 { |
|
67 while (*a && *b && (*a == *b)) |
|
68 { |
|
69 a++; |
|
70 b++; |
|
71 } |
|
72 |
|
73 return (*a == *b); |
|
74 } |
|
75 |
|
76 static gboolean |
|
77 utf16_equal (gunichar2 *a, gunichar2 *b) |
|
78 { |
|
79 while (*a && *b && (*a == *b)) |
|
80 { |
|
81 a++; |
|
82 b++; |
|
83 } |
|
84 |
|
85 return (*a == *b); |
|
86 } |
|
87 |
|
88 static gint |
|
89 utf16_count (gunichar2 *a) |
|
90 { |
|
91 gint result = 0; |
|
92 |
|
93 while (a[result]) |
|
94 result++; |
|
95 |
|
96 return result; |
|
97 } |
|
98 |
|
99 static void |
|
100 process (gint line, |
|
101 gchar *utf8, |
|
102 Status status, |
|
103 gunichar *ucs4, |
|
104 gint ucs4_len) |
|
105 { |
|
106 const gchar *end; |
|
107 gboolean is_valid = g_utf8_validate (utf8, -1, &end); |
|
108 GError *error = NULL; |
|
109 glong items_read, items_written; |
|
110 |
|
111 switch (status) |
|
112 { |
|
113 case VALID: |
|
114 if (!is_valid) |
|
115 { |
|
116 fail ("line %d: valid but g_utf8_validate returned FALSE\n", line); |
|
117 return; |
|
118 } |
|
119 break; |
|
120 case NOTUNICODE: |
|
121 case INCOMPLETE: |
|
122 case OVERLONG: |
|
123 case MALFORMED: |
|
124 if (is_valid) |
|
125 { |
|
126 fail ("line %d: invalid but g_utf8_validate returned TRUE\n", line); |
|
127 return; |
|
128 } |
|
129 break; |
|
130 } |
|
131 |
|
132 if (status == INCOMPLETE) |
|
133 { |
|
134 gunichar *ucs4_result; |
|
135 |
|
136 ucs4_result = g_utf8_to_ucs4 (utf8, -1, NULL, NULL, &error); |
|
137 |
|
138 if (!error || !g_error_matches (error, G_CONVERT_ERROR, G_CONVERT_ERROR_PARTIAL_INPUT)) |
|
139 { |
|
140 fail ("line %d: incomplete input not properly detected\n", line); |
|
141 return; |
|
142 } |
|
143 g_clear_error (&error); |
|
144 |
|
145 ucs4_result = g_utf8_to_ucs4 (utf8, -1, &items_read, NULL, &error); |
|
146 |
|
147 if (!ucs4_result || items_read == strlen (utf8)) |
|
148 { |
|
149 fail ("line %d: incomplete input not properly detected\n", line); |
|
150 return; |
|
151 } |
|
152 |
|
153 g_free (ucs4_result); |
|
154 } |
|
155 |
|
156 if (status == VALID || status == NOTUNICODE) |
|
157 { |
|
158 gunichar *ucs4_result; |
|
159 gchar *utf8_result; |
|
160 |
|
161 ucs4_result = g_utf8_to_ucs4 (utf8, -1, &items_read, &items_written, &error); |
|
162 if (!ucs4_result) |
|
163 { |
|
164 fail ("line %d: conversion to ucs4 failed: %s\n", line, error->message); |
|
165 return; |
|
166 } |
|
167 |
|
168 if (!ucs4_equal (ucs4_result, ucs4) || |
|
169 items_read != strlen (utf8) || |
|
170 items_written != ucs4_len) |
|
171 { |
|
172 fail ("line %d: results of conversion to ucs4 do not match expected.\n", line); |
|
173 return; |
|
174 } |
|
175 |
|
176 g_free (ucs4_result); |
|
177 |
|
178 ucs4_result = g_utf8_to_ucs4_fast (utf8, -1, &items_written); |
|
179 |
|
180 if (!ucs4_equal (ucs4_result, ucs4) || |
|
181 items_written != ucs4_len) |
|
182 { |
|
183 fail ("line %d: results of conversion to ucs4 do not match expected.\n", line); |
|
184 return; |
|
185 } |
|
186 |
|
187 utf8_result = g_ucs4_to_utf8 (ucs4_result, -1, &items_read, &items_written, &error); |
|
188 if (!utf8_result) |
|
189 { |
|
190 fail ("line %d: conversion back to utf8 failed: %s", line, error->message); |
|
191 return; |
|
192 } |
|
193 |
|
194 if (strcmp (utf8_result, utf8) != 0 || |
|
195 items_read != ucs4_len || |
|
196 items_written != strlen (utf8)) |
|
197 { |
|
198 fail ("line %d: conversion back to utf8 did not match original\n", line); |
|
199 return; |
|
200 } |
|
201 |
|
202 g_free (utf8_result); |
|
203 g_free (ucs4_result); |
|
204 } |
|
205 |
|
206 if (status == VALID) |
|
207 { |
|
208 gunichar2 *utf16_expected_tmp; |
|
209 gunichar2 *utf16_expected; |
|
210 gunichar2 *utf16_from_utf8; |
|
211 gunichar2 *utf16_from_ucs4; |
|
212 gunichar *ucs4_result; |
|
213 gsize bytes_written; |
|
214 gint n_chars; |
|
215 gchar *utf8_result; |
|
216 |
|
217 #if defined(G_PLATFORM_WIN32) || defined(SYMBIAN) |
|
218 #define TARGET "UTF-16LE" |
|
219 #else |
|
220 #define TARGET "UTF-16" |
|
221 #endif |
|
222 |
|
223 if (!(utf16_expected_tmp = (gunichar2 *)g_convert (utf8, -1, TARGET, "UTF-8", |
|
224 NULL, &bytes_written, NULL))) |
|
225 { |
|
226 fail ("line %d: could not convert to UTF-16 via g_convert\n", line); |
|
227 return; |
|
228 } |
|
229 |
|
230 /* zero-terminate and remove BOM |
|
231 */ |
|
232 n_chars = bytes_written / 2; |
|
233 if (utf16_expected_tmp[0] == 0xfeff) /* BOM */ |
|
234 { |
|
235 n_chars--; |
|
236 utf16_expected = g_new (gunichar2, n_chars + 1); |
|
237 memcpy (utf16_expected, utf16_expected_tmp + 1, sizeof(gunichar2) * n_chars); |
|
238 } |
|
239 else if (utf16_expected_tmp[0] == 0xfffe) /* ANTI-BOM */ |
|
240 { |
|
241 fail ("line %d: conversion via iconv to \"UTF-16\" is not native-endian\n", line); |
|
242 return; |
|
243 } |
|
244 else |
|
245 { |
|
246 utf16_expected = g_new (gunichar2, n_chars + 1); |
|
247 memcpy (utf16_expected, utf16_expected_tmp, sizeof(gunichar2) * n_chars); |
|
248 } |
|
249 |
|
250 utf16_expected[n_chars] = '\0'; |
|
251 |
|
252 if (!(utf16_from_utf8 = g_utf8_to_utf16 (utf8, -1, &items_read, &items_written, &error))) |
|
253 { |
|
254 fail ("line %d: conversion to ucs16 failed: %s\n", line, error->message); |
|
255 return; |
|
256 } |
|
257 |
|
258 if (items_read != strlen (utf8) || |
|
259 utf16_count (utf16_from_utf8) != items_written) |
|
260 { |
|
261 fail ("line %d: length error in conversion to ucs16\n", line); |
|
262 return; |
|
263 } |
|
264 |
|
265 if (!(utf16_from_ucs4 = g_ucs4_to_utf16 (ucs4, -1, &items_read, &items_written, &error))) |
|
266 { |
|
267 fail ("line %d: conversion to ucs16 failed: %s\n", line, error->message); |
|
268 return; |
|
269 } |
|
270 |
|
271 if (items_read != ucs4_len || |
|
272 utf16_count (utf16_from_ucs4) != items_written) |
|
273 { |
|
274 fail ("line %d: length error in conversion to ucs16\n", line); |
|
275 return; |
|
276 } |
|
277 |
|
278 if (!utf16_equal (utf16_from_utf8, utf16_expected) || |
|
279 !utf16_equal (utf16_from_ucs4, utf16_expected)) |
|
280 { |
|
281 fail ("line %d: results of conversion to ucs16 do not match\n", line); |
|
282 return; |
|
283 } |
|
284 |
|
285 if (!(utf8_result = g_utf16_to_utf8 (utf16_from_utf8, -1, &items_read, &items_written, &error))) |
|
286 { |
|
287 fail ("line %d: conversion back to utf8 failed: %s\n", line, error->message); |
|
288 return; |
|
289 } |
|
290 |
|
291 if (items_read != utf16_count (utf16_from_utf8) || |
|
292 items_written != strlen (utf8)) |
|
293 { |
|
294 fail ("line %d: length error in conversion from ucs16 to utf8\n", line); |
|
295 return; |
|
296 } |
|
297 |
|
298 if (!(ucs4_result = g_utf16_to_ucs4 (utf16_from_ucs4, -1, &items_read, &items_written, &error))) |
|
299 { |
|
300 fail ("line %d: conversion back to utf8/ucs4 failed\n", line); |
|
301 return; |
|
302 } |
|
303 |
|
304 if (items_read != utf16_count (utf16_from_utf8) || |
|
305 items_written != ucs4_len) |
|
306 { |
|
307 fail ("line %d: length error in conversion from ucs16 to ucs4\n", line); |
|
308 return; |
|
309 } |
|
310 |
|
311 if (strcmp (utf8, utf8_result) != 0 || |
|
312 !ucs4_equal (ucs4, ucs4_result)) |
|
313 { |
|
314 fail ("line %d: conversion back to utf8/ucs4 did not match original\n", line); |
|
315 return; |
|
316 } |
|
317 |
|
318 g_free (utf16_expected_tmp); |
|
319 g_free (utf16_expected); |
|
320 g_free (utf16_from_utf8); |
|
321 g_free (utf16_from_ucs4); |
|
322 g_free (utf8_result); |
|
323 g_free (ucs4_result); |
|
324 } |
|
325 } |
|
326 |
|
327 int |
|
328 main (int argc, char **argv) |
|
329 { |
|
330 gchar *srcdir = getenv ("srcdir"); |
|
331 gchar *testfile; |
|
332 gchar *contents; |
|
333 GError *error = NULL; |
|
334 gchar *p, *end; |
|
335 char *tmp; |
|
336 gint state = 0; |
|
337 gint line = 1; |
|
338 gint start_line = 0; /* Quiet GCC */ |
|
339 gchar *utf8 = NULL; /* Quiet GCC */ |
|
340 GArray *ucs4; |
|
341 Status status = VALID; /* Quiet GCC */ |
|
342 |
|
343 #ifdef SYMBIAN |
|
344 |
|
345 g_log_set_handler (NULL, G_LOG_FLAG_FATAL| G_LOG_FLAG_RECURSION | G_LOG_LEVEL_CRITICAL | G_LOG_LEVEL_WARNING | G_LOG_LEVEL_MESSAGE | G_LOG_LEVEL_INFO | G_LOG_LEVEL_DEBUG, &mrtLogHandler, NULL); |
|
346 g_set_print_handler(mrtPrintHandler); |
|
347 #endif /*SYMBIAN*/ |
|
348 if (!srcdir) |
|
349 srcdir = "c:"; |
|
350 |
|
351 testfile = g_strconcat (srcdir, G_DIR_SEPARATOR_S "utf8.txt", NULL); |
|
352 |
|
353 g_file_get_contents (testfile, &contents, NULL, &error); |
|
354 if (error) |
|
355 { |
|
356 croak ("Cannot open utf8.txt: %s", error->message); |
|
357 |
|
358 #ifdef SYMBIAN |
|
359 testResultXml("unicode-encoding"); |
|
360 #endif /* EMULATOR */ |
|
361 |
|
362 exit(1); |
|
363 } |
|
364 |
|
365 ucs4 = g_array_new (TRUE, FALSE, sizeof(gunichar)); |
|
366 |
|
367 p = contents; |
|
368 |
|
369 /* Loop over lines */ |
|
370 while (*p) |
|
371 { |
|
372 while (*p && (*p == ' ' || *p == '\t')) |
|
373 p++; |
|
374 |
|
375 end = p; |
|
376 while (*end && (*end != '\r' && *end != '\n')) |
|
377 end++; |
|
378 |
|
379 if (!*p || *p == '#' || *p == '\r' || *p == '\n') |
|
380 goto next_line; |
|
381 |
|
382 tmp = g_strstrip (g_strndup (p, end - p)); |
|
383 |
|
384 switch (state) |
|
385 { |
|
386 case 0: |
|
387 /* UTF-8 string */ |
|
388 start_line = line; |
|
389 utf8 = tmp; |
|
390 tmp = NULL; |
|
391 break; |
|
392 |
|
393 case 1: |
|
394 /* Status */ |
|
395 if (!strcmp (tmp, "VALID")) |
|
396 status = VALID; |
|
397 else if (!strcmp (tmp, "INCOMPLETE")) |
|
398 status = INCOMPLETE; |
|
399 else if (!strcmp (tmp, "NOTUNICODE")) |
|
400 status = NOTUNICODE; |
|
401 else if (!strcmp (tmp, "OVERLONG")) |
|
402 status = OVERLONG; |
|
403 else if (!strcmp (tmp, "MALFORMED")) |
|
404 status = MALFORMED; |
|
405 else |
|
406 croak ("Invalid status on line %d\n", line); |
|
407 |
|
408 if (status != VALID && status != NOTUNICODE) |
|
409 state++; /* No UCS-4 data */ |
|
410 |
|
411 break; |
|
412 |
|
413 case 2: |
|
414 /* UCS-4 version */ |
|
415 |
|
416 p = strtok (tmp, " \t"); |
|
417 while (p) |
|
418 { |
|
419 gchar *endptr; |
|
420 |
|
421 gunichar ch = strtoul (p, &endptr, 16); |
|
422 if (*endptr != '\0') |
|
423 croak ("Invalid UCS-4 character on line %d\n", line); |
|
424 |
|
425 g_array_append_val (ucs4, ch); |
|
426 |
|
427 p = strtok (NULL, " \t"); |
|
428 } |
|
429 |
|
430 break; |
|
431 } |
|
432 |
|
433 g_free (tmp); |
|
434 state = (state + 1) % 3; |
|
435 |
|
436 if (state == 0) |
|
437 { |
|
438 process (start_line, utf8, status, (gunichar *)ucs4->data, ucs4->len); |
|
439 g_array_set_size (ucs4, 0); |
|
440 g_free (utf8); |
|
441 } |
|
442 |
|
443 next_line: |
|
444 p = end; |
|
445 if (*p && *p == '\r') |
|
446 p++; |
|
447 if (*p && *p == '\n') |
|
448 p++; |
|
449 |
|
450 line++; |
|
451 } |
|
452 |
|
453 #ifdef SYMBIAN |
|
454 testResultXml("unicode-encoding"); |
|
455 #endif /* EMULATOR */ |
|
456 return exit_status; |
|
457 } |