|
1 /* Portion Copyright © 2008-09 Nokia Corporation and/or its subsidiary(-ies). All rights reserved.*/ |
|
2 #undef G_DISABLE_ASSERT |
|
3 #undef G_LOG_DOMAIN |
|
4 |
|
5 #include <stdarg.h> |
|
6 #include <stdio.h> |
|
7 #include <stdlib.h> |
|
8 #include <string.h> |
|
9 #include <glib.h> |
|
10 |
|
11 #ifdef SYMBIAN |
|
12 #include "mrt2_glib2_test.h" |
|
13 #endif /*SYMBIAN*/ |
|
14 static gint exit_status = 0; |
|
15 |
|
/* Report a fatal error and terminate.
 *
 * Prints the printf-style message built from @format and the following
 * arguments to stderr, then exits the process with status 1.  This
 * function never returns.
 *
 * The format string is never modified, so it is taken as const; callers
 * pass string literals.
 */
static void
croak (const char *format, ...)
{
  va_list args;

  va_start (args, format);
  vfprintf (stderr, format, args);
  va_end (args);

  exit (1);
}
|
27 |
|
28 static void |
|
29 fail (char *format, ...) |
|
30 { |
|
31 va_list va; |
|
32 |
|
33 va_start (va, format); |
|
34 vfprintf (stderr, format, va); |
|
35 va_end (va); |
|
36 |
|
37 exit_status |= 1; |
|
38 } |
|
39 |
|
/* Expected classification of one UTF-8 test string, as named in the
 * status line of each record in utf8.txt.
 */
typedef enum
{
  VALID      = 0, /* must validate; all conversions are round-tripped */
  INCOMPLETE = 1, /* must not validate; conversion must report partial input */
  NOTUNICODE = 2, /* must not validate; UCS-4 round trip is still checked */
  OVERLONG   = 3, /* must not validate */
  MALFORMED  = 4  /* must not validate */
} Status;
|
48 |
|
49 static gboolean |
|
50 ucs4_equal (gunichar *a, gunichar *b) |
|
51 { |
|
52 while (*a && *b && (*a == *b)) |
|
53 { |
|
54 a++; |
|
55 b++; |
|
56 } |
|
57 |
|
58 return (*a == *b); |
|
59 } |
|
60 |
|
61 static gboolean |
|
62 utf16_equal (gunichar2 *a, gunichar2 *b) |
|
63 { |
|
64 while (*a && *b && (*a == *b)) |
|
65 { |
|
66 a++; |
|
67 b++; |
|
68 } |
|
69 |
|
70 return (*a == *b); |
|
71 } |
|
72 |
|
73 static gint |
|
74 utf16_count (gunichar2 *a) |
|
75 { |
|
76 gint result = 0; |
|
77 |
|
78 while (a[result]) |
|
79 result++; |
|
80 |
|
81 return result; |
|
82 } |
|
83 |
|
84 static void |
|
85 process (gint line, |
|
86 gchar *utf8, |
|
87 Status status, |
|
88 gunichar *ucs4, |
|
89 gint ucs4_len) |
|
90 { |
|
91 const gchar *end; |
|
92 gboolean is_valid = g_utf8_validate (utf8, -1, &end); |
|
93 GError *error = NULL; |
|
94 glong items_read, items_written; |
|
95 |
|
96 switch (status) |
|
97 { |
|
98 case VALID: |
|
99 if (!is_valid) |
|
100 { |
|
101 fail ("line %d: valid but g_utf8_validate returned FALSE\n", line); |
|
102 return; |
|
103 } |
|
104 break; |
|
105 case NOTUNICODE: |
|
106 case INCOMPLETE: |
|
107 case OVERLONG: |
|
108 case MALFORMED: |
|
109 if (is_valid) |
|
110 { |
|
111 fail ("line %d: invalid but g_utf8_validate returned TRUE\n", line); |
|
112 return; |
|
113 } |
|
114 break; |
|
115 } |
|
116 |
|
117 if (status == INCOMPLETE) |
|
118 { |
|
119 gunichar *ucs4_result; |
|
120 |
|
121 ucs4_result = g_utf8_to_ucs4 (utf8, -1, NULL, NULL, &error); |
|
122 |
|
123 if (!error || !g_error_matches (error, G_CONVERT_ERROR, G_CONVERT_ERROR_PARTIAL_INPUT)) |
|
124 { |
|
125 fail ("line %d: incomplete input not properly detected\n", line); |
|
126 return; |
|
127 } |
|
128 g_clear_error (&error); |
|
129 |
|
130 ucs4_result = g_utf8_to_ucs4 (utf8, -1, &items_read, NULL, &error); |
|
131 |
|
132 if (!ucs4_result || items_read == strlen (utf8)) |
|
133 { |
|
134 fail ("line %d: incomplete input not properly detected\n", line); |
|
135 return; |
|
136 } |
|
137 |
|
138 g_free (ucs4_result); |
|
139 } |
|
140 |
|
141 if (status == VALID || status == NOTUNICODE) |
|
142 { |
|
143 gunichar *ucs4_result; |
|
144 gchar *utf8_result; |
|
145 |
|
146 ucs4_result = g_utf8_to_ucs4 (utf8, -1, &items_read, &items_written, &error); |
|
147 if (!ucs4_result) |
|
148 { |
|
149 fail ("line %d: conversion to ucs4 failed: %s\n", line, error->message); |
|
150 return; |
|
151 } |
|
152 |
|
153 if (!ucs4_equal (ucs4_result, ucs4) || |
|
154 items_read != strlen (utf8) || |
|
155 items_written != ucs4_len) |
|
156 { |
|
157 fail ("line %d: results of conversion to ucs4 do not match expected.\n", line); |
|
158 return; |
|
159 } |
|
160 |
|
161 g_free (ucs4_result); |
|
162 |
|
163 ucs4_result = g_utf8_to_ucs4_fast (utf8, -1, &items_written); |
|
164 |
|
165 if (!ucs4_equal (ucs4_result, ucs4) || |
|
166 items_written != ucs4_len) |
|
167 { |
|
168 fail ("line %d: results of conversion to ucs4 do not match expected.\n", line); |
|
169 return; |
|
170 } |
|
171 |
|
172 utf8_result = g_ucs4_to_utf8 (ucs4_result, -1, &items_read, &items_written, &error); |
|
173 if (!utf8_result) |
|
174 { |
|
175 fail ("line %d: conversion back to utf8 failed: %s", line, error->message); |
|
176 return; |
|
177 } |
|
178 |
|
179 if (strcmp (utf8_result, utf8) != 0 || |
|
180 items_read != ucs4_len || |
|
181 items_written != strlen (utf8)) |
|
182 { |
|
183 fail ("line %d: conversion back to utf8 did not match original\n", line); |
|
184 return; |
|
185 } |
|
186 |
|
187 g_free (utf8_result); |
|
188 g_free (ucs4_result); |
|
189 } |
|
190 |
|
191 if (status == VALID) |
|
192 { |
|
193 gunichar2 *utf16_expected_tmp; |
|
194 gunichar2 *utf16_expected; |
|
195 gunichar2 *utf16_from_utf8; |
|
196 gunichar2 *utf16_from_ucs4; |
|
197 gunichar *ucs4_result; |
|
198 gsize bytes_written; |
|
199 gint n_chars; |
|
200 gchar *utf8_result; |
|
201 |
|
202 #if defined(G_PLATFORM_WIN32) || defined(SYMBIAN) |
|
203 #define TARGET "UTF-16LE" |
|
204 #else |
|
205 #define TARGET "UTF-16" |
|
206 #endif |
|
207 |
|
208 if (!(utf16_expected_tmp = (gunichar2 *)g_convert (utf8, -1, TARGET, "UTF-8", |
|
209 NULL, &bytes_written, NULL))) |
|
210 { |
|
211 fail ("line %d: could not convert to UTF-16 via g_convert\n", line); |
|
212 return; |
|
213 } |
|
214 |
|
215 /* zero-terminate and remove BOM |
|
216 */ |
|
217 n_chars = bytes_written / 2; |
|
218 if (utf16_expected_tmp[0] == 0xfeff) /* BOM */ |
|
219 { |
|
220 n_chars--; |
|
221 utf16_expected = g_new (gunichar2, n_chars + 1); |
|
222 memcpy (utf16_expected, utf16_expected_tmp + 1, sizeof(gunichar2) * n_chars); |
|
223 } |
|
224 else if (utf16_expected_tmp[0] == 0xfffe) /* ANTI-BOM */ |
|
225 { |
|
226 fail ("line %d: conversion via iconv to \"UTF-16\" is not native-endian\n", line); |
|
227 return; |
|
228 } |
|
229 else |
|
230 { |
|
231 utf16_expected = g_new (gunichar2, n_chars + 1); |
|
232 memcpy (utf16_expected, utf16_expected_tmp, sizeof(gunichar2) * n_chars); |
|
233 } |
|
234 |
|
235 utf16_expected[n_chars] = '\0'; |
|
236 |
|
237 if (!(utf16_from_utf8 = g_utf8_to_utf16 (utf8, -1, &items_read, &items_written, &error))) |
|
238 { |
|
239 fail ("line %d: conversion to ucs16 failed: %s\n", line, error->message); |
|
240 return; |
|
241 } |
|
242 |
|
243 if (items_read != strlen (utf8) || |
|
244 utf16_count (utf16_from_utf8) != items_written) |
|
245 { |
|
246 fail ("line %d: length error in conversion to ucs16\n", line); |
|
247 return; |
|
248 } |
|
249 |
|
250 if (!(utf16_from_ucs4 = g_ucs4_to_utf16 (ucs4, -1, &items_read, &items_written, &error))) |
|
251 { |
|
252 fail ("line %d: conversion to ucs16 failed: %s\n", line, error->message); |
|
253 return; |
|
254 } |
|
255 |
|
256 if (items_read != ucs4_len || |
|
257 utf16_count (utf16_from_ucs4) != items_written) |
|
258 { |
|
259 fail ("line %d: length error in conversion to ucs16\n", line); |
|
260 return; |
|
261 } |
|
262 |
|
263 if (!utf16_equal (utf16_from_utf8, utf16_expected) || |
|
264 !utf16_equal (utf16_from_ucs4, utf16_expected)) |
|
265 { |
|
266 fail ("line %d: results of conversion to ucs16 do not match\n", line); |
|
267 return; |
|
268 } |
|
269 |
|
270 if (!(utf8_result = g_utf16_to_utf8 (utf16_from_utf8, -1, &items_read, &items_written, &error))) |
|
271 { |
|
272 fail ("line %d: conversion back to utf8 failed: %s\n", line, error->message); |
|
273 return; |
|
274 } |
|
275 |
|
276 if (items_read != utf16_count (utf16_from_utf8) || |
|
277 items_written != strlen (utf8)) |
|
278 { |
|
279 fail ("line %d: length error in conversion from ucs16 to utf8\n", line); |
|
280 return; |
|
281 } |
|
282 |
|
283 if (!(ucs4_result = g_utf16_to_ucs4 (utf16_from_ucs4, -1, &items_read, &items_written, &error))) |
|
284 { |
|
285 fail ("line %d: conversion back to utf8/ucs4 failed\n", line); |
|
286 return; |
|
287 } |
|
288 |
|
289 if (items_read != utf16_count (utf16_from_utf8) || |
|
290 items_written != ucs4_len) |
|
291 { |
|
292 fail ("line %d: length error in conversion from ucs16 to ucs4\n", line); |
|
293 return; |
|
294 } |
|
295 |
|
296 if (strcmp (utf8, utf8_result) != 0 || |
|
297 !ucs4_equal (ucs4, ucs4_result)) |
|
298 { |
|
299 fail ("line %d: conversion back to utf8/ucs4 did not match original\n", line); |
|
300 return; |
|
301 } |
|
302 |
|
303 g_free (utf16_expected_tmp); |
|
304 g_free (utf16_expected); |
|
305 g_free (utf16_from_utf8); |
|
306 g_free (utf16_from_ucs4); |
|
307 g_free (utf8_result); |
|
308 g_free (ucs4_result); |
|
309 } |
|
310 } |
|
311 |
|
312 int |
|
313 main (int argc, char **argv) |
|
314 { |
|
315 gchar *srcdir = getenv ("srcdir"); |
|
316 gchar *testfile; |
|
317 gchar *contents; |
|
318 GError *error = NULL; |
|
319 gchar *p, *end; |
|
320 char *tmp; |
|
321 gint state = 0; |
|
322 gint line = 1; |
|
323 gint start_line = 0; /* Quiet GCC */ |
|
324 gchar *utf8 = NULL; /* Quiet GCC */ |
|
325 GArray *ucs4; |
|
326 Status status = VALID; /* Quiet GCC */ |
|
327 |
|
328 #ifdef SYMBIAN |
|
329 |
|
330 g_log_set_handler (NULL, G_LOG_FLAG_FATAL| G_LOG_FLAG_RECURSION | G_LOG_LEVEL_CRITICAL | G_LOG_LEVEL_WARNING | G_LOG_LEVEL_MESSAGE | G_LOG_LEVEL_INFO | G_LOG_LEVEL_DEBUG, &mrtLogHandler, NULL); |
|
331 g_set_print_handler(mrtPrintHandler); |
|
332 #endif /*SYMBIAN*/ |
|
333 if (!srcdir) |
|
334 srcdir = "c:"; |
|
335 |
|
336 testfile = g_strconcat (srcdir, G_DIR_SEPARATOR_S "utf8.txt", NULL); |
|
337 |
|
338 g_file_get_contents (testfile, &contents, NULL, &error); |
|
339 if (error) |
|
340 { |
|
341 croak ("Cannot open utf8.txt: %s", error->message); |
|
342 |
|
343 #ifdef SYMBIAN |
|
344 testResultXml("unicode-encoding"); |
|
345 #endif /* EMULATOR */ |
|
346 |
|
347 exit(1); |
|
348 } |
|
349 |
|
350 ucs4 = g_array_new (TRUE, FALSE, sizeof(gunichar)); |
|
351 |
|
352 p = contents; |
|
353 |
|
354 /* Loop over lines */ |
|
355 while (*p) |
|
356 { |
|
357 while (*p && (*p == ' ' || *p == '\t')) |
|
358 p++; |
|
359 |
|
360 end = p; |
|
361 while (*end && (*end != '\r' && *end != '\n')) |
|
362 end++; |
|
363 |
|
364 if (!*p || *p == '#' || *p == '\r' || *p == '\n') |
|
365 goto next_line; |
|
366 |
|
367 tmp = g_strstrip (g_strndup (p, end - p)); |
|
368 |
|
369 switch (state) |
|
370 { |
|
371 case 0: |
|
372 /* UTF-8 string */ |
|
373 start_line = line; |
|
374 utf8 = tmp; |
|
375 tmp = NULL; |
|
376 break; |
|
377 |
|
378 case 1: |
|
379 /* Status */ |
|
380 if (!strcmp (tmp, "VALID")) |
|
381 status = VALID; |
|
382 else if (!strcmp (tmp, "INCOMPLETE")) |
|
383 status = INCOMPLETE; |
|
384 else if (!strcmp (tmp, "NOTUNICODE")) |
|
385 status = NOTUNICODE; |
|
386 else if (!strcmp (tmp, "OVERLONG")) |
|
387 status = OVERLONG; |
|
388 else if (!strcmp (tmp, "MALFORMED")) |
|
389 status = MALFORMED; |
|
390 else |
|
391 croak ("Invalid status on line %d\n", line); |
|
392 |
|
393 if (status != VALID && status != NOTUNICODE) |
|
394 state++; /* No UCS-4 data */ |
|
395 |
|
396 break; |
|
397 |
|
398 case 2: |
|
399 /* UCS-4 version */ |
|
400 |
|
401 p = strtok (tmp, " \t"); |
|
402 while (p) |
|
403 { |
|
404 gchar *endptr; |
|
405 |
|
406 gunichar ch = strtoul (p, &endptr, 16); |
|
407 if (*endptr != '\0') |
|
408 croak ("Invalid UCS-4 character on line %d\n", line); |
|
409 |
|
410 g_array_append_val (ucs4, ch); |
|
411 |
|
412 p = strtok (NULL, " \t"); |
|
413 } |
|
414 |
|
415 break; |
|
416 } |
|
417 |
|
418 g_free (tmp); |
|
419 state = (state + 1) % 3; |
|
420 |
|
421 if (state == 0) |
|
422 { |
|
423 process (start_line, utf8, status, (gunichar *)ucs4->data, ucs4->len); |
|
424 g_array_set_size (ucs4, 0); |
|
425 g_free (utf8); |
|
426 } |
|
427 |
|
428 next_line: |
|
429 p = end; |
|
430 if (*p && *p == '\r') |
|
431 p++; |
|
432 if (*p && *p == '\n') |
|
433 p++; |
|
434 |
|
435 line++; |
|
436 } |
|
437 |
|
438 #ifdef SYMBIAN |
|
439 testResultXml("unicode-encoding"); |
|
440 #endif /* EMULATOR */ |
|
441 return exit_status; |
|
442 } |