glib/tests/unicode-normalize.c
changeset 18 47c74d1534e1
child 34 5fae379060a7
equal deleted inserted replaced
0:e4d67989cc36 18:47c74d1534e1
       
     1 /*
       
     2 * Copyright (c) 2008 Nokia Corporation and/or its subsidiary(-ies).
       
     3 * All rights reserved.
       
     4 * This component and the accompanying materials are made available
       
     5 * under the terms of "Eclipse Public License v1.0"
       
     6 * which accompanies this distribution, and is available
       
     7 * at the URL "http://www.eclipse.org/legal/epl-v10.html".
       
     8 *
       
     9 * Initial Contributors:
       
    10 * Nokia Corporation - initial contribution.
       
    11 *
       
    12 * Contributors:
       
    13 *
       
    14 * Description: 
       
    15 *
       
    16 */
       
    17 
       
    18 #undef G_DISABLE_ASSERT
       
    19 #undef G_LOG_DOMAIN
       
    20 
       
    21 #include <glib.h>
       
    22 #include <stdio.h>
       
    23 #include <stdlib.h>
       
    24 #include <string.h>
       
    25 
       
    26 #ifdef __SYMBIAN32__
       
    27 #include <glib_global.h>
       
    28 #include "mrt2_glib2_test.h"
       
    29 #endif /*__SYMBIAN32__*/
       
    30 
       
/* Overall verdict of the run: cleared by test_form() on any mismatch and
 * turned into the process exit status at the end of main(). */
gboolean success = TRUE;
       
    32 
       
    33 static char *
       
    34 decode (const gchar *input)
       
    35 {
       
    36   unsigned ch;
       
    37   int offset = 0;
       
    38   GString *result = g_string_new (NULL);
       
    39   
       
    40   do 
       
    41     {
       
    42       if (sscanf (input + offset, "%x", &ch) != 1)
       
    43 	{
       
    44 	  fprintf (stderr, "Error parsing character string %s\n", input);
       
    45 	  g_assert(FALSE && "unicode-normalize failed");
       
    46 	  #ifdef __SYMBIAN32__
       
    47   	  testResultXml("unicode-normalize");
       
    48   	  #endif /* EMULATOR */
       
    49 	  exit (1);
       
    50 	}
       
    51 
       
    52       g_string_append_unichar (result, ch);
       
    53       
       
    54       while (input[offset] && input[offset] != ' ')
       
    55 	offset++;
       
    56       while (input[offset] && input[offset] == ' ')
       
    57 	offset++;
       
    58     }
       
    59   while (input[offset]);
       
    60 
       
    61   return g_string_free (result, FALSE);
       
    62 }
       
    63 
       
    64 const char *names[4] = {
       
    65   "NFD",
       
    66   "NFC",
       
    67   "NFKD",
       
    68   "NFKC"
       
    69 };
       
    70 
       
    71 static char *
       
    72 encode (const gchar *input)
       
    73 {
       
    74   GString *result = g_string_new(NULL);
       
    75 
       
    76   const gchar *p = input;
       
    77   while (*p)
       
    78     {
       
    79       gunichar c = g_utf8_get_char (p);
       
    80       g_string_append_printf (result, "%04X ", c);
       
    81       p = g_utf8_next_char(p);
       
    82     }
       
    83 
       
    84   return g_string_free (result, FALSE);
       
    85 }
       
    86 
       
    87 static void
       
    88 test_form (int            line,
       
    89 	   GNormalizeMode mode,
       
    90 	   gboolean       do_compat,
       
    91 	   int            expected,
       
    92 	   char         **c,
       
    93 	   char         **raw)
       
    94 {
       
    95   int i;
       
    96   
       
    97   gboolean mode_is_compat = (mode == G_NORMALIZE_NFKC ||
       
    98 			     mode == G_NORMALIZE_NFKD);
       
    99 
       
   100   if (mode_is_compat || !do_compat)
       
   101     {
       
   102       for (i = 0; i < 3; i++)
       
   103 	{
       
   104 	  char *result = g_utf8_normalize (c[i], -1, mode);
       
   105 	  if (strcmp (result, c[expected]) != 0)
       
   106 	    {
       
   107 	      char *result_raw = encode(result);
       
   108 	      fprintf (stderr, "\nFailure: %d/%d: %s\n", line, i + 1, raw[5]);
       
   109 	      fprintf (stderr, "  g_utf8_normalize (%s, %s) != %s but %s\n",
       
   110 		   raw[i], names[mode], raw[expected], result_raw);
       
   111 	      g_free (result_raw);
       
   112 	      success = FALSE;
       
   113 	    }
       
   114 	  
       
   115 	  g_free (result);
       
   116 	}
       
   117     }
       
   118   if (mode_is_compat || do_compat)
       
   119     {
       
   120       for (i = 3; i < 5; i++)
       
   121 	{
       
   122 	  char *result = g_utf8_normalize (c[i], -1, mode);
       
   123 	  if (strcmp (result, c[expected]) != 0)
       
   124 	    {
       
   125 	      char *result_raw = encode(result);
       
   126 	      fprintf (stderr, "\nFailure: %d/%d: %s\n", line, i, raw[5]);
       
   127 	      fprintf (stderr, "  g_utf8_normalize (%s, %s) != %s but %s\n",
       
   128 		   raw[i], names[mode], raw[expected], result_raw);
       
   129 	      g_free (result_raw);
       
   130 	      success = FALSE;
       
   131 	    }
       
   132 	  
       
   133 	  g_free (result);
       
   134 	}
       
   135     }
       
   136 }
       
   137 
       
   138 static gboolean
       
   139 process_one (int line, gchar **columns)
       
   140 {
       
   141   char *c[5];
       
   142   int i;
       
   143   gboolean skip = FALSE;
       
   144 
       
   145   for (i=0; i < 5; i++)
       
   146     {
       
   147       c[i] = decode(columns[i]);
       
   148       if (!c[i])
       
   149 	skip = TRUE;
       
   150     }
       
   151 
       
   152   if (!skip)
       
   153     {
       
   154       test_form (line, G_NORMALIZE_NFD, FALSE, 2, c, columns);
       
   155       test_form (line, G_NORMALIZE_NFD, TRUE, 4, c, columns);
       
   156       test_form (line, G_NORMALIZE_NFC, FALSE, 1, c, columns);
       
   157       test_form (line, G_NORMALIZE_NFC, TRUE, 3, c, columns);
       
   158       test_form (line, G_NORMALIZE_NFKD, TRUE, 4, c, columns);
       
   159       test_form (line, G_NORMALIZE_NFKC, TRUE, 3, c, columns);
       
   160     }
       
   161 
       
   162   for (i=0; i < 5; i++)
       
   163     g_free (c[i]);
       
   164   
       
   165   return TRUE;
       
   166 }
       
   167 
       
   168 int main (int argc, char **argv)
       
   169 {
       
   170   GIOChannel *in;
       
   171   GError *error = NULL;
       
   172   GString *buffer = g_string_new (NULL);
       
   173   int line_to_do = 0;
       
   174   int line = 1;
       
   175 
       
   176   #ifdef __SYMBIAN32__
       
   177   g_log_set_handler (NULL,  G_LOG_FLAG_FATAL| G_LOG_FLAG_RECURSION | G_LOG_LEVEL_CRITICAL | G_LOG_LEVEL_WARNING | G_LOG_LEVEL_MESSAGE | G_LOG_LEVEL_INFO | G_LOG_LEVEL_DEBUG, &mrtLogHandler, NULL);
       
   178   g_set_print_handler(mrtPrintHandler);
       
   179   #endif /*__SYMBIAN32__*/
       
   180   if (argc != 2 && argc != 3)
       
   181     {
       
   182       fprintf (stderr, "Usage: unicode-normalize NormalizationTest.txt LINE\n");
       
   183       return 1;
       
   184     }
       
   185 
       
   186   if (argc == 3)
       
   187     line_to_do = atoi(argv[2]);
       
   188 
       
   189   in = g_io_channel_new_file (argv[1], "r", &error);
       
   190   if (!in)
       
   191     {
       
   192       fprintf (stderr, "Cannot open %s: %s\n", argv[1], error->message);
       
   193       g_assert(FALSE && "unicode-normalize failed");
       
   194       
       
   195       #ifdef __SYMBIAN32__
       
   196       testResultXml("unicode-normalize");
       
   197       #endif /* EMULATOR */
       
   198       
       
   199       return 1;
       
   200     }
       
   201 
       
   202   while (TRUE)
       
   203     {
       
   204       gsize term_pos;
       
   205       gchar **columns;
       
   206 
       
   207       if (g_io_channel_read_line_string (in, buffer, &term_pos, &error) != G_IO_STATUS_NORMAL)
       
   208 	break;
       
   209 	
       
   210       if (line_to_do && line != line_to_do)
       
   211 	goto next;
       
   212       
       
   213       buffer->str[term_pos] = '\0';
       
   214       
       
   215       if (buffer->str[0] == '#') /* Comment */
       
   216 	goto next;
       
   217       if (buffer->str[0] == '@') /* Part */
       
   218 	{
       
   219 	  fprintf (stderr, "\nProcessing %s\n", buffer->str + 1);
       
   220 	  goto next;
       
   221 	}
       
   222       
       
   223       columns = g_strsplit (buffer->str, ";", -1);
       
   224       if (!columns[0])
       
   225 	goto next;
       
   226       
       
   227       if (!process_one (line, columns))
       
   228 	return 1;
       
   229       g_strfreev (columns);
       
   230 
       
   231     next:
       
   232       g_string_truncate (buffer, 0);
       
   233       line++;
       
   234     }
       
   235 
       
   236   if (error)
       
   237     {
       
   238       fprintf (stderr, "Error reading test file, %s\n", error->message);
       
   239       g_assert(FALSE && "unicode-normalize failed");
       
   240 	  #ifdef __SYMBIAN32__
       
   241   	  testResultXml("unicode-normalize");
       
   242   	  #endif /* EMULATOR */
       
   243       return 1;
       
   244     }
       
   245 
       
   246   g_io_channel_unref (in);
       
   247   g_string_free (buffer, TRUE);
       
   248     
       
   249   #ifdef __SYMBIAN32__
       
   250   assert_failed = !success;
       
   251   testResultXml("unicode-normalize");
       
   252   #endif /* EMULATOR */
       
   253 
       
   254   return !success;
       
   255 }