xml/cxmllibrary/dictionary/dict_creator.c
branchRCL_3
changeset 20 889504eac4fb
equal deleted inserted replaced
19:6bcc0aa4be39 20:889504eac4fb
       
     1 /*
       
     2 * Copyright (c) 2002 Nokia Corporation and/or its subsidiary(-ies).
       
     3 * All rights reserved.
       
     4 * This component and the accompanying materials are made available
       
     5 * under the terms of the License "Eclipse Public License v1.0"
       
     6 * which accompanies this distribution, and is available
       
     7 * at the URL "http://www.eclipse.org/legal/epl-v10.html".
       
     8 *
       
     9 * Initial Contributors:
       
    10 * Nokia Corporation - initial contribution.
       
    11 *
       
    12 * Contributors:
       
    13 *
       
    14 * Description:   
       
    15 * Author:       Frank Richichi
       
    16 * Created:      Thu Apr 25 10:34:06 2002
       
    17 * Modified:     Thu Apr 25 11:12:35 2002 (Frank Richichi) richichi@D5250215
       
    18 * Language:     C
       
    19 * Subsystem:    N/A
       
    20 * RCS:          $Id$
       
    21 *
       
    22 */
       
    23 
       
    24 
       
    25 /* ****************************************************************
       
    26 **          Copyright 2000 - Nokia Corporation  All rights reserved.
       
    27 **          Nokia Americas
       
    28 **          6000 Connection Drive
       
    29 **          Irving, Texas 75039
       
    30 **
       
    31 **          Restricted Rights: Use, duplication, or disclosure by the
       
    32 **          U.S. Government is subject to restrictions as set forth in
       
    33 **          subparagraph (c)(1)(ii) of DFARS 252.227-7013, or in FAR
       
    34 **          52.227-19, or in FAR 52.227-14 Alt. III, as applicable.
       
    35 **
       
    36 **          This software is proprietary to and embodies the confidential
       
    37 **          technology of Nokia  Possession, use, or copying of this software 
       
    38 **          and media is authorized only pursuant to a valid written license
       
    39 **          from Nokia or an authorized sublicensor.
       
    40 **
       
    41 **          Nokia  - Wireless Software Solutions
       
    42 *****************************************************************/
       
    43 
       
    44 #include <stdio.h>
       
    45 #include <io.h>
       
    46 #include <string.h>
       
    47 #include <stdlib.h>
       
    48 #include <ctype.h>
       
    49 #include <time.h>
       
    50 
       
    51 
       
    52 /* ----------------------------------------------------------------------- **
       
    53    The following strings are used to identify the beginning of a tag or 
       
    54    attribute list in the input file.
       
    55 ** ----------------------------------------------------------------------- **/
       
    56 
       
    57 #define TAG_TABLE_HEADING   "TAGS_FOR_CODE_PAGE"
       
    58 #define ATTR_TABLE_HEADING  "ATTRIBUTES_FOR_CODE_PAGE"
       
    59 #define NO_STRINGS "NO_STRINGS"
       
    60 
       
    61 /* ----------------------------------------------------------------------- **
       
    62    Types for various tables.
       
    63 ** ----------------------------------------------------------------------- **/
       
    64 #define NAME_TABLE_T    "NW_Byte"
       
    65 #define TOKEN_TABLE_T   "NW_WBXML_DictEntry_t"
       
    66 #define CODEPAGE_TABLE_T  "NW_WBXML_Codepage_t"
       
    67 #define DICTIONARY_TABLE_T  "NW_WBXML_Dictionary_t"
       
    68 #define NAMES_T     "NW_String_UCS2Buff_t"
       
    69 #define ELEMENT_T "NW_%s_Element"
       
    70 #define ATTRIBUTE_T "NW_%s_Attribute"
       
    71 #define ELEMENT_TOKEN_T "NW_%s_ElementToken"
       
    72 #define ATTRIBUTE_TOKEN_T "NW_%s_AttributeToken"
       
    73 
       
    74 /* ----------------------------------------------------------------------- **
       
    75    Misc array limits
       
    76 ** ----------------------------------------------------------------------- **/
       
    77 #define TABLE_SIZE      255
       
    78 #define MAX_LINE        255
       
    79 #define MAX_NAME        255
       
    80 #define MAX_TOKEN       4
       
    81 #define MAX_DICT_NAME   255
       
    82 #define MAX_PUBLIC_ID   255
       
    83 #define MAX_DOC_TYPE    255
       
    84 #define MAX_START_NAME  255
       
    85 #define MAX_TYPE_NAME   255
       
    86 
       
    87 /* ----------------------------------------------------------------------- **
       
    88   Names used in the generated tables
       
    89 ** ----------------------------------------------------------------------- **/
       
    90 #define TAG_NAME    "tag"
       
    91 #define ATTR_NAME   "attribute"
       
    92 
       
    93 
       
    94 /* ----------------------------------------------------------------------- **
       
    95    Define a type for codepage storage
       
    96 ** ----------------------------------------------------------------------- **/
       
    97 typedef struct codepage_s {
       
    98   int   num;    /* the code page number */
       
    99   int   size;   /* the number of entries in table */
       
   100 } codepage_t;
       
   101 
       
   102 
       
   103 /* ----------------------------------------------------------------------- **
       
   104    Header information
       
   105 ** ----------------------------------------------------------------------- **/
       
   106 char dict_name[MAX_DICT_NAME];
       
   107 char doc_type[MAX_DOC_TYPE];
       
   108 char public_id[10];
       
   109 
       
   110 /* ----------------------------------------------------------------------- **
       
   111    Scratch tables
       
   112 ** ----------------------------------------------------------------------- **/
       
   113 char * tokens[TABLE_SIZE];
       
   114 char * names[TABLE_SIZE];
       
   115 int sorted_indexes[TABLE_SIZE];
       
   116 codepage_t tag_codepages[TABLE_SIZE];
       
   117 codepage_t attr_codepages[TABLE_SIZE];
       
   118 
       
   119 
       
   120 /* ----------------------------------------------------------------------- **
       
   121    Records time of program execution and command line arguments
       
   122 ** ----------------------------------------------------------------------- **/
       
   123 static time_t timestamp;
       
   124 static int main_argc = 1;
       
   125 static char** main_argv;
       
   126 
       
   127 /* ----------------------------------------------------------------------- **
       
   128    Globals so error clean up is easy
       
   129 ** ----------------------------------------------------------------------- **/
       
   130 static FILE* input_dict;
       
   131 static FILE* output_c;
       
   132 static FILE* output_h;
       
   133 
       
   134 static void exit_error(int exit_code) {
       
   135   static char errmsg[]
       
   136   = "#error \"Bad input dictionary data cannot complete code generation\"\n";
       
   137   if (input_dict) fclose(input_dict);
       
   138   if (output_c) {
       
   139     fprintf(output_c, errmsg);
       
   140     fclose(output_c);
       
   141   }
       
   142   if (output_h) {
       
   143     fprintf(output_h, errmsg);
       
   144     fclose(output_h);
       
   145   }
       
   146   exit(exit_code);
       
   147 }
       
   148 
       
   149 void print_usage(char* progname)
       
   150 {
       
   151   fprintf(stderr,
       
   152   "Dictionary creator - creates a .c and .h source file for a\n"
       
   153   "NW_Wbxml_Dictionary_t structure from a dictionary data file.\n"
       
   154   "\n"
       
   155   "Usage: %s data_input_file dot_c_output_file dot_h_output_file\n"
       
   156   "\n"
       
   157   " - all args are required\n"
       
   158   "\n"
       
   159   "Comments are defined to be:\n"
       
   160   "\n"
       
   161   " - A line whose first non-whitespace char is '#'\n"
       
   162   " - A blank line or line with only whitespace\n"
       
   163   " Note: Comments are NOT permitted at the end of other lines.\n"
       
   164   "\n"
       
   165   "Input format:\n"
       
   166   "\n"
       
   167   " - Dictionary name = the first non-comment line.  The dictionary\n"
       
   168   "   that will be created will use this string in the dictionary \n"
       
   169   "   name.  For example, if the dictionary name is \"wml\", the\n"
       
   170   "   following dictionary will be created:\n"
       
   171   "\n"
       
   172   "     NW_Wbxml_Dictionary_t NW_wml_dictionary = { ... }\n"
       
   173   "\n"
       
   174   " - Public id = the second non-comment line.  This is the Public \n"
       
   175   "   Identifier as specified in section 7.2 of the WBML spec.  If\n"
       
   176   "   a public id has not been defined, use \"1\".\n"
       
   177   "\n"
       
   178   " - Doc Type = the third non-comment line.  This is the source\n"
       
   179   "   document's Document Type Declaration.\n"
       
   180   "\n"
       
   181   " - The beginning of a Tag table has the following syntax:\n"
       
   182   "\n"
       
   183   "     TAGS_FOR_CODE_PAGE <codepage_number> [NO_STRINGS]\n"
       
   184   "\n"
       
   185   "     <codepage_number> is required and is a base 10 number\n"
       
   186   "\n"
       
   187   "     [NO_STRINGS] is an optional keyword which means output only\n"
       
   188   "     the token structures, treat all token names as empty strings\n"
       
   189   "\n"
       
   190   " - The beginning of an Attribute table has the following syntax:\n"
       
   191   "\n"
       
   192   "     ATTRIBUTES_FOR_CODE_PAGE <codepage_number> [NO_STRINGS]\n"
       
   193   "     \n"
       
   194   "     <codepage_number> is required and is a base 10 number\n"
       
   195   "\n"
       
   196   "     [NO_STRINGS] is an optional keyword which means output only\n"
       
   197   "     the token structures, treat all token names as empty strings\n"
       
   198   "\n"
       
   199   " - The syntax for an entry is the same for both Tag and Attribute \n"
       
   200   "   tables\n"
       
   201   "\n"
       
   202   "     <token> <name>\n"
       
   203   "\n"
       
   204   "     <token> must be a hexadecimal number written as 0x?? (e.g., 0x07)\n"
       
   205   "     NOTE: Tokens are sorted as strings so 0x7 MUST be entered as 0x07\n"
       
   206   "           or the sorting won't work!  It is ok to mix case as all hex\n"
       
   207   "           digits are converted to lower case before the sort.\n"
       
   208   "\n"
       
   209   "     <name> is any sequence of printable characters without whitespace\n"
       
   210   "\n"
       
   211   " - The input tables do NOT have to be sorted \n"
       
   212   "\n"
       
   213   " - If a language has \"Attribute Value Tokens\", they should be included\n"
       
   214   "   in the Attribute table\n"
       
   215   "\n"
       
   216   "Example input file (example.dict):\n"
       
   217   "\n"
       
   218   "  #\n"
       
   219   "  # Sample dictionary for the WML language\n"
       
   220   "  #\n"
       
   221   "  wml\n"
       
   222   "  #\n"
       
   223   "  # WML version 1.2 has a public id of 9\n"
       
   224   "  #\n"
       
   225   "  9\n"
       
   226   "  #\n"
       
   227   "  # WML 1.2 doc type\n"
       
   228   "  #\n"
       
   229   "  -//WAPFORUM//DTD WML 1.1//EN\n"
       
   230   "  #\n"
       
   231   "  # Tags\n"
       
   232   "  #\n"
       
   233   "  TAGS_FOR_CODE_PAGE 0\n"
       
   234   "  0x2b go\n"
       
   235   "  0x1d td\n"
       
   236   "  #\n"
       
   237   "  # Attributes\n"
       
   238   "  #\n"
       
   239   "  ATTRIBUTES_FOR_CODE_PAGE 0\n"
       
   240   "  0x12 format\n"
       
   241   "  0xA0 wrap\n"
       
   242   "  0x23 newcontext=true\n"
       
   243   "  0x8F http://www.\n"
       
   244   "\n"
       
   245   "Example run command:\n"
       
   246   "\n"
       
   247   "  %s example.dict example.c example.h\n",
       
   248   progname, progname);
       
   249 }
       
   250 
       
   251 static void print_automaticallyGeneratedCodeWarning(FILE *f)
       
   252 {
       
   253   int i;
       
   254   static char automaticallyGeneratedCodeWarning[] =
       
   255   "/*\n"
       
   256   "** WARNING\n"
       
   257   "**\n"
       
   258   "** DO NOT EDIT - THIS CODE IS AUTOMATICALLY GENERATED\n"
       
   259   "**               FROM A DATA FILE BY THE DICTIONARY CREATION PROGRAM";
       
   260 
       
   261   fprintf(f, "%s\n", automaticallyGeneratedCodeWarning);
       
   262   fprintf(f,
       
   263           "**\n"
       
   264           "** This file generated on %s"
       
   265           "**                        (coordinated universal time)\n"
       
   266           "**\n"
       
   267           "** Command line: ",
       
   268           asctime(gmtime(&timestamp))); /* asctime() generates a newline at the end */
       
   269   fprintf(f, "%s", main_argv[0]);
       
   270   for (i = 1; i < main_argc; i++) {
       
   271     fprintf(f, " %s", main_argv[i]);
       
   272   }
       
   273   fprintf(f, "\n*/");
       
   274 }
       
   275 
       
   276 static void print_copyright(FILE *f)
       
   277 {
       
   278   struct tm* tm_time;
       
   279   static const char copyright[] =
       
   280   "/* ****************************************************************\n"
       
   281   "**          Copyright %d - Nokia Corporation  All rights reserved.\n"
       
   282   "**          Nokia Americas\n"
       
   283   "**          6000 Connection Drive\n"
       
   284   "**          Irving, Texas 75039\n"
       
   285   "**\n"
       
   286   "**          Restricted Rights: Use, duplication, or disclosure by the\n"
       
   287   "**          U.S. Government is subject to restrictions as set forth in\n"
       
   288   "**          subparagraph (c)(1)(ii) of DFARS 252.227-7013, or in FAR\n"
       
   289   "**          52.227-19, or in FAR 52.227-14 Alt. III, as applicable.\n"
       
   290   "**\n"
       
   291   "**          This software is proprietary to and embodies the confidential\n"
       
   292   "**          technology of Nokia  Possession, use, or copying of this software\n"
       
   293   "**          and media is authorized only pursuant to a valid written license\n"
       
   294   "**          from Nokia or an authorized sublicensor.\n"
       
   295   "**\n"
       
   296   "**          Nokia  - Wireless Software Solutions\n"
       
   297   "*****************************************************************/";
       
   298 
       
   299   tm_time = gmtime(&timestamp);
       
   300   fprintf(f, copyright,
       
   301           tm_time->tm_year + 1900);
       
   302 }
       
   303 
       
   304 static FILE * open_file(char* fn, char *perms){
       
   305   FILE* f;
       
   306   
       
   307   if ((fn == 0)||((f = fopen(fn, perms)) == 0)){    
       
   308     return 0;
       
   309   }else{    
       
   310     return f;
       
   311   }
       
   312   
       
   313 }
       
   314 
       
   315 static init() {
       
   316   int i;
       
   317 
       
   318   for (i=0; i < TABLE_SIZE; i++) {
       
   319     tag_codepages[i].num = -1;
       
   320     attr_codepages[i].num = -1;
       
   321   }
       
   322 }
       
   323 
       
   324 
       
   325 /* ----------------------------------------------------------------------- **
       
   326   Read the next line of input and store it in s.  
       
   327  
       
   328   NOTE: s will have the new-line character stripped
       
   329  
       
   330   RETURN: 1 for success and 0 for EOF or failure 
       
   331 ** ----------------------------------------------------------------------- **/
       
   332 static int get_line(FILE *f, char s[], int n){
       
   333   int i = 0, j, indx, len;
       
   334   char line[MAX_LINE];
       
   335 
       
   336   for(;;) {
       
   337     if ((fgets(line, n, f)) == 0) {
       
   338       /* Either EOF or an error occurred */
       
   339       return 0;
       
   340     }
       
   341     len = (int) strlen(line);
       
   342     /* Skip any preceeding whitespace */
       
   343     for (i=0; i < len; i++) {
       
   344       if (isspace(line[i]))
       
   345       continue;
       
   346       break;
       
   347     }
       
   348     if (i >= len || line[i] == '#')
       
   349       continue;
       
   350     break;
       
   351   }
       
   352 
       
   353   /* Fill in s */
       
   354   for (j=i, indx = 0; j < (int) strlen(line); i++, j++, indx++) {
       
   355     if (line[j] == '\n') {
       
   356       break;
       
   357     }
       
   358     if (line[j] == '\t') {
       
   359       /* convert tab to space for isprint() test */
       
   360       line[j] = ' ';
       
   361     }
       
   362     if (!isprint(line[j])) {
       
   363       /* stops on bogus char */
       
   364       fprintf(stderr,
       
   365               "ERROR: Illegal character (may be control char) in input text "
       
   366               "near file byte offset %ld\n",
       
   367               ftell(f));
       
   368       exit_error(1);
       
   369     }
       
   370     s[indx] = line[j];
       
   371     if (indx + 1 == MAX_LINE) break;
       
   372   }
       
   373 
       
   374   if (indx > 0)
       
   375     s[indx] = '\000';
       
   376 
       
   377   return 1;
       
   378 }
       
   379 
       
   380 
       
   381 /* ----------------------------------------------------------------------- **
       
   382    Read the a line of input and break it into three items:
       
   383    For table header
       
   384       1. Table type name
       
   385       2. Code page number
       
   386       3. Optional "NO_STRINGS"
       
   387    For table entry
       
   388       1. 0x?? token value
       
   389       2. token string (optional, extends from first nonspace to eol)
       
   390       3. null
       
   391  
       
   392   RETURN: 1 for success and 0 for EOF or failure
       
   393  ** ----------------------------------------------------------------------- **/
       
   394 static int get_tuple(FILE *f, char item1[], char item2[], char item3[])
       
   395 {
       
   396   char line[MAX_LINE];
       
   397   int i;
       
   398   int j;
       
   399   int len;
       
   400 
       
   401   if ((get_line(f, line, MAX_LINE)) != 1) {
       
   402     return 0;
       
   403   }
       
   404 
       
   405   len = (int)strlen(line);
       
   406 
       
   407   /* tablename or token hex value */
       
   408   j = 0;
       
   409   for (i = 0; i < len; i++) {
       
   410     if (isspace(line[i])) break;
       
   411     if (item1[0] == '0' && j == MAX_TOKEN) {
       
   412       /* have to check for leading '0' because this func is called to process
       
   413       TAG_TABLE_HEADING lines too where length could be longer than MAX_TOKEN */
       
   414       fprintf(stderr, "ERROR: token value too long near input file byte offset %ld\n",
       
   415               ftell(f));
       
   416       exit_error(1);
       
   417       break;
       
   418     }
       
   419     item1[j++] = line[i];
       
   420   }
       
   421   item1[j] = '\0';
       
   422 
       
   423   /* whitespace */
       
   424   for (; i < len; i++) {
       
   425     if (!isspace(line[i])) break;
       
   426   }
       
   427 
       
   428   /* codepage or token string */
       
   429   j = 0;
       
   430   for (/* continue with i */; i < len; i++) {
       
   431     if (item1[0] != '0' && isspace(line[i])) break;
       
   432     if (j == MAX_NAME) {
       
   433       fprintf(stderr, "ERROR: token string too long near input file byte offset %ld\n",
       
   434               ftell(f));
       
   435       exit_error(1);
       
   436       break;
       
   437     }
       
   438     item2[j++] = line[i];
       
   439   }
       
   440   item2[j] = '\0';
       
   441 
       
   442   /* whitespace */
       
   443   for (; i < len; i++) {
       
   444     if (!isspace(line[i])) break;
       
   445   }
       
   446 
       
   447   /* nostrings or nothing */
       
   448   j = 0;
       
   449   for (/* continue with i */; i < len; i++) {
       
   450     item3[j++] = line[i];
       
   451   }
       
   452   item3[j] = '\0';
       
   453 
       
   454   return 1;
       
   455 }
       
   456 
       
   457 static void process_header(FILE *f)
       
   458 {
       
   459   if ((get_line(f, dict_name, MAX_DICT_NAME)) != 1) {
       
   460     fprintf(stderr, "ERROR: reading dictionary name\n");
       
   461     exit_error(1);
       
   462   }
       
   463 
       
   464   if ((get_line(f, public_id, MAX_PUBLIC_ID)) != 1) {
       
   465     fprintf(stderr, "ERROR: reading public id\n");
       
   466     exit_error(1);
       
   467   }
       
   468 
       
   469   if ((get_line(f, doc_type, MAX_DOC_TYPE)) != 1) {
       
   470     fprintf(stderr, "ERROR: reading doc type\n");
       
   471     exit_error(1);
       
   472   }
       
   473 }
       
   474 
       
   475 static void print_file_header(FILE *output, FILE *outputHeader)
       
   476 {
       
   477   fprintf(output, "/*\n");
       
   478   fprintf(output, " * Dictionary = %s\n", dict_name);
       
   479   fprintf(output, " * Public id = %s\n", public_id);
       
   480   fprintf(output, " * Doc Type = %s\n", doc_type);
       
   481   fprintf(output, " */\n");
       
   482   fprintf(output, "\n");
       
   483 
       
   484   fprintf(output, "#include \"%s\"\n", main_argv[3]);
       
   485   fprintf(output, "#include \"xml\/cxml\/nw_wbxml_dictionary.h\"\n");
       
   486 
       
   487   fprintf(output, "\n");
       
   488 
       
   489   /* Including the typedef for public id */
       
   490   fprintf(outputHeader, "#define NW_%s_PublicId %s\n", dict_name, public_id);
       
   491 }
       
   492 
       
   493 static void print_table_header(FILE *output, char * table_type, char *type, char *key, char *code_page, int n)
       
   494 {
       
   495   print_automaticallyGeneratedCodeWarning(output);
       
   496   fprintf(output, "\n\n");
       
   497   fprintf(output, "/*\n");
       
   498   fprintf(output, " * %s entries - sorted by %s\n", type, key);
       
   499   fprintf(output, " */\n");
       
   500   fprintf(output, "static const\n");
       
   501   fprintf(output, "%s NW_%s_%s_%s_%s[%d] = {\n", table_type, dict_name, type, key, code_page, n);
       
   502 }
       
   503 
       
   504 static void process_entry(int i, char *token, char *name) 
       
   505 {
       
   506   tokens[i] = strdup(token);
       
   507   names[i] = strdup(name);
       
   508   sorted_indexes[i] = i;
       
   509 }
       
   510 
       
   511 
       
   512 /* ----------------------------------------------------------------------- **
       
   513    Print the table of tokens and names - sorted by token
       
   514    Also create a list of the items, sorted by name 
       
   515  ** ----------------------------------------------------------------------- **/
       
   516 static void print_token_table(FILE *output, FILE* outputHeader,
       
   517                               char *type, char *key, char *code_page, int n,
       
   518                               int use_strings)
       
   519 {
       
   520   static char tagTypeString[MAX_TYPE_NAME];
       
   521   static char tokenTypeString[MAX_TYPE_NAME];
       
   522 
       
   523   /* Must first sort by token */
       
   524   int i, j;
       
   525   char *tmp_token;
       
   526   char *tmp_name;
       
   527   char *tagType = &tagTypeString[0];
       
   528 
       
   529   if (strcmp("tag", type) == 0)
       
   530   {
       
   531     (void)sprintf(tagTypeString, ELEMENT_T, dict_name);
       
   532     (void)sprintf(tokenTypeString, ELEMENT_TOKEN_T, dict_name);
       
   533   }
       
   534   else if (strcmp("attribute", type) == 0)
       
   535   {
       
   536     (void)sprintf(tagTypeString, ATTRIBUTE_T, dict_name);
       
   537     (void)sprintf(tokenTypeString, ATTRIBUTE_TOKEN_T, dict_name);
       
   538   }
       
   539   else {
       
   540     fprintf(stderr, "ERROR: internal error\n");
       
   541     exit_error(1);
       
   542   }
       
   543 
       
   544   /* make all the tokens lower case */
       
   545   for (i=0; i < n; i++) {
       
   546     int l = (int)strlen(tokens[i]);
       
   547     if (l != 4) {
       
   548       fprintf(stderr, 
       
   549               "ERROR: Badly formatted token %s\n"
       
   550               "All token values must be in the form 0x?? (e.g., 0x07) "
       
   551               "or the sorting algorithm won't work.\n"
       
   552               , tokens[i]);
       
   553       exit_error(1);
       
   554     }
       
   555     for (j=0; j < l; j++) {
       
   556       if (isupper(tokens[i][j])) {
       
   557         tokens[i][j] = (char)tolower(tokens[i][j]);
       
   558       }
       
   559       if ((tokens[i][j] != 'x') && isalpha(tokens[i][j])) {
       
   560         if ((tokens[i][j] < 'a') || (tokens[i][j] > 'f')) {
       
   561           fprintf(stderr, 
       
   562                   "ERROR: Illegal hex digit in token %s\n"
       
   563                   , tokens[i]);
       
   564           exit_error(1);
       
   565         }
       
   566       }
       
   567     }
       
   568   }
       
   569 
       
   570   for (i=0; i < n-1; i++) {
       
   571     for (j=0; j < n-1; j++) {
       
   572       /* 
       
   573        * Sort by token 
       
   574        */
       
   575       if ((strcmp(tokens[j], tokens[j+1])) > 0) {
       
   576         /* Swap the two elements */
       
   577         tmp_token = tokens[j];
       
   578         tmp_name = names[j];
       
   579         tokens[j] = tokens[j+1];
       
   580         names[j] = names[j+1];
       
   581         tokens[j+1] = tmp_token;
       
   582         names[j+1] = tmp_name;
       
   583       }
       
   584     }
       
   585   }
       
   586 
       
   587   /*
       
   588    * print variables
       
   589    */
       
   590   fprintf(output, "\n");
       
   591   for (i=0; i < n; i++) {
       
   592     char tempName[MAX_NAME+1];
       
   593     int l;
       
   594     if (isdigit(names[i][0])) {
       
   595       tempName[0] = '_';
       
   596       strcpy(tempName+1, names[i]);
       
   597     } else {
       
   598       strcpy(tempName, names[i]);
       
   599     }
       
   600     l = (int)strlen(tempName);
       
   601     for (j = 0; j < l; j++) {
       
   602       if (isdigit(tempName[j])
       
   603           || isalpha(tempName[j])
       
   604           || (tempName[j] == '_')) {
       
   605         continue;
       
   606       }
       
   607       tempName[j] = '_';
       
   608     }
       
   609     if (use_strings) {
       
   610       fprintf(output, 
       
   611               "static const NW_Ucs2 %sTag_%s[] = {"
       
   612               , tagType, tempName);
       
   613       for (j=0; j < (int) strlen(names[i]); j++) {
       
   614         if (names[i][j] == '\\') {
       
   615           fprintf(output, "\'\\\\\',");
       
   616         } else {
       
   617           fprintf(output, "\'%c\',", names[i][j]);
       
   618         }
       
   619       }
       
   620       fprintf(output, "\'\\0\'};\n");
       
   621     }
       
   622   }
       
   623   if (!use_strings) {
       
   624     fprintf(output,
       
   625             "static const NW_Ucs2 %sTag_emptyString_%s[] = { \'\\0\' };\n"
       
   626             , tagType, code_page);
       
   627   }
       
   628   fprintf(output, "\n");
       
   629 
       
   630   print_table_header(output, TOKEN_TABLE_T, type, key, code_page, n);
       
   631   if (use_strings) {
       
   632     fprintf(outputHeader, 
       
   633             "\ntypedef enum %sToken_%s_e{\n", 
       
   634             tagType, code_page);
       
   635   }
       
   636  /*
       
   637   * Print the table
       
   638   */
       
   639   for (i=0; i < n; i++) {
       
   640     char tempName[MAX_NAME+1] ;
       
   641     char tempToken[6] ;
       
   642     char *token;
       
   643     int l;
       
   644     if (isdigit(names[i][0])) {
       
   645       tempName[0] = '_';
       
   646       strcpy(tempName+1, names[i]);
       
   647     } else {
       
   648       strcpy(tempName, names[i]);
       
   649     }
       
   650     l = (int)strlen(tempName);
       
   651     for (j = 0; j < l; j++) {
       
   652       if (isdigit(tempName[j])
       
   653           || isalpha(tempName[j])
       
   654           || (tempName[j] == '_')) {
       
   655         continue;
       
   656       }
       
   657       tempName[j] = '_';
       
   658     }
       
   659     strcpy(tempToken, tokens[i]);
       
   660     token = strchr(tempToken, 'x');
       
   661     token++;
       
   662     if (use_strings) {
       
   663       fprintf(output, "\t{%s, (%s *) %sTag_%s", tokens[i], NAMES_T, tagType, tempName);
       
   664       fprintf(outputHeader, "\t%s_%s = 0x0%s%s", tokenTypeString, tempName, code_page, token);
       
   665     } else {
       
   666       fprintf(output, "\t{%s, &%sTag_emptyString_%s", tokens[i], tagType, code_page);
       
   667     }
       
   668     
       
   669     if (i == (n-1))
       
   670     {
       
   671       fprintf(output, "}\n");
       
   672       if (use_strings) {
       
   673         fprintf(outputHeader, "\n");
       
   674       }
       
   675     }
       
   676     else
       
   677     {
       
   678       fprintf(output, "},\n");
       
   679       if (use_strings) {
       
   680         fprintf(outputHeader, ",\n");
       
   681       }
       
   682     }
       
   683   }
       
   684   fprintf(output, "};\n\n");
       
   685   if (use_strings) {
       
   686     fprintf(outputHeader, "}%sToken_%s_t;\n\n", tagType, code_page);
       
   687   }
       
   688 
       
   689   if (use_strings) {
       
   690     /*
       
   691     * Create an array of the names sorted by index
       
   692     */
       
   693     for (i=0; i < n-1; i++) {
       
   694       for (j=0; j < n-1; j++) {
       
   695         /*
       
   696         * Since we will need an array of the names sorted by index,
       
   697         * generate that arrary now.
       
   698         */
       
   699         if ((strcmp(names[j], names[j+1])) > 0) {
       
   700           /* Swap the two names */
       
   701           int tmp_token;
       
   702           tmp_name = names[j];
       
   703           tmp_token = sorted_indexes[j];
       
   704           names[j] = names[j+1];
       
   705           names[j+1] = tmp_name;
       
   706           sorted_indexes[j] = sorted_indexes[j+1];
       
   707           sorted_indexes[j+1] = tmp_token;
       
   708         }
       
   709       }
       
   710     }
       
   711   }
       
   712 }
       
   713 
       
   714 static void cache_codepage(codepage_t table[], char *cp_num, int n)
       
   715 {
       
   716   int num = atoi(cp_num);
       
   717 
       
   718   if (num >= TABLE_SIZE) {
       
   719     fprintf(stderr, "ERROR: Codepage '%d' is too large!\n", num);
       
   720     exit(1);
       
   721   }
       
   722 
       
   723   table[num].num = num; 
       
   724   table[num].size = n; 
       
   725 }
       
   726 
       
   727 /*
       
   728  * Print a table of the names 
       
   729  */
       
   730 static void print_name_table(FILE *output, char *type, char *key, char *code_page, int n,
       
   731                              int use_strings)
       
   732 {
       
   733   int i;
       
   734 
       
   735   print_table_header(output, NAME_TABLE_T, type, key, code_page, n);
       
   736 
       
   737   for (i=0; i < n; i++) {
       
   738     if (use_strings) {
       
   739       fprintf(output, "\t%d,\n", sorted_indexes[i]);
       
   740     } else {
       
   741       fprintf(output, "\t0,\n");
       
   742     }
       
   743   }
       
   744 
       
   745   fprintf(output, "};\n");
       
   746 }
       
   747 
       
   748 static process_content(FILE *f, FILE *output, FILE* outputHeader)
       
   749 {
       
   750   char token[MAX_START_NAME+1]; /* Must be big enough to hold a 
       
   751                                    tag/attr start string */
       
   752   char name[MAX_NAME+1];
       
   753   char optional[MAX_LINE+1];
       
   754   char *tag_code_page = "";
       
   755   char *attr_code_page = "";
       
   756   char processing_tag = 2;  /* processing state: 1 = tag, 0 = attribute, 2 = init */
       
   757   int n = 0;
       
   758   int use_strings = 1;
       
   759 
       
   760   for(;;) {
       
   761 
       
   762     if ((get_tuple(f, token, name, optional)) != 1) {
       
   763       break;
       
   764     }
       
   765 
       
   766     if (!strcmp(token, TAG_TABLE_HEADING)) {
       
   767       use_strings = strcmp(optional, NO_STRINGS);
       
   768       if (processing_tag == 1) {
       
   769         /* Process the current tag table */
       
   770         print_token_table(output, outputHeader, TAG_NAME, "token", tag_code_page, n, use_strings);
       
   771         print_name_table(output, TAG_NAME, "name", tag_code_page, n, use_strings);
       
   772         cache_codepage(tag_codepages, tag_code_page, n);
       
   773         n = 0;
       
   774       } else if (processing_tag == 0) {
       
   775         /* Process the current attribute table */
       
   776         print_token_table(output, outputHeader, ATTR_NAME, "token", attr_code_page, n, use_strings);
       
   777         print_name_table(output, ATTR_NAME, "name", attr_code_page, n, use_strings);
       
   778         cache_codepage(attr_codepages, attr_code_page, n);
       
   779         n = 0;
       
   780       }
       
   781       tag_code_page = strdup(name);
       
   782       processing_tag = 1;
       
   783     }
       
   784     else if (!strcmp(token, ATTR_TABLE_HEADING)) {
       
   785       use_strings = strcmp(optional, NO_STRINGS);
       
   786       if (processing_tag == 1) {
       
   787         /* Process the current tag table */
       
   788         print_token_table(output, outputHeader, TAG_NAME, "token", tag_code_page, n, use_strings);
       
   789         print_name_table(output, TAG_NAME, "name", tag_code_page, n, use_strings);
       
   790         cache_codepage(tag_codepages, tag_code_page, n);
       
   791         n = 0;
       
   792       } else if (processing_tag == 0) {
       
   793         /* Process the current attribute table */
       
   794         print_token_table(output, outputHeader, ATTR_NAME, "token", attr_code_page, n, use_strings);
       
   795         print_name_table(output, ATTR_NAME, "name", attr_code_page, n, use_strings);
       
   796         cache_codepage(attr_codepages, attr_code_page, n);
       
   797         n = 0;
       
   798       }
       
   799       attr_code_page = strdup(name);
       
   800       processing_tag = 0;
       
   801     } else {
       
   802       process_entry(n, token, name);
       
   803       n++;
       
   804     }
       
   805   }
       
   806 
       
   807   if (processing_tag == 2) {
       
   808     fprintf(stderr, 
       
   809             "ERROR: Could not find tag or attribute table starts in file.\n"
       
   810             "       Input file syntax has changed.\n"
       
   811             "       See usage by executing this program with no arguments.\n");
       
   812     exit_error(1);
       
   813   }
       
   814 
       
   815   /*
       
   816    * If anything is left, process it
       
   817    */
       
   818   if (n > 0) {
       
   819     if (processing_tag == 1) {
       
   820       print_token_table(output, outputHeader, TAG_NAME, "token", tag_code_page, n, use_strings);
       
   821       print_name_table(output, TAG_NAME, "name", tag_code_page, n, use_strings);
       
   822       cache_codepage(tag_codepages, tag_code_page, n);
       
   823     } else if (processing_tag == 0) {
       
   824       print_token_table(output, outputHeader, ATTR_NAME, "token", attr_code_page, n, use_strings);
       
   825       print_name_table(output, ATTR_NAME, "name", attr_code_page, n, use_strings);
       
   826       cache_codepage(attr_codepages, attr_code_page, n);
       
   827     }
       
   828   }
       
   829 }
       
   830 
       
   831 static void get_num_codepages(codepage_t cp[], int *n, int *max)
       
   832 {
       
   833   int i;
       
   834   /* Determine the number of codepages */
       
   835   for (i=0; i < TABLE_SIZE; i++) {
       
   836     if (cp[i].num != -1) {
       
   837       *max = i;
       
   838       (*n)++;
       
   839     }
       
   840   }
       
   841 }
       
   842 
       
   843 static void print_codepage_table(FILE *output, codepage_t cp[], int n, int max, char *type)
       
   844 {
       
   845   int i;
       
   846 
       
   847   if (n == 0)
       
   848   return;
       
   849 
       
   850   fprintf(output, "static const\n");
       
   851   fprintf(output, "%s NW_%s_%s_codepages[%d] = {\n", CODEPAGE_TABLE_T, 
       
   852       dict_name, type, max + 1);
       
   853 
       
   854   for (i=0; i <= max && i < TABLE_SIZE; i++) {
       
   855     if (cp[i].num == -1) {
       
   856       fprintf(output, "\t{0, 0, 0},\n");
       
   857     } else {
       
   858       fprintf(output, "\t{%d, (%s*)&NW_%s_%s_token_%d[0], ", 
       
   859       cp[i].size, TOKEN_TABLE_T, dict_name, type, cp[i].num);
       
   860       fprintf(output, "(NW_Byte *)&NW_%s_%s_name_%d[0]},\n", 
       
   861       dict_name, type, cp[i].num);
       
   862     }
       
   863   }
       
   864   fprintf(output, "};\n");
       
   865 }
       
   866 
       
   867 static void print_codepage_tables(FILE *output) 
       
   868 {
       
   869   int n=0, max=0;
       
   870 
       
   871   /* Tag tables */
       
   872   fprintf(output, 
       
   873           "\n"
       
   874           "/*\n"
       
   875           " * Tag codepage table\n"
       
   876           " */\n"
       
   877           );
       
   878 
       
   879   get_num_codepages(tag_codepages, &n, &max);
       
   880   print_codepage_table(output, tag_codepages, n, max, TAG_NAME);
       
   881 
       
   882   /* Attr tables */
       
   883   fprintf(output, 
       
   884           "\n"
       
   885           "/*\n"
       
   886           " * Attribute codepage table\n"
       
   887           " */\n"
       
   888           );
       
   889 
       
   890   n = max = 0;
       
   891   get_num_codepages(attr_codepages, &n, &max);
       
   892   print_codepage_table(output, attr_codepages, n, max, ATTR_NAME);
       
   893 }
       
   894 
       
   895 static void add_codepage(FILE *output, codepage_t cp[], char *type)
       
   896 {
       
   897   int n=0, max=0;
       
   898 
       
   899   get_num_codepages(cp, &n, &max);
       
   900 
       
   901   if (n == 0)
       
   902     fprintf(output, "\t0, 0,\n");
       
   903   else
       
   904     fprintf(output, "\t%d, (%s*)&NW_%s_%s_codepages[0],\n", max + 1, CODEPAGE_TABLE_T, dict_name, type);
       
   905 }
       
   906 
       
   907 static void print_dictionary_table(FILE *output, FILE *outputHeader)
       
   908 {
       
   909   int i;
       
   910   if (strlen(doc_type) > 0)
       
   911   {
       
   912     fprintf(output, "\nstatic const NW_Ucs2 NW_%s_docType[] = {", dict_name);
       
   913     for (i=0; i < (int) strlen(doc_type); i++)
       
   914       fprintf(output, "\'%c\',", doc_type[i]);
       
   915     fprintf(output, "\'\\0\'};\n");
       
   916   }
       
   917 
       
   918   fprintf(output, 
       
   919           "\n"
       
   920           "/*\n"
       
   921           " * Dictionary\n"
       
   922           " */\n"
       
   923           );
       
   924 
       
   925   fprintf(outputHeader, "extern const %s NW_%s_WBXMLDictionary\n",
       
   926           DICTIONARY_TABLE_T, dict_name);
       
   927   fprintf(output, "%s NW_%s_WBXMLDictionary = {\n", DICTIONARY_TABLE_T, dict_name);
       
   928   fprintf(output, "\tNW_%s_PublicId,\n", dict_name);
       
   929 
       
   930   /* Print the doc type as a UCS2 string */
       
   931   fprintf(output, "\t(%s *)NW_%s_docType,\n", "NW_Ucs2", dict_name);
       
   932 
       
   933   /* Add the tag and attribute code page */
       
   934   add_codepage(output, tag_codepages, TAG_NAME);
       
   935   add_codepage(output, attr_codepages, ATTR_NAME);
       
   936 
       
   937   fprintf(output, "};\n");
       
   938 }
       
   939 
       
   940 static void process_file(FILE *f, FILE *output, FILE* outputHeader)
       
   941 {
       
   942   process_header(f);
       
   943 
       
   944   print_file_header(output, outputHeader);
       
   945 
       
   946   process_content(f, output, outputHeader);
       
   947 
       
   948   print_codepage_tables(output);
       
   949 
       
   950   print_dictionary_table(output, outputHeader);
       
   951 }
       
   952 
       
   953 int main(int argc, char ** argv){
       
   954   if (argc < 4) {
       
   955     print_usage(argv[0]);
       
   956     exit_error(1);
       
   957   }
       
   958 
       
   959   init();
       
   960 
       
   961   input_dict = NULL;
       
   962   output_c = NULL;
       
   963   output_h = NULL;
       
   964 
       
   965   /* ----------------------------------------------------------------------- **
       
   966      Save info used in print_automaticallyGeneratedCodeWarning()   
       
   967   ** ----------------------------------------------------------------------- **/
       
   968   (void)time(&timestamp);
       
   969   main_argc = argc;
       
   970   main_argv = argv;
       
   971 
       
   972 
       
   973   /* ----------------------------------------------------------------------- **
       
   974      Work on files in text mode to ease end-of-line processing in DOS.  
       
   975   ** ----------------------------------------------------------------------- **/
       
   976   if ((input_dict = open_file(argv[1], "rt")) == NULL) {
       
   977     fprintf(stderr, "ERROR: Input File '%s' could NOT be opened!\n", argv[1]);
       
   978     exit_error(1);
       
   979   }
       
   980 
       
   981   if ((output_c = open_file(argv[2], "wt")) == NULL) {
       
   982     fprintf(stderr, "ERROR: Output .c File '%s' could NOT be opened!\n", argv[2]);
       
   983     exit_error(1);
       
   984   }
       
   985 
       
   986   if ((output_h = open_file(argv[3], "wt")) == NULL) {
       
   987     fprintf(stderr, "ERROR: Output .h File '%s' could NOT be opened!\n", argv[3]);
       
   988     exit_error(1);
       
   989   }
       
   990 
       
   991   print_copyright(output_c);
       
   992   fprintf(output_c, "\n\n");
       
   993   print_automaticallyGeneratedCodeWarning(output_c);
       
   994   fprintf(output_c, "\n\n");
       
   995 
       
   996   print_copyright(output_h);
       
   997   fprintf(output_h, "\n\n");
       
   998   print_automaticallyGeneratedCodeWarning(output_h);
       
   999   fprintf(output_h, "\n\n");
       
  1000 
       
  1001   {
       
  1002     int l;
       
  1003     char* p = strrchr(argv[3], '.');
       
  1004     l = (p == NULL) ? (int)strlen(argv[3]) : p - argv[3];
       
  1005 
       
  1006     fprintf(output_h, "#ifndef HEADER_GUARD_%.*s", l, argv[3]);
       
  1007     if (p != NULL) fprintf(output_h, "_%s", p+1);
       
  1008     fprintf(output_h,
       
  1009             "\n"
       
  1010             "#define HEADER_GUARD_%.*s"
       
  1011             , l, argv[3]);
       
  1012     if (p != NULL) fprintf(output_h, "_%s", p+1);
       
  1013     fprintf(output_h, 
       
  1014             "\n"
       
  1015             "\n"
       
  1016             "#ifdef __cplusplus\n"
       
  1017             "extern \"C\"\n"
       
  1018             "{\n"
       
  1019             "#endif\n"
       
  1020             "\n"
       
  1021             "\n"
       
  1022             );
       
  1023   }
       
  1024 
       
  1025   /* process input */
       
  1026   process_file(input_dict, output_c, output_h);
       
  1027 
       
  1028   fprintf(output_c, "\n");
       
  1029   print_automaticallyGeneratedCodeWarning(output_c);
       
  1030 
       
  1031   fprintf(output_c, "\n");
       
  1032   fprintf(output_h, "\n");
       
  1033   print_automaticallyGeneratedCodeWarning(output_h);
       
  1034 
       
  1035   fprintf(output_h,
       
  1036           "\n"
       
  1037           "\n"
       
  1038           "#ifdef __cplusplus\n"
       
  1039           "} /* extern \"C\" */\n"
       
  1040           "#endif\n"
       
  1041           "\n"
       
  1042           "#endif\n"
       
  1043           );
       
  1044   
       
  1045   /* close files */
       
  1046   fclose(input_dict);
       
  1047   fclose(output_c);
       
  1048   fclose(output_h);
       
  1049 
       
  1050   return 0;
       
  1051 }