xml/cxmllibrary/src/wbxmlp/src/parse_buffer.c
branchRCL_3
changeset 21 604ca70b6235
parent 20 889504eac4fb
equal deleted inserted replaced
20:889504eac4fb 21:604ca70b6235
     1 /*
       
     2 * Copyright (c) 2000 - 2001 Nokia Corporation and/or its subsidiary(-ies).
       
     3 * All rights reserved.
       
     4 * This component and the accompanying materials are made available
       
     5 * under the terms of the License "Eclipse Public License v1.0"
       
     6 * which accompanies this distribution, and is available
       
     7 * at the URL "http://www.eclipse.org/legal/epl-v10.html".
       
     8 *
       
     9 * Initial Contributors:
       
    10 * Nokia Corporation - initial contribution.
       
    11 *
       
    12 * Contributors:
       
    13 *
       
    14 * Description: 
       
    15 *
       
    16 */
       
    17 
       
    18 
       
    19 /*****************************************************************
       
    20 **  File: wbxml_parse_buffer.c
       
    21 **  Description:
       
    22 *
       
    23 * A buffer-based implementation of the "parser_reader interface".
       
    24 * To create a parser that gets wbxml from some other source (such
       
    25 * as reading from a stream), these are the only functions that should
       
    26 * have to be ported. wbxml_parse.c contains implementation of the
       
    27 * parsing logic, independent of how wbxml is read/stored (with a few
       
    28 * small exceptions).
       
    29 *
       
    30 * The parser reader is responsible for safely getting various primitive
       
    31 * data types and in-line strings from the wbxml NW_Byte code. The reader
       
    32 * also returns a pointer to the string table (which in this case is
       
    33 * just a pointer into the wbxml buffer.)
       
    34 *
       
    35 *****************************************************************/
       
    36 
       
    37 #include "cxml_internal.h"
       
    38 #include <xml/cxml/nw_wbxml_reader.h>
       
    39 #include <xml/cxml/nw_wbxml_dictionary.h>
       
    40 #include <xml/cxml/nw_wbxml_event.h>
       
    41 #include <xml/cxml/nw_wbxml_token.h>
       
    42 #include <xml/cxml/nw_wbxml_opaque.h>
       
    43 #include "nw_wbxml_parsei.h"
       
    44 #include <xml/cxml/nw_string_char.h>
       
    45 #include <xml/cxml/nw_tinydom.h>
       
    46 
       
    47 /*
       
    48  * Initialize the parser
       
    49  */
       
    50 
       
    51 EXPORT_C void
       
    52 NW_WBXML_Parser_newInPlace (NW_WBXML_Parser_t * parser)
       
    53 {
       
    54   NW_ASSERT(parser != NULL);
       
    55   
       
    56   parser->handler = NULL;
       
    57   parser->flags = 0;
       
    58   parser->state = NW_WBXML_PARSER_S_START;
       
    59   parser->tag_code_page = 0;
       
    60   parser->attribute_code_page = 0;
       
    61   parser->cp_registry.storage = NULL;
       
    62   parser->cp_registry.current = 0;
       
    63   parser->cp_registry.count = 0;
       
    64   parser->dictionary = 0;
       
    65   parser->doc = NULL;
       
    66   parser->p = NULL;
       
    67   parser->offset = 0;
       
    68   parser->left = 0;
       
    69   parser->status = NW_STAT_SUCCESS;
       
    70   parser->recursiveCallCnt = 0;
       
    71   parser->ext_t_not_table_index = 0;
       
    72 }
       
    73 
       
    74 
       
    75 void
       
    76 NW_WBXML_Parser_reset (NW_WBXML_Parser_t * parser)
       
    77 {
       
    78   NW_ASSERT(parser != NULL);
       
    79 
       
    80   parser->handler = NULL;
       
    81   parser->state = NW_WBXML_PARSER_S_START;
       
    82   parser->tag_code_page = 0;
       
    83   parser->attribute_code_page = 0;
       
    84   parser->p = NULL;
       
    85   parser->offset = 0;
       
    86   parser->left = 0;
       
    87   parser->status = NW_STAT_SUCCESS;
       
    88   parser->recursiveCallCnt = 0;
       
    89 }
       
    90 
       
    91 void
       
    92 NW_WBXML_Parser_delete (NW_WBXML_Parser_t * parser)
       
    93 {
       
    94   if (parser->cp_registry.storage != NULL) {
       
    95     NW_Mem_Free(parser->cp_registry.storage);
       
    96   }
       
    97 }
       
    98 
       
    99 
       
   100 /*
       
   101 * Safely advance the parser through bytecode.
       
   102 * The functions which read values from the parser don't advance
       
   103 * the parser since this may not be necessary or safe (e.g. when reading
       
   104 * the last data item.).
       
   105 *
       
   106 * "Count" may equal "left" so zero is a possible "left" value.
       
   107 */
       
   108 
       
   109 NW_Int32
       
   110 NW_WBXML_Parser_advance (NW_WBXML_Parser_t * parser,
       
   111                          NW_Int32 count)
       
   112 {
       
   113   NW_ASSERT(parser != NULL);
       
   114 
       
   115   if ((count > (NW_Int32) parser->left)
       
   116       || (((NW_Int32)parser->offset + count) < 0)){
       
   117     return -1;
       
   118   }
       
   119   parser->p += count;
       
   120   parser->offset = (NW_Uint32) ((NW_Int32)parser->offset +  count);
       
   121   parser->left = (NW_Uint32) ((NW_Int32) parser->left - count);
       
   122   return count;
       
   123 }
       
   124 
       
   125 /* Get the current offset */
       
   126 
       
   127 NW_Uint32
       
   128 NW_WBXML_Parser_getOffset(NW_WBXML_Parser_t * parser)
       
   129 {
       
   130   NW_ASSERT(parser != NULL);
       
   131 
       
   132   return parser->offset;
       
   133 }
       
   134 
       
   135 /* Safely set the parser to a new offset */
       
   136 
       
   137 NW_Int32
       
   138 NW_WBXML_Parser_setOffset(NW_WBXML_Parser_t * parser,
       
   139                           NW_Uint32 offset)
       
   140 {
       
   141   NW_Int32 count =
       
   142     (NW_Int32)offset - (NW_Int32)NW_WBXML_Parser_getOffset(parser);
       
   143 
       
   144   NW_ASSERT(parser != NULL);
       
   145 
       
   146   /* If the code page registry exists and is inited, reset code pages */
       
   147   if((parser->cp_registry.count > 0)
       
   148     && ((parser->flags & NW_WBXML_REGISTRY_INIT) == NW_WBXML_REGISTRY_INIT)){
       
   149     NW_WBXML_CPRegistry_getCodePages(&(parser->cp_registry),
       
   150                                      offset,
       
   151                                      &(parser->tag_code_page),
       
   152                                      &(parser->attribute_code_page));
       
   153   }
       
   154 
       
   155   /*
       
   156    * Try to "advance" the parser to the new position. This will fail
       
   157    * if the offset is not in the parser's current buffer.
       
   158    */
       
   159 
       
   160   return NW_WBXML_Parser_advance(parser, count);
       
   161 }
       
   162 
       
   163 
       
   164 /* Check if there is more NW_Byte code to read */
       
   165 
       
   166 NW_Int32
       
   167 NW_WBXML_Parser_hasMoreBytecode (NW_WBXML_Parser_t * parser)
       
   168 {
       
   169   NW_ASSERT(parser != NULL);
       
   170 
       
   171   return (parser->left > 0);
       
   172 }
       
   173 
       
   174 
       
   175 /*
       
   176  * Read one unsigned NW_Byte from the current parser position.
       
   177  */
       
   178 
       
   179 NW_Uint8
       
   180 NW_WBXML_Parser_readUint8 (NW_WBXML_Parser_t * parser)
       
   181 {
       
   182   NW_ASSERT(parser != NULL);
       
   183 
       
   184   return (NW_Uint8) (*(parser->p));
       
   185 }
       
   186 
       
   187 
       
   188 /*
       
   189  * Safely read an unsigned_32 from the current parser position
       
   190  */
       
   191 
       
   192 NW_Int32
       
   193 NW_WBXML_Parser_readMbUint32 (NW_WBXML_Parser_t * parser,
       
   194                               NW_Uint32 * val)
       
   195 {
       
   196   NW_Uint32 i, space, next;
       
   197 
       
   198   NW_ASSERT(parser != NULL);
       
   199 
       
   200   space = parser->left;
       
   201 
       
   202   for (i = 0, *val = 0;; i++, *val <<= 7)
       
   203   {
       
   204     if (i > 4 || i == space)
       
   205     {
       
   206       return -1;
       
   207     }
       
   208     next = (parser->p)[i];
       
   209     if (!(next & 0x80))
       
   210     {
       
   211       *val |= next;
       
   212       break;
       
   213     }
       
   214     else
       
   215       *val |= (next &= 0x7F);
       
   216   }
       
   217   return (NW_Int32)(i + 1);
       
   218 }
       
   219 
       
   220 /*
       
   221  * Safely get the length of an inline string at current parser position
       
   222  */
       
   223 
       
   224 NW_Int32 NW_WBXML_Parser_getInlineStrLen(NW_WBXML_Parser_t *parser,
       
   225                                          NW_WBXML_Document_t *doc)
       
   226 {
       
   227   NW_Uint32 len = 0;
       
   228 
       
   229   NW_ASSERT(parser != NULL);
       
   230 
       
   231   if (!NW_String_valid(parser->p, parser->left, doc->charset)) {
       
   232     return -1;
       
   233   }
       
   234 
       
   235   if(NW_String_charBuffGetLength (parser->p, doc->charset, &len) < 0){
       
   236     return -1;
       
   237   }
       
   238   return (NW_Int32)len;
       
   239 }
       
   240 
       
   241 /*
       
   242  * Safely read an inline string at the current parser position
       
   243  */
       
   244 
       
   245 EXPORT_C NW_Status_t
       
   246 NW_WBXML_Parser_getStringInline (NW_WBXML_Parser_t * parser,
       
   247                                  NW_WBXML_Document_t * doc,
       
   248                                  NW_String_t *string)
       
   249 {
       
   250   NW_ASSERT(parser != NULL);
       
   251   NW_ASSERT(doc != NULL);
       
   252   NW_ASSERT(string != NULL);
       
   253 
       
   254   if (!NW_String_valid (parser->p, parser->left, doc->charset))
       
   255   {
       
   256     return NW_STAT_WBXML_ERROR_BYTECODE;
       
   257   }
       
   258 
       
   259   if (NW_String_initialize (string, parser->p, doc->charset)
       
   260       != NW_STAT_SUCCESS){
       
   261     return NW_STAT_WBXML_ERROR_BYTECODE;
       
   262   }
       
   263   return NW_STAT_SUCCESS;
       
   264 }
       
   265 
       
   266 
       
   267 /*
       
   268  * Safely get an opaque at the current parser position
       
   269  */
       
   270 
       
   271 EXPORT_C NW_Status_t
       
   272 NW_WBXML_Parser_getOpaque (NW_WBXML_Parser_t * parser,
       
   273                            NW_Uint32 length,
       
   274                            NW_WBXML_Opaque_t *opaque)
       
   275 {
       
   276   NW_ASSERT(parser != NULL);
       
   277   NW_ASSERT(length != 0);
       
   278   NW_ASSERT(opaque != NULL);
       
   279 
       
   280   if (length > parser->left){
       
   281     return NW_STAT_WBXML_ERROR_BYTECODE;
       
   282   }
       
   283 
       
   284   return NW_WBXML_Opaque_construct (opaque, parser->p, length);
       
   285 }
       
   286 
       
   287 /*
       
   288  * Read in the string table
       
   289  */
       
   290 
       
   291 NW_Status_t
       
   292 NW_WBXML_Parser_readStringTable (NW_WBXML_Parser_t * parser,
       
   293                                  NW_WBXML_Document_t * doc,
       
   294                                  NW_Byte **table)
       
   295 {
       
   296   NW_Int32 skip;
       
   297 
       
   298   NW_ASSERT(parser != NULL);
       
   299   NW_ASSERT(doc != NULL);
       
   300   NW_ASSERT(table != NULL);
       
   301 
       
   302   *table = NULL;
       
   303 
       
   304   /* Read the string table length into the document header */
       
   305 
       
   306   skip = NW_WBXML_Parser_readMbUint32 (parser, &(doc->strtbl.length));
       
   307   if (skip < 0) {
       
   308     return NW_STAT_WBXML_ERROR_BYTECODE;
       
   309   }
       
   310 
       
   311   /* Try to advance over the length bytes to beginning of table */
       
   312 
       
   313   if (NW_WBXML_Parser_advance (parser, skip) < 0){
       
   314     return NW_STAT_WBXML_ERROR_BYTECODE;
       
   315   }
       
   316 
       
   317   /* Set table to current parser location*/
       
   318 
       
   319   *table = parser->p;
       
   320 
       
   321   /* Try to advance to end of table */
       
   322 
       
   323   if (NW_WBXML_Parser_advance (parser, (NW_Int32) doc->strtbl.length) < 0){
       
   324     return NW_STAT_WBXML_ERROR_BYTECODE;
       
   325   }
       
   326 
       
   327   return NW_STAT_SUCCESS;
       
   328 }
       
   329 
       
   330 
       
   331 /* Reset the parser to the start of a buffer. The buffsize is
       
   332  * critical here since it is used in all safety tests by the
       
   333  * parser reader to make sure the parser doesn't read past the
       
   334  * end of the buffer.
       
   335  */
       
   336 
       
   337 NW_Status_t
       
   338 NW_WBXML_Parser_resetBuffer(NW_WBXML_Parser_t *parser,
       
   339                             NW_Byte *buff,
       
   340                             NW_Uint32 buffsize)
       
   341 {
       
   342   NW_ASSERT(parser != NULL);
       
   343 
       
   344   parser->offset = 0;
       
   345   parser->p = buff;
       
   346   if (buffsize == 0 || buff == NULL) {
       
   347     parser->left = 0;
       
   348   }
       
   349   else {
       
   350     parser->left = buffsize;
       
   351   }
       
   352   parser->tag_code_page = 0;
       
   353   parser->attribute_code_page = 0;
       
   354   parser->state = NW_WBXML_PARSER_S_HEADER;
       
   355   parser->recursiveCallCnt = 0;
       
   356   return NW_STAT_SUCCESS;
       
   357 }
       
   358 
       
   359 
       
   360 /*
       
   361  * This method sets the parser to a buffer which may contain only
       
   362  * wbxml fragments and no header. Used by writable DOM only.
       
   363  *
       
   364  */
       
   365 
       
   366 NW_Status_t
       
   367 NW_WBXML_Parser_setBuffer (NW_WBXML_Parser_t * parser,
       
   368                            NW_WBXML_Document_t * doc,
       
   369                            NW_Byte * buff,
       
   370                            NW_Uint32 buffsize)
       
   371 {
       
   372   NW_ASSERT(parser != NULL);
       
   373   NW_ASSERT(doc != NULL);
       
   374   NW_ASSERT(buff != NULL);
       
   375   NW_ASSERT(buffsize > 0);
       
   376 
       
   377   /* Find dictionary from document */
       
   378 
       
   379   if (doc->publicid > 0){
       
   380     parser->dictionary =
       
   381       NW_WBXML_Dictionary_getIndexByPublicId (doc->publicid);
       
   382   }
       
   383   else if (doc->doc_type){
       
   384     parser->dictionary =
       
   385       NW_WBXML_Dictionary_getIndexByDocType (doc->doc_type, doc->charset);
       
   386   }
       
   387   else{
       
   388     NW_ASSERT(0); /* Doc not correctly initialized */
       
   389   }
       
   390 
       
   391   /* Set the buffer */
       
   392 
       
   393   NW_WBXML_Parser_resetBuffer(parser, buff, buffsize);
       
   394   return NW_STAT_SUCCESS;
       
   395 }
       
   396 
       
   397 /*
       
   398  * Parse a document from a buffer
       
   399  */
       
   400 
       
   401 EXPORT_C NW_Status_t
       
   402 NW_WBXML_Parser_parseBuffer (NW_WBXML_Parser_t * parser,
       
   403                              NW_WBXML_Document_t * doc,
       
   404                              NW_Byte * buff,
       
   405                              NW_Uint32 buffsize)
       
   406 {
       
   407   NW_Status_t status;
       
   408   
       
   409   NW_ASSERT(parser != NULL);
       
   410   NW_ASSERT(doc != NULL);
       
   411   NW_ASSERT(buff != NULL);
       
   412   NW_ASSERT(buffsize > 0);
       
   413 
       
   414   if (parser->p == NULL)
       
   415   {
       
   416     parser->p = buff;
       
   417     parser->left = buffsize;
       
   418     
       
   419     /* First parser the document header */
       
   420     
       
   421     if ((status = NW_WBXML_Parser_docHeaderParse (parser, doc))
       
   422       != NW_STAT_SUCCESS)
       
   423       return status;
       
   424     
       
   425     /* Now get the dictionary from the document */
       
   426     
       
   427     if (doc->publicid > 0){
       
   428       parser->dictionary =
       
   429         NW_WBXML_Dictionary_getIndexByPublicId (doc->publicid);
       
   430     }
       
   431     
       
   432     else if (doc->doc_type){
       
   433       parser->dictionary =
       
   434         NW_WBXML_Dictionary_getIndexByDocType (doc->doc_type, doc->charset);
       
   435     }
       
   436     
       
   437     /* If a dictionary could not be attained try using the default public id */
       
   438     if (parser->dictionary == 0){
       
   439       doc->publicid = doc->default_public_id;
       
   440       parser->dictionary =
       
   441         NW_WBXML_Dictionary_getIndexByPublicId (doc->publicid);
       
   442     }
       
   443 
       
   444     /* Make the StartDocument callback */
       
   445     if (parser->handler && parser->handler->StartDocument_CB)
       
   446     {
       
   447       status = (*(parser->handler->StartDocument_CB)) (parser, doc,
       
   448         parser->context);
       
   449       if (status != NW_STAT_SUCCESS)
       
   450         return status;
       
   451     }
       
   452   }
       
   453 
       
   454   /* Parse the document body */
       
   455 
       
   456   return NW_WBXML_Parser_bodyParse (parser);
       
   457 }