1 /* |
|
2 * Copyright (c) 2000 - 2001 Nokia Corporation and/or its subsidiary(-ies). |
|
3 * All rights reserved. |
|
4 * This component and the accompanying materials are made available |
|
5 * under the terms of the License "Eclipse Public License v1.0" |
|
6 * which accompanies this distribution, and is available |
|
7 * at the URL "http://www.eclipse.org/legal/epl-v10.html". |
|
8 * |
|
9 * Initial Contributors: |
|
10 * Nokia Corporation - initial contribution. |
|
11 * |
|
12 * Contributors: |
|
13 * |
|
14 * Description: |
|
15 * |
|
16 */ |
|
17 |
|
18 |
|
19 /***************************************************************** |
|
20 ** File: wbxml_parse_buffer.c |
|
21 ** Description: |
|
22 * |
|
23 * A buffer-based implementation of the "parser_reader interface". |
|
24 * To create a parser that gets wbxml from some other source (such |
|
25 * as reading from a stream), these are the only functions that should |
|
26 * have to be ported. wbxml_parse.c contains implementation of the |
|
27 * parsing logic, independent of how wbxml is read/stored (with a few |
|
28 * small exceptions). |
|
29 * |
|
30 * The parser reader is responsible for safely getting various primitive |
|
31 * data types and in-line strings from the wbxml NW_Byte code. The reader |
|
32 * also returns a pointer to the string table (which in this case is |
|
33 * just a pointer into the wbxml buffer.) |
|
34 * |
|
35 *****************************************************************/ |
|
36 |
|
37 #include "cxml_internal.h" |
|
38 #include <xml/cxml/nw_wbxml_reader.h> |
|
39 #include <xml/cxml/nw_wbxml_dictionary.h> |
|
40 #include <xml/cxml/nw_wbxml_event.h> |
|
41 #include <xml/cxml/nw_wbxml_token.h> |
|
42 #include <xml/cxml/nw_wbxml_opaque.h> |
|
43 #include "nw_wbxml_parsei.h" |
|
44 #include <xml/cxml/nw_string_char.h> |
|
45 #include <xml/cxml/nw_tinydom.h> |
|
46 |
|
47 /* |
|
48 * Initialize the parser |
|
49 */ |
|
50 |
|
51 EXPORT_C void |
|
52 NW_WBXML_Parser_newInPlace (NW_WBXML_Parser_t * parser) |
|
53 { |
|
54 NW_ASSERT(parser != NULL); |
|
55 |
|
56 parser->handler = NULL; |
|
57 parser->flags = 0; |
|
58 parser->state = NW_WBXML_PARSER_S_START; |
|
59 parser->tag_code_page = 0; |
|
60 parser->attribute_code_page = 0; |
|
61 parser->cp_registry.storage = NULL; |
|
62 parser->cp_registry.current = 0; |
|
63 parser->cp_registry.count = 0; |
|
64 parser->dictionary = 0; |
|
65 parser->doc = NULL; |
|
66 parser->p = NULL; |
|
67 parser->offset = 0; |
|
68 parser->left = 0; |
|
69 parser->status = NW_STAT_SUCCESS; |
|
70 parser->recursiveCallCnt = 0; |
|
71 parser->ext_t_not_table_index = 0; |
|
72 } |
|
73 |
|
74 |
|
75 void |
|
76 NW_WBXML_Parser_reset (NW_WBXML_Parser_t * parser) |
|
77 { |
|
78 NW_ASSERT(parser != NULL); |
|
79 |
|
80 parser->handler = NULL; |
|
81 parser->state = NW_WBXML_PARSER_S_START; |
|
82 parser->tag_code_page = 0; |
|
83 parser->attribute_code_page = 0; |
|
84 parser->p = NULL; |
|
85 parser->offset = 0; |
|
86 parser->left = 0; |
|
87 parser->status = NW_STAT_SUCCESS; |
|
88 parser->recursiveCallCnt = 0; |
|
89 } |
|
90 |
|
91 void |
|
92 NW_WBXML_Parser_delete (NW_WBXML_Parser_t * parser) |
|
93 { |
|
94 if (parser->cp_registry.storage != NULL) { |
|
95 NW_Mem_Free(parser->cp_registry.storage); |
|
96 } |
|
97 } |
|
98 |
|
99 |
|
100 /* |
|
101 * Safely advance the parser through bytecode. |
|
102 * The functions which read values from the parser don't advance |
|
103 * the parser since this may not be necessary or safe (e.g. when reading |
|
104 * the last data item.). |
|
105 * |
|
106 * "Count" may equal "left" so zero is a possible "left" value. |
|
107 */ |
|
108 |
|
109 NW_Int32 |
|
110 NW_WBXML_Parser_advance (NW_WBXML_Parser_t * parser, |
|
111 NW_Int32 count) |
|
112 { |
|
113 NW_ASSERT(parser != NULL); |
|
114 |
|
115 if ((count > (NW_Int32) parser->left) |
|
116 || (((NW_Int32)parser->offset + count) < 0)){ |
|
117 return -1; |
|
118 } |
|
119 parser->p += count; |
|
120 parser->offset = (NW_Uint32) ((NW_Int32)parser->offset + count); |
|
121 parser->left = (NW_Uint32) ((NW_Int32) parser->left - count); |
|
122 return count; |
|
123 } |
|
124 |
|
125 /* Get the current offset */ |
|
126 |
|
127 NW_Uint32 |
|
128 NW_WBXML_Parser_getOffset(NW_WBXML_Parser_t * parser) |
|
129 { |
|
130 NW_ASSERT(parser != NULL); |
|
131 |
|
132 return parser->offset; |
|
133 } |
|
134 |
|
135 /* Safely set the parser to a new offset */ |
|
136 |
|
137 NW_Int32 |
|
138 NW_WBXML_Parser_setOffset(NW_WBXML_Parser_t * parser, |
|
139 NW_Uint32 offset) |
|
140 { |
|
141 NW_Int32 count = |
|
142 (NW_Int32)offset - (NW_Int32)NW_WBXML_Parser_getOffset(parser); |
|
143 |
|
144 NW_ASSERT(parser != NULL); |
|
145 |
|
146 /* If the code page registry exists and is inited, reset code pages */ |
|
147 if((parser->cp_registry.count > 0) |
|
148 && ((parser->flags & NW_WBXML_REGISTRY_INIT) == NW_WBXML_REGISTRY_INIT)){ |
|
149 NW_WBXML_CPRegistry_getCodePages(&(parser->cp_registry), |
|
150 offset, |
|
151 &(parser->tag_code_page), |
|
152 &(parser->attribute_code_page)); |
|
153 } |
|
154 |
|
155 /* |
|
156 * Try to "advance" the parser to the new position. This will fail |
|
157 * if the offset is not in the parser's current buffer. |
|
158 */ |
|
159 |
|
160 return NW_WBXML_Parser_advance(parser, count); |
|
161 } |
|
162 |
|
163 |
|
164 /* Check if there is more NW_Byte code to read */ |
|
165 |
|
166 NW_Int32 |
|
167 NW_WBXML_Parser_hasMoreBytecode (NW_WBXML_Parser_t * parser) |
|
168 { |
|
169 NW_ASSERT(parser != NULL); |
|
170 |
|
171 return (parser->left > 0); |
|
172 } |
|
173 |
|
174 |
|
175 /* |
|
176 * Read one unsigned NW_Byte from the current parser position. |
|
177 */ |
|
178 |
|
179 NW_Uint8 |
|
180 NW_WBXML_Parser_readUint8 (NW_WBXML_Parser_t * parser) |
|
181 { |
|
182 NW_ASSERT(parser != NULL); |
|
183 |
|
184 return (NW_Uint8) (*(parser->p)); |
|
185 } |
|
186 |
|
187 |
|
188 /* |
|
189 * Safely read an unsigned_32 from the current parser position |
|
190 */ |
|
191 |
|
192 NW_Int32 |
|
193 NW_WBXML_Parser_readMbUint32 (NW_WBXML_Parser_t * parser, |
|
194 NW_Uint32 * val) |
|
195 { |
|
196 NW_Uint32 i, space, next; |
|
197 |
|
198 NW_ASSERT(parser != NULL); |
|
199 |
|
200 space = parser->left; |
|
201 |
|
202 for (i = 0, *val = 0;; i++, *val <<= 7) |
|
203 { |
|
204 if (i > 4 || i == space) |
|
205 { |
|
206 return -1; |
|
207 } |
|
208 next = (parser->p)[i]; |
|
209 if (!(next & 0x80)) |
|
210 { |
|
211 *val |= next; |
|
212 break; |
|
213 } |
|
214 else |
|
215 *val |= (next &= 0x7F); |
|
216 } |
|
217 return (NW_Int32)(i + 1); |
|
218 } |
|
219 |
|
220 /* |
|
221 * Safely get the length of an inline string at current parser position |
|
222 */ |
|
223 |
|
224 NW_Int32 NW_WBXML_Parser_getInlineStrLen(NW_WBXML_Parser_t *parser, |
|
225 NW_WBXML_Document_t *doc) |
|
226 { |
|
227 NW_Uint32 len = 0; |
|
228 |
|
229 NW_ASSERT(parser != NULL); |
|
230 |
|
231 if (!NW_String_valid(parser->p, parser->left, doc->charset)) { |
|
232 return -1; |
|
233 } |
|
234 |
|
235 if(NW_String_charBuffGetLength (parser->p, doc->charset, &len) < 0){ |
|
236 return -1; |
|
237 } |
|
238 return (NW_Int32)len; |
|
239 } |
|
240 |
|
241 /* |
|
242 * Safely read an inline string at the current parser position |
|
243 */ |
|
244 |
|
245 EXPORT_C NW_Status_t |
|
246 NW_WBXML_Parser_getStringInline (NW_WBXML_Parser_t * parser, |
|
247 NW_WBXML_Document_t * doc, |
|
248 NW_String_t *string) |
|
249 { |
|
250 NW_ASSERT(parser != NULL); |
|
251 NW_ASSERT(doc != NULL); |
|
252 NW_ASSERT(string != NULL); |
|
253 |
|
254 if (!NW_String_valid (parser->p, parser->left, doc->charset)) |
|
255 { |
|
256 return NW_STAT_WBXML_ERROR_BYTECODE; |
|
257 } |
|
258 |
|
259 if (NW_String_initialize (string, parser->p, doc->charset) |
|
260 != NW_STAT_SUCCESS){ |
|
261 return NW_STAT_WBXML_ERROR_BYTECODE; |
|
262 } |
|
263 return NW_STAT_SUCCESS; |
|
264 } |
|
265 |
|
266 |
|
267 /* |
|
268 * Safely get an opaque at the current parser position |
|
269 */ |
|
270 |
|
271 EXPORT_C NW_Status_t |
|
272 NW_WBXML_Parser_getOpaque (NW_WBXML_Parser_t * parser, |
|
273 NW_Uint32 length, |
|
274 NW_WBXML_Opaque_t *opaque) |
|
275 { |
|
276 NW_ASSERT(parser != NULL); |
|
277 NW_ASSERT(length != 0); |
|
278 NW_ASSERT(opaque != NULL); |
|
279 |
|
280 if (length > parser->left){ |
|
281 return NW_STAT_WBXML_ERROR_BYTECODE; |
|
282 } |
|
283 |
|
284 return NW_WBXML_Opaque_construct (opaque, parser->p, length); |
|
285 } |
|
286 |
|
287 /* |
|
288 * Read in the string table |
|
289 */ |
|
290 |
|
291 NW_Status_t |
|
292 NW_WBXML_Parser_readStringTable (NW_WBXML_Parser_t * parser, |
|
293 NW_WBXML_Document_t * doc, |
|
294 NW_Byte **table) |
|
295 { |
|
296 NW_Int32 skip; |
|
297 |
|
298 NW_ASSERT(parser != NULL); |
|
299 NW_ASSERT(doc != NULL); |
|
300 NW_ASSERT(table != NULL); |
|
301 |
|
302 *table = NULL; |
|
303 |
|
304 /* Read the string table length into the document header */ |
|
305 |
|
306 skip = NW_WBXML_Parser_readMbUint32 (parser, &(doc->strtbl.length)); |
|
307 if (skip < 0) { |
|
308 return NW_STAT_WBXML_ERROR_BYTECODE; |
|
309 } |
|
310 |
|
311 /* Try to advance over the length bytes to beginning of table */ |
|
312 |
|
313 if (NW_WBXML_Parser_advance (parser, skip) < 0){ |
|
314 return NW_STAT_WBXML_ERROR_BYTECODE; |
|
315 } |
|
316 |
|
317 /* Set table to current parser location*/ |
|
318 |
|
319 *table = parser->p; |
|
320 |
|
321 /* Try to advance to end of table */ |
|
322 |
|
323 if (NW_WBXML_Parser_advance (parser, (NW_Int32) doc->strtbl.length) < 0){ |
|
324 return NW_STAT_WBXML_ERROR_BYTECODE; |
|
325 } |
|
326 |
|
327 return NW_STAT_SUCCESS; |
|
328 } |
|
329 |
|
330 |
|
331 /* Reset the parser to the start of a buffer. The buffsize is |
|
332 * critical here since it is used in all safety tests by the |
|
333 * parser reader to make sure the parser doesn't read past the |
|
334 * end of the buffer. |
|
335 */ |
|
336 |
|
337 NW_Status_t |
|
338 NW_WBXML_Parser_resetBuffer(NW_WBXML_Parser_t *parser, |
|
339 NW_Byte *buff, |
|
340 NW_Uint32 buffsize) |
|
341 { |
|
342 NW_ASSERT(parser != NULL); |
|
343 |
|
344 parser->offset = 0; |
|
345 parser->p = buff; |
|
346 if (buffsize == 0 || buff == NULL) { |
|
347 parser->left = 0; |
|
348 } |
|
349 else { |
|
350 parser->left = buffsize; |
|
351 } |
|
352 parser->tag_code_page = 0; |
|
353 parser->attribute_code_page = 0; |
|
354 parser->state = NW_WBXML_PARSER_S_HEADER; |
|
355 parser->recursiveCallCnt = 0; |
|
356 return NW_STAT_SUCCESS; |
|
357 } |
|
358 |
|
359 |
|
360 /* |
|
361 * This method sets the parser to a buffer which may contain only |
|
362 * wbxml fragments and no header. Used by writable DOM only. |
|
363 * |
|
364 */ |
|
365 |
|
366 NW_Status_t |
|
367 NW_WBXML_Parser_setBuffer (NW_WBXML_Parser_t * parser, |
|
368 NW_WBXML_Document_t * doc, |
|
369 NW_Byte * buff, |
|
370 NW_Uint32 buffsize) |
|
371 { |
|
372 NW_ASSERT(parser != NULL); |
|
373 NW_ASSERT(doc != NULL); |
|
374 NW_ASSERT(buff != NULL); |
|
375 NW_ASSERT(buffsize > 0); |
|
376 |
|
377 /* Find dictionary from document */ |
|
378 |
|
379 if (doc->publicid > 0){ |
|
380 parser->dictionary = |
|
381 NW_WBXML_Dictionary_getIndexByPublicId (doc->publicid); |
|
382 } |
|
383 else if (doc->doc_type){ |
|
384 parser->dictionary = |
|
385 NW_WBXML_Dictionary_getIndexByDocType (doc->doc_type, doc->charset); |
|
386 } |
|
387 else{ |
|
388 NW_ASSERT(0); /* Doc not correctly initialized */ |
|
389 } |
|
390 |
|
391 /* Set the buffer */ |
|
392 |
|
393 NW_WBXML_Parser_resetBuffer(parser, buff, buffsize); |
|
394 return NW_STAT_SUCCESS; |
|
395 } |
|
396 |
|
397 /* |
|
398 * Parse a document from a buffer |
|
399 */ |
|
400 |
|
401 EXPORT_C NW_Status_t |
|
402 NW_WBXML_Parser_parseBuffer (NW_WBXML_Parser_t * parser, |
|
403 NW_WBXML_Document_t * doc, |
|
404 NW_Byte * buff, |
|
405 NW_Uint32 buffsize) |
|
406 { |
|
407 NW_Status_t status; |
|
408 |
|
409 NW_ASSERT(parser != NULL); |
|
410 NW_ASSERT(doc != NULL); |
|
411 NW_ASSERT(buff != NULL); |
|
412 NW_ASSERT(buffsize > 0); |
|
413 |
|
414 if (parser->p == NULL) |
|
415 { |
|
416 parser->p = buff; |
|
417 parser->left = buffsize; |
|
418 |
|
419 /* First parser the document header */ |
|
420 |
|
421 if ((status = NW_WBXML_Parser_docHeaderParse (parser, doc)) |
|
422 != NW_STAT_SUCCESS) |
|
423 return status; |
|
424 |
|
425 /* Now get the dictionary from the document */ |
|
426 |
|
427 if (doc->publicid > 0){ |
|
428 parser->dictionary = |
|
429 NW_WBXML_Dictionary_getIndexByPublicId (doc->publicid); |
|
430 } |
|
431 |
|
432 else if (doc->doc_type){ |
|
433 parser->dictionary = |
|
434 NW_WBXML_Dictionary_getIndexByDocType (doc->doc_type, doc->charset); |
|
435 } |
|
436 |
|
437 /* If a dictionary could not be attained try using the default public id */ |
|
438 if (parser->dictionary == 0){ |
|
439 doc->publicid = doc->default_public_id; |
|
440 parser->dictionary = |
|
441 NW_WBXML_Dictionary_getIndexByPublicId (doc->publicid); |
|
442 } |
|
443 |
|
444 /* Make the StartDocument callback */ |
|
445 if (parser->handler && parser->handler->StartDocument_CB) |
|
446 { |
|
447 status = (*(parser->handler->StartDocument_CB)) (parser, doc, |
|
448 parser->context); |
|
449 if (status != NW_STAT_SUCCESS) |
|
450 return status; |
|
451 } |
|
452 } |
|
453 |
|
454 /* Parse the document body */ |
|
455 |
|
456 return NW_WBXML_Parser_bodyParse (parser); |
|
457 } |
|