|
1 /* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd |
|
2 See the file COPYING for copying permission. |
|
3 */ |
|
4 |
|
/* Fallback validity test, used only when no IS_INVALID_CHAR has been
   defined before this point: it evaluates to 0, so no multi-byte
   sequence is ever reported as invalid by it. */
#if !defined(IS_INVALID_CHAR)
#define IS_INVALID_CHAR(enc, ptr, n) (0)
#endif
|
8 |
|
/* Switch arm for the lead byte of an n-byte sequence.  The expansion
   uses `enc` and `end` from the scope in which it is expanded.
   - fewer than n bytes left: returns XML_TOK_PARTIAL_CHAR;
   - IS_INVALID_CHAR rejects the sequence: stores ptr through
     nextTokPtr and returns XML_TOK_INVALID;
   - otherwise: advances ptr by n and leaves the enclosing switch. */
#define INVALID_LEAD_CASE(n, ptr, nextTokPtr) \
    case BT_LEAD ## n: \
      if (end - ptr < n) \
        return XML_TOK_PARTIAL_CHAR; \
      if (!IS_INVALID_CHAR(enc, ptr, n)) { \
        ptr += n; \
        break; \
      } \
      *(nextTokPtr) = (ptr); \
      return XML_TOK_INVALID;

/* The three lead-byte arms above, followed by the byte types that are
   always rejected: for those, ptr is stored through nextTokPtr and
   XML_TOK_INVALID is returned. */
#define INVALID_CASES(ptr, nextTokPtr) \
  INVALID_LEAD_CASE(2, ptr, nextTokPtr) \
  INVALID_LEAD_CASE(3, ptr, nextTokPtr) \
  INVALID_LEAD_CASE(4, ptr, nextTokPtr) \
  case BT_TRAIL: \
  case BT_MALFORM: \
  case BT_NONXML: \
    *(nextTokPtr) = (ptr); \
    return XML_TOK_INVALID;
|
29 |
|
/* Switch arm for the lead byte of an n-byte sequence inside a name.
   - fewer than n bytes left: returns XML_TOK_PARTIAL_CHAR;
   - IS_NAME_CHAR rejects the sequence: stores ptr through nextTokPtr
     and returns XML_TOK_INVALID;
   - otherwise: advances ptr by n and leaves the enclosing switch.
   `n` is deliberately left unparenthesized because it is token-pasted
   into the case label. */
#define CHECK_NAME_CASE(n, enc, ptr, end, nextTokPtr) \
   case BT_LEAD ## n: \
     if ((end) - (ptr) < n) \
       return XML_TOK_PARTIAL_CHAR; \
     if (!IS_NAME_CHAR(enc, ptr, n)) { \
       *(nextTokPtr) = (ptr); \
       return XML_TOK_INVALID; \
     } \
     (ptr) += n; \
     break;

/* Switch arms that accept one name character at ptr and advance past it.
   A BT_NONASCII character must first pass IS_NAME_CHAR_MINBPC (otherwise
   XML_TOK_INVALID is returned with ptr stored through nextTokPtr); it
   then shares the single-unit advance with the other name byte types.
   Multi-byte lead bytes are handled by CHECK_NAME_CASE. */
#define CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) \
  case BT_NONASCII: \
    if (!IS_NAME_CHAR_MINBPC(enc, ptr)) { \
      *(nextTokPtr) = (ptr); \
      return XML_TOK_INVALID; \
    } \
    /* fall through */ \
  case BT_NMSTRT: \
  case BT_HEX: \
  case BT_DIGIT: \
  case BT_NAME: \
  case BT_MINUS: \
    (ptr) += MINBPC(enc); \
    break; \
  CHECK_NAME_CASE(2, enc, ptr, end, nextTokPtr) \
  CHECK_NAME_CASE(3, enc, ptr, end, nextTokPtr) \
  CHECK_NAME_CASE(4, enc, ptr, end, nextTokPtr)
|
57 |
|
/* Switch arm for the lead byte of an n-byte sequence at the start of a
   name.
   - fewer than n bytes left: returns XML_TOK_PARTIAL_CHAR;
   - IS_NMSTRT_CHAR rejects the sequence: stores ptr through nextTokPtr
     and returns XML_TOK_INVALID;
   - otherwise: advances ptr by n and leaves the enclosing switch.
   `n` is deliberately left unparenthesized because it is token-pasted
   into the case label. */
#define CHECK_NMSTRT_CASE(n, enc, ptr, end, nextTokPtr) \
   case BT_LEAD ## n: \
     if ((end) - (ptr) < n) \
       return XML_TOK_PARTIAL_CHAR; \
     if (!IS_NMSTRT_CHAR(enc, ptr, n)) { \
       *(nextTokPtr) = (ptr); \
       return XML_TOK_INVALID; \
     } \
     (ptr) += n; \
     break;

/* Switch arms that accept one name-start character at ptr and advance
   past it.  A BT_NONASCII character must first pass
   IS_NMSTRT_CHAR_MINBPC (otherwise XML_TOK_INVALID is returned with ptr
   stored through nextTokPtr); it then shares the single-unit advance
   with BT_NMSTRT and BT_HEX.  Multi-byte lead bytes are handled by
   CHECK_NMSTRT_CASE. */
#define CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) \
  case BT_NONASCII: \
    if (!IS_NMSTRT_CHAR_MINBPC(enc, ptr)) { \
      *(nextTokPtr) = (ptr); \
      return XML_TOK_INVALID; \
    } \
    /* fall through */ \
  case BT_NMSTRT: \
  case BT_HEX: \
    (ptr) += MINBPC(enc); \
    break; \
  CHECK_NMSTRT_CASE(2, enc, ptr, end, nextTokPtr) \
  CHECK_NMSTRT_CASE(3, enc, ptr, end, nextTokPtr) \
  CHECK_NMSTRT_CASE(4, enc, ptr, end, nextTokPtr)
|
82 |
|
/* Unless PREFIX is already defined, function names in this file are
   used exactly as written. */
#if !defined(PREFIX)
#define PREFIX(ident) ident
#endif
|
86 |
|
87 /* ptr points to character following "<!-" */ |
|
88 |
|
89 static int PTRCALL |
|
90 PREFIX(scanComment)(const ENCODING *enc, const char *ptr, |
|
91 const char *end, const char **nextTokPtr) |
|
92 { |
|
93 if (ptr != end) { |
|
94 if (!CHAR_MATCHES(enc, ptr, ASCII_MINUS)) { |
|
95 *nextTokPtr = ptr; |
|
96 return XML_TOK_INVALID; |
|
97 } |
|
98 ptr += MINBPC(enc); |
|
99 while (ptr != end) { |
|
100 switch (BYTE_TYPE(enc, ptr)) { |
|
101 INVALID_CASES(ptr, nextTokPtr) |
|
102 case BT_MINUS: |
|
103 if ((ptr += MINBPC(enc)) == end) |
|
104 return XML_TOK_PARTIAL; |
|
105 if (CHAR_MATCHES(enc, ptr, ASCII_MINUS)) { |
|
106 if ((ptr += MINBPC(enc)) == end) |
|
107 return XML_TOK_PARTIAL; |
|
108 if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { |
|
109 *nextTokPtr = ptr; |
|
110 return XML_TOK_INVALID; |
|
111 } |
|
112 *nextTokPtr = ptr + MINBPC(enc); |
|
113 return XML_TOK_COMMENT; |
|
114 } |
|
115 break; |
|
116 default: |
|
117 ptr += MINBPC(enc); |
|
118 break; |
|
119 } |
|
120 } |
|
121 } |
|
122 return XML_TOK_PARTIAL; |
|
123 } |
|
124 |
|
125 /* ptr points to character following "<!" */ |
|
126 |
|
127 static int PTRCALL |
|
128 PREFIX(scanDecl)(const ENCODING *enc, const char *ptr, |
|
129 const char *end, const char **nextTokPtr) |
|
130 { |
|
131 if (ptr == end) |
|
132 return XML_TOK_PARTIAL; |
|
133 switch (BYTE_TYPE(enc, ptr)) { |
|
134 case BT_MINUS: |
|
135 return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
|
136 case BT_LSQB: |
|
137 *nextTokPtr = ptr + MINBPC(enc); |
|
138 return XML_TOK_COND_SECT_OPEN; |
|
139 case BT_NMSTRT: |
|
140 case BT_HEX: |
|
141 ptr += MINBPC(enc); |
|
142 break; |
|
143 default: |
|
144 *nextTokPtr = ptr; |
|
145 return XML_TOK_INVALID; |
|
146 } |
|
147 while (ptr != end) { |
|
148 switch (BYTE_TYPE(enc, ptr)) { |
|
149 case BT_PERCNT: |
|
150 if (ptr + MINBPC(enc) == end) |
|
151 return XML_TOK_PARTIAL; |
|
152 /* don't allow <!ENTITY% foo "whatever"> */ |
|
153 switch (BYTE_TYPE(enc, ptr + MINBPC(enc))) { |
|
154 case BT_S: case BT_CR: case BT_LF: case BT_PERCNT: |
|
155 *nextTokPtr = ptr; |
|
156 return XML_TOK_INVALID; |
|
157 } |
|
158 /* fall through */ |
|
159 case BT_S: case BT_CR: case BT_LF: |
|
160 *nextTokPtr = ptr; |
|
161 return XML_TOK_DECL_OPEN; |
|
162 case BT_NMSTRT: |
|
163 case BT_HEX: |
|
164 ptr += MINBPC(enc); |
|
165 break; |
|
166 default: |
|
167 *nextTokPtr = ptr; |
|
168 return XML_TOK_INVALID; |
|
169 } |
|
170 } |
|
171 return XML_TOK_PARTIAL; |
|
172 } |
|
173 |
|
174 static int PTRCALL |
|
175 PREFIX(checkPiTarget)(const ENCODING *enc, const char *ptr, |
|
176 const char *end, int *tokPtr) |
|
177 { |
|
178 int upper = 0; |
|
179 *tokPtr = XML_TOK_PI; |
|
180 if (end - ptr != MINBPC(enc)*3) |
|
181 return 1; |
|
182 switch (BYTE_TO_ASCII(enc, ptr)) { |
|
183 case ASCII_x: |
|
184 break; |
|
185 case ASCII_X: |
|
186 upper = 1; |
|
187 break; |
|
188 default: |
|
189 return 1; |
|
190 } |
|
191 ptr += MINBPC(enc); |
|
192 switch (BYTE_TO_ASCII(enc, ptr)) { |
|
193 case ASCII_m: |
|
194 break; |
|
195 case ASCII_M: |
|
196 upper = 1; |
|
197 break; |
|
198 default: |
|
199 return 1; |
|
200 } |
|
201 ptr += MINBPC(enc); |
|
202 switch (BYTE_TO_ASCII(enc, ptr)) { |
|
203 case ASCII_l: |
|
204 break; |
|
205 case ASCII_L: |
|
206 upper = 1; |
|
207 break; |
|
208 default: |
|
209 return 1; |
|
210 } |
|
211 if (upper) |
|
212 return 0; |
|
213 *tokPtr = XML_TOK_XML_DECL; |
|
214 return 1; |
|
215 } |
|
216 |
|
217 /* ptr points to character following "<?" */ |
|
218 |
|
219 static int PTRCALL |
|
220 PREFIX(scanPi)(const ENCODING *enc, const char *ptr, |
|
221 const char *end, const char **nextTokPtr) |
|
222 { |
|
223 int tok; |
|
224 const char *target = ptr; |
|
225 if (ptr == end) |
|
226 return XML_TOK_PARTIAL; |
|
227 switch (BYTE_TYPE(enc, ptr)) { |
|
228 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) |
|
229 default: |
|
230 *nextTokPtr = ptr; |
|
231 return XML_TOK_INVALID; |
|
232 } |
|
233 while (ptr != end) { |
|
234 switch (BYTE_TYPE(enc, ptr)) { |
|
235 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) |
|
236 case BT_S: case BT_CR: case BT_LF: |
|
237 if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) { |
|
238 *nextTokPtr = ptr; |
|
239 return XML_TOK_INVALID; |
|
240 } |
|
241 ptr += MINBPC(enc); |
|
242 while (ptr != end) { |
|
243 switch (BYTE_TYPE(enc, ptr)) { |
|
244 INVALID_CASES(ptr, nextTokPtr) |
|
245 case BT_QUEST: |
|
246 ptr += MINBPC(enc); |
|
247 if (ptr == end) |
|
248 return XML_TOK_PARTIAL; |
|
249 if (CHAR_MATCHES(enc, ptr, ASCII_GT)) { |
|
250 *nextTokPtr = ptr + MINBPC(enc); |
|
251 return tok; |
|
252 } |
|
253 break; |
|
254 default: |
|
255 ptr += MINBPC(enc); |
|
256 break; |
|
257 } |
|
258 } |
|
259 return XML_TOK_PARTIAL; |
|
260 case BT_QUEST: |
|
261 if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) { |
|
262 *nextTokPtr = ptr; |
|
263 return XML_TOK_INVALID; |
|
264 } |
|
265 ptr += MINBPC(enc); |
|
266 if (ptr == end) |
|
267 return XML_TOK_PARTIAL; |
|
268 if (CHAR_MATCHES(enc, ptr, ASCII_GT)) { |
|
269 *nextTokPtr = ptr + MINBPC(enc); |
|
270 return tok; |
|
271 } |
|
272 /* fall through */ |
|
273 default: |
|
274 *nextTokPtr = ptr; |
|
275 return XML_TOK_INVALID; |
|
276 } |
|
277 } |
|
278 return XML_TOK_PARTIAL; |
|
279 } |
|
280 |
|
281 static int PTRCALL |
|
282 PREFIX(scanCdataSection)(const ENCODING *enc, const char *ptr, |
|
283 const char *end, const char **nextTokPtr) |
|
284 { |
|
285 static const char CDATA_LSQB[] = { ASCII_C, ASCII_D, ASCII_A, |
|
286 ASCII_T, ASCII_A, ASCII_LSQB }; |
|
287 int i; |
|
288 /* CDATA[ */ |
|
289 if (end - ptr < 6 * MINBPC(enc)) |
|
290 return XML_TOK_PARTIAL; |
|
291 for (i = 0; i < 6; i++, ptr += MINBPC(enc)) { |
|
292 if (!CHAR_MATCHES(enc, ptr, CDATA_LSQB[i])) { |
|
293 *nextTokPtr = ptr; |
|
294 return XML_TOK_INVALID; |
|
295 } |
|
296 } |
|
297 *nextTokPtr = ptr; |
|
298 return XML_TOK_CDATA_SECT_OPEN; |
|
299 } |
|
300 |
|
301 static int PTRCALL |
|
302 PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr, |
|
303 const char *end, const char **nextTokPtr) |
|
304 { |
|
305 if (ptr == end) |
|
306 return XML_TOK_NONE; |
|
307 if (MINBPC(enc) > 1) { |
|
308 size_t n = end - ptr; |
|
309 if (n & (MINBPC(enc) - 1)) { |
|
310 n &= ~(MINBPC(enc) - 1); |
|
311 if (n == 0) |
|
312 return XML_TOK_PARTIAL; |
|
313 end = ptr + n; |
|
314 } |
|
315 } |
|
316 switch (BYTE_TYPE(enc, ptr)) { |
|
317 case BT_RSQB: |
|
318 ptr += MINBPC(enc); |
|
319 if (ptr == end) |
|
320 return XML_TOK_PARTIAL; |
|
321 if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB)) |
|
322 break; |
|
323 ptr += MINBPC(enc); |
|
324 if (ptr == end) |
|
325 return XML_TOK_PARTIAL; |
|
326 if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { |
|
327 ptr -= MINBPC(enc); |
|
328 break; |
|
329 } |
|
330 *nextTokPtr = ptr + MINBPC(enc); |
|
331 return XML_TOK_CDATA_SECT_CLOSE; |
|
332 case BT_CR: |
|
333 ptr += MINBPC(enc); |
|
334 if (ptr == end) |
|
335 return XML_TOK_PARTIAL; |
|
336 if (BYTE_TYPE(enc, ptr) == BT_LF) |
|
337 ptr += MINBPC(enc); |
|
338 *nextTokPtr = ptr; |
|
339 return XML_TOK_DATA_NEWLINE; |
|
340 case BT_LF: |
|
341 *nextTokPtr = ptr + MINBPC(enc); |
|
342 return XML_TOK_DATA_NEWLINE; |
|
343 INVALID_CASES(ptr, nextTokPtr) |
|
344 default: |
|
345 ptr += MINBPC(enc); |
|
346 break; |
|
347 } |
|
348 while (ptr != end) { |
|
349 switch (BYTE_TYPE(enc, ptr)) { |
|
350 #define LEAD_CASE(n) \ |
|
351 case BT_LEAD ## n: \ |
|
352 if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \ |
|
353 *nextTokPtr = ptr; \ |
|
354 return XML_TOK_DATA_CHARS; \ |
|
355 } \ |
|
356 ptr += n; \ |
|
357 break; |
|
358 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) |
|
359 #undef LEAD_CASE |
|
360 case BT_NONXML: |
|
361 case BT_MALFORM: |
|
362 case BT_TRAIL: |
|
363 case BT_CR: |
|
364 case BT_LF: |
|
365 case BT_RSQB: |
|
366 *nextTokPtr = ptr; |
|
367 return XML_TOK_DATA_CHARS; |
|
368 default: |
|
369 ptr += MINBPC(enc); |
|
370 break; |
|
371 } |
|
372 } |
|
373 *nextTokPtr = ptr; |
|
374 return XML_TOK_DATA_CHARS; |
|
375 } |
|
376 |
|
377 /* ptr points to character following "</" */ |
|
378 |
|
379 static int PTRCALL |
|
380 PREFIX(scanEndTag)(const ENCODING *enc, const char *ptr, |
|
381 const char *end, const char **nextTokPtr) |
|
382 { |
|
383 if (ptr == end) |
|
384 return XML_TOK_PARTIAL; |
|
385 switch (BYTE_TYPE(enc, ptr)) { |
|
386 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) |
|
387 default: |
|
388 *nextTokPtr = ptr; |
|
389 return XML_TOK_INVALID; |
|
390 } |
|
391 while (ptr != end) { |
|
392 switch (BYTE_TYPE(enc, ptr)) { |
|
393 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) |
|
394 case BT_S: case BT_CR: case BT_LF: |
|
395 for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) { |
|
396 switch (BYTE_TYPE(enc, ptr)) { |
|
397 case BT_S: case BT_CR: case BT_LF: |
|
398 break; |
|
399 case BT_GT: |
|
400 *nextTokPtr = ptr + MINBPC(enc); |
|
401 return XML_TOK_END_TAG; |
|
402 default: |
|
403 *nextTokPtr = ptr; |
|
404 return XML_TOK_INVALID; |
|
405 } |
|
406 } |
|
407 return XML_TOK_PARTIAL; |
|
408 #ifdef XML_NS |
|
409 case BT_COLON: |
|
410 /* no need to check qname syntax here, |
|
411 since end-tag must match exactly */ |
|
412 ptr += MINBPC(enc); |
|
413 break; |
|
414 #endif |
|
415 case BT_GT: |
|
416 *nextTokPtr = ptr + MINBPC(enc); |
|
417 return XML_TOK_END_TAG; |
|
418 default: |
|
419 *nextTokPtr = ptr; |
|
420 return XML_TOK_INVALID; |
|
421 } |
|
422 } |
|
423 return XML_TOK_PARTIAL; |
|
424 } |
|
425 |
|
426 /* ptr points to character following "&#X" */ |
|
427 |
|
428 static int PTRCALL |
|
429 PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr, |
|
430 const char *end, const char **nextTokPtr) |
|
431 { |
|
432 if (ptr != end) { |
|
433 switch (BYTE_TYPE(enc, ptr)) { |
|
434 case BT_DIGIT: |
|
435 case BT_HEX: |
|
436 break; |
|
437 default: |
|
438 *nextTokPtr = ptr; |
|
439 return XML_TOK_INVALID; |
|
440 } |
|
441 for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) { |
|
442 switch (BYTE_TYPE(enc, ptr)) { |
|
443 case BT_DIGIT: |
|
444 case BT_HEX: |
|
445 break; |
|
446 case BT_SEMI: |
|
447 *nextTokPtr = ptr + MINBPC(enc); |
|
448 return XML_TOK_CHAR_REF; |
|
449 default: |
|
450 *nextTokPtr = ptr; |
|
451 return XML_TOK_INVALID; |
|
452 } |
|
453 } |
|
454 } |
|
455 return XML_TOK_PARTIAL; |
|
456 } |
|
457 |
|
458 /* ptr points to character following "&#" */ |
|
459 |
|
460 static int PTRCALL |
|
461 PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr, |
|
462 const char *end, const char **nextTokPtr) |
|
463 { |
|
464 if (ptr != end) { |
|
465 if (CHAR_MATCHES(enc, ptr, ASCII_x)) |
|
466 return PREFIX(scanHexCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
|
467 switch (BYTE_TYPE(enc, ptr)) { |
|
468 case BT_DIGIT: |
|
469 break; |
|
470 default: |
|
471 *nextTokPtr = ptr; |
|
472 return XML_TOK_INVALID; |
|
473 } |
|
474 for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) { |
|
475 switch (BYTE_TYPE(enc, ptr)) { |
|
476 case BT_DIGIT: |
|
477 break; |
|
478 case BT_SEMI: |
|
479 *nextTokPtr = ptr + MINBPC(enc); |
|
480 return XML_TOK_CHAR_REF; |
|
481 default: |
|
482 *nextTokPtr = ptr; |
|
483 return XML_TOK_INVALID; |
|
484 } |
|
485 } |
|
486 } |
|
487 return XML_TOK_PARTIAL; |
|
488 } |
|
489 |
|
490 /* ptr points to character following "&" */ |
|
491 |
|
492 static int PTRCALL |
|
493 PREFIX(scanRef)(const ENCODING *enc, const char *ptr, const char *end, |
|
494 const char **nextTokPtr) |
|
495 { |
|
496 if (ptr == end) |
|
497 return XML_TOK_PARTIAL; |
|
498 switch (BYTE_TYPE(enc, ptr)) { |
|
499 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) |
|
500 case BT_NUM: |
|
501 return PREFIX(scanCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
|
502 default: |
|
503 *nextTokPtr = ptr; |
|
504 return XML_TOK_INVALID; |
|
505 } |
|
506 while (ptr != end) { |
|
507 switch (BYTE_TYPE(enc, ptr)) { |
|
508 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) |
|
509 case BT_SEMI: |
|
510 *nextTokPtr = ptr + MINBPC(enc); |
|
511 return XML_TOK_ENTITY_REF; |
|
512 default: |
|
513 *nextTokPtr = ptr; |
|
514 return XML_TOK_INVALID; |
|
515 } |
|
516 } |
|
517 return XML_TOK_PARTIAL; |
|
518 } |
|
519 |
|
520 /* ptr points to character following first character of attribute name */ |
|
521 |
|
522 static int PTRCALL |
|
523 PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end, |
|
524 const char **nextTokPtr) |
|
525 { |
|
526 #ifdef XML_NS |
|
527 int hadColon = 0; |
|
528 #endif |
|
529 while (ptr != end) { |
|
530 switch (BYTE_TYPE(enc, ptr)) { |
|
531 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) |
|
532 #ifdef XML_NS |
|
533 case BT_COLON: |
|
534 if (hadColon) { |
|
535 *nextTokPtr = ptr; |
|
536 return XML_TOK_INVALID; |
|
537 } |
|
538 hadColon = 1; |
|
539 ptr += MINBPC(enc); |
|
540 if (ptr == end) |
|
541 return XML_TOK_PARTIAL; |
|
542 switch (BYTE_TYPE(enc, ptr)) { |
|
543 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) |
|
544 default: |
|
545 *nextTokPtr = ptr; |
|
546 return XML_TOK_INVALID; |
|
547 } |
|
548 break; |
|
549 #endif |
|
550 case BT_S: case BT_CR: case BT_LF: |
|
551 for (;;) { |
|
552 int t; |
|
553 |
|
554 ptr += MINBPC(enc); |
|
555 if (ptr == end) |
|
556 return XML_TOK_PARTIAL; |
|
557 t = BYTE_TYPE(enc, ptr); |
|
558 if (t == BT_EQUALS) |
|
559 break; |
|
560 switch (t) { |
|
561 case BT_S: |
|
562 case BT_LF: |
|
563 case BT_CR: |
|
564 break; |
|
565 default: |
|
566 *nextTokPtr = ptr; |
|
567 return XML_TOK_INVALID; |
|
568 } |
|
569 } |
|
570 /* fall through */ |
|
571 case BT_EQUALS: |
|
572 { |
|
573 int open; |
|
574 #ifdef XML_NS |
|
575 hadColon = 0; |
|
576 #endif |
|
577 for (;;) { |
|
578 ptr += MINBPC(enc); |
|
579 if (ptr == end) |
|
580 return XML_TOK_PARTIAL; |
|
581 open = BYTE_TYPE(enc, ptr); |
|
582 if (open == BT_QUOT || open == BT_APOS) |
|
583 break; |
|
584 switch (open) { |
|
585 case BT_S: |
|
586 case BT_LF: |
|
587 case BT_CR: |
|
588 break; |
|
589 default: |
|
590 *nextTokPtr = ptr; |
|
591 return XML_TOK_INVALID; |
|
592 } |
|
593 } |
|
594 ptr += MINBPC(enc); |
|
595 /* in attribute value */ |
|
596 for (;;) { |
|
597 int t; |
|
598 if (ptr == end) |
|
599 return XML_TOK_PARTIAL; |
|
600 t = BYTE_TYPE(enc, ptr); |
|
601 if (t == open) |
|
602 break; |
|
603 switch (t) { |
|
604 INVALID_CASES(ptr, nextTokPtr) |
|
605 case BT_AMP: |
|
606 { |
|
607 int tok = PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, &ptr); |
|
608 if (tok <= 0) { |
|
609 if (tok == XML_TOK_INVALID) |
|
610 *nextTokPtr = ptr; |
|
611 return tok; |
|
612 } |
|
613 break; |
|
614 } |
|
615 case BT_LT: |
|
616 *nextTokPtr = ptr; |
|
617 return XML_TOK_INVALID; |
|
618 default: |
|
619 ptr += MINBPC(enc); |
|
620 break; |
|
621 } |
|
622 } |
|
623 ptr += MINBPC(enc); |
|
624 if (ptr == end) |
|
625 return XML_TOK_PARTIAL; |
|
626 switch (BYTE_TYPE(enc, ptr)) { |
|
627 case BT_S: |
|
628 case BT_CR: |
|
629 case BT_LF: |
|
630 break; |
|
631 case BT_SOL: |
|
632 goto sol; |
|
633 case BT_GT: |
|
634 goto gt; |
|
635 default: |
|
636 *nextTokPtr = ptr; |
|
637 return XML_TOK_INVALID; |
|
638 } |
|
639 /* ptr points to closing quote */ |
|
640 for (;;) { |
|
641 ptr += MINBPC(enc); |
|
642 if (ptr == end) |
|
643 return XML_TOK_PARTIAL; |
|
644 switch (BYTE_TYPE(enc, ptr)) { |
|
645 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) |
|
646 case BT_S: case BT_CR: case BT_LF: |
|
647 continue; |
|
648 case BT_GT: |
|
649 gt: |
|
650 *nextTokPtr = ptr + MINBPC(enc); |
|
651 return XML_TOK_START_TAG_WITH_ATTS; |
|
652 case BT_SOL: |
|
653 sol: |
|
654 ptr += MINBPC(enc); |
|
655 if (ptr == end) |
|
656 return XML_TOK_PARTIAL; |
|
657 if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { |
|
658 *nextTokPtr = ptr; |
|
659 return XML_TOK_INVALID; |
|
660 } |
|
661 *nextTokPtr = ptr + MINBPC(enc); |
|
662 return XML_TOK_EMPTY_ELEMENT_WITH_ATTS; |
|
663 default: |
|
664 *nextTokPtr = ptr; |
|
665 return XML_TOK_INVALID; |
|
666 } |
|
667 break; |
|
668 } |
|
669 break; |
|
670 } |
|
671 default: |
|
672 *nextTokPtr = ptr; |
|
673 return XML_TOK_INVALID; |
|
674 } |
|
675 } |
|
676 return XML_TOK_PARTIAL; |
|
677 } |
|
678 |
|
679 /* ptr points to character following "<" */ |
|
680 |
|
681 static int PTRCALL |
|
682 PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end, |
|
683 const char **nextTokPtr) |
|
684 { |
|
685 #ifdef XML_NS |
|
686 int hadColon; |
|
687 #endif |
|
688 if (ptr == end) |
|
689 return XML_TOK_PARTIAL; |
|
690 switch (BYTE_TYPE(enc, ptr)) { |
|
691 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) |
|
692 case BT_EXCL: |
|
693 if ((ptr += MINBPC(enc)) == end) |
|
694 return XML_TOK_PARTIAL; |
|
695 switch (BYTE_TYPE(enc, ptr)) { |
|
696 case BT_MINUS: |
|
697 return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
|
698 case BT_LSQB: |
|
699 return PREFIX(scanCdataSection)(enc, ptr + MINBPC(enc), |
|
700 end, nextTokPtr); |
|
701 } |
|
702 *nextTokPtr = ptr; |
|
703 return XML_TOK_INVALID; |
|
704 case BT_QUEST: |
|
705 return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
|
706 case BT_SOL: |
|
707 return PREFIX(scanEndTag)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
|
708 default: |
|
709 *nextTokPtr = ptr; |
|
710 return XML_TOK_INVALID; |
|
711 } |
|
712 #ifdef XML_NS |
|
713 hadColon = 0; |
|
714 #endif |
|
715 /* we have a start-tag */ |
|
716 while (ptr != end) { |
|
717 switch (BYTE_TYPE(enc, ptr)) { |
|
718 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) |
|
719 #ifdef XML_NS |
|
720 case BT_COLON: |
|
721 if (hadColon) { |
|
722 *nextTokPtr = ptr; |
|
723 return XML_TOK_INVALID; |
|
724 } |
|
725 hadColon = 1; |
|
726 ptr += MINBPC(enc); |
|
727 if (ptr == end) |
|
728 return XML_TOK_PARTIAL; |
|
729 switch (BYTE_TYPE(enc, ptr)) { |
|
730 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) |
|
731 default: |
|
732 *nextTokPtr = ptr; |
|
733 return XML_TOK_INVALID; |
|
734 } |
|
735 break; |
|
736 #endif |
|
737 case BT_S: case BT_CR: case BT_LF: |
|
738 { |
|
739 ptr += MINBPC(enc); |
|
740 while (ptr != end) { |
|
741 switch (BYTE_TYPE(enc, ptr)) { |
|
742 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) |
|
743 case BT_GT: |
|
744 goto gt; |
|
745 case BT_SOL: |
|
746 goto sol; |
|
747 case BT_S: case BT_CR: case BT_LF: |
|
748 ptr += MINBPC(enc); |
|
749 continue; |
|
750 default: |
|
751 *nextTokPtr = ptr; |
|
752 return XML_TOK_INVALID; |
|
753 } |
|
754 return PREFIX(scanAtts)(enc, ptr, end, nextTokPtr); |
|
755 } |
|
756 return XML_TOK_PARTIAL; |
|
757 } |
|
758 case BT_GT: |
|
759 gt: |
|
760 *nextTokPtr = ptr + MINBPC(enc); |
|
761 return XML_TOK_START_TAG_NO_ATTS; |
|
762 case BT_SOL: |
|
763 sol: |
|
764 ptr += MINBPC(enc); |
|
765 if (ptr == end) |
|
766 return XML_TOK_PARTIAL; |
|
767 if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { |
|
768 *nextTokPtr = ptr; |
|
769 return XML_TOK_INVALID; |
|
770 } |
|
771 *nextTokPtr = ptr + MINBPC(enc); |
|
772 return XML_TOK_EMPTY_ELEMENT_NO_ATTS; |
|
773 default: |
|
774 *nextTokPtr = ptr; |
|
775 return XML_TOK_INVALID; |
|
776 } |
|
777 } |
|
778 return XML_TOK_PARTIAL; |
|
779 } |
|
780 |
|
781 static int PTRCALL |
|
782 PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end, |
|
783 const char **nextTokPtr) |
|
784 { |
|
785 if (ptr == end) |
|
786 return XML_TOK_NONE; |
|
787 if (MINBPC(enc) > 1) { |
|
788 size_t n = end - ptr; |
|
789 if (n & (MINBPC(enc) - 1)) { |
|
790 n &= ~(MINBPC(enc) - 1); |
|
791 if (n == 0) |
|
792 return XML_TOK_PARTIAL; |
|
793 end = ptr + n; |
|
794 } |
|
795 } |
|
796 switch (BYTE_TYPE(enc, ptr)) { |
|
797 case BT_LT: |
|
798 return PREFIX(scanLt)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
|
799 case BT_AMP: |
|
800 return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
|
801 case BT_CR: |
|
802 ptr += MINBPC(enc); |
|
803 if (ptr == end) |
|
804 return XML_TOK_TRAILING_CR; |
|
805 if (BYTE_TYPE(enc, ptr) == BT_LF) |
|
806 ptr += MINBPC(enc); |
|
807 *nextTokPtr = ptr; |
|
808 return XML_TOK_DATA_NEWLINE; |
|
809 case BT_LF: |
|
810 *nextTokPtr = ptr + MINBPC(enc); |
|
811 return XML_TOK_DATA_NEWLINE; |
|
812 case BT_RSQB: |
|
813 ptr += MINBPC(enc); |
|
814 if (ptr == end) |
|
815 return XML_TOK_TRAILING_RSQB; |
|
816 if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB)) |
|
817 break; |
|
818 ptr += MINBPC(enc); |
|
819 if (ptr == end) |
|
820 return XML_TOK_TRAILING_RSQB; |
|
821 if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { |
|
822 ptr -= MINBPC(enc); |
|
823 break; |
|
824 } |
|
825 *nextTokPtr = ptr; |
|
826 return XML_TOK_INVALID; |
|
827 INVALID_CASES(ptr, nextTokPtr) |
|
828 default: |
|
829 ptr += MINBPC(enc); |
|
830 break; |
|
831 } |
|
832 while (ptr != end) { |
|
833 switch (BYTE_TYPE(enc, ptr)) { |
|
834 #define LEAD_CASE(n) \ |
|
835 case BT_LEAD ## n: \ |
|
836 if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \ |
|
837 *nextTokPtr = ptr; \ |
|
838 return XML_TOK_DATA_CHARS; \ |
|
839 } \ |
|
840 ptr += n; \ |
|
841 break; |
|
842 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) |
|
843 #undef LEAD_CASE |
|
844 case BT_RSQB: |
|
845 if (ptr + MINBPC(enc) != end) { |
|
846 if (!CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_RSQB)) { |
|
847 ptr += MINBPC(enc); |
|
848 break; |
|
849 } |
|
850 if (ptr + 2*MINBPC(enc) != end) { |
|
851 if (!CHAR_MATCHES(enc, ptr + 2*MINBPC(enc), ASCII_GT)) { |
|
852 ptr += MINBPC(enc); |
|
853 break; |
|
854 } |
|
855 *nextTokPtr = ptr + 2*MINBPC(enc); |
|
856 return XML_TOK_INVALID; |
|
857 } |
|
858 } |
|
859 /* fall through */ |
|
860 case BT_AMP: |
|
861 case BT_LT: |
|
862 case BT_NONXML: |
|
863 case BT_MALFORM: |
|
864 case BT_TRAIL: |
|
865 case BT_CR: |
|
866 case BT_LF: |
|
867 *nextTokPtr = ptr; |
|
868 return XML_TOK_DATA_CHARS; |
|
869 default: |
|
870 ptr += MINBPC(enc); |
|
871 break; |
|
872 } |
|
873 } |
|
874 *nextTokPtr = ptr; |
|
875 return XML_TOK_DATA_CHARS; |
|
876 } |
|
877 |
|
878 /* ptr points to character following "%" */ |
|
879 |
|
880 static int PTRCALL |
|
881 PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end, |
|
882 const char **nextTokPtr) |
|
883 { |
|
884 if (ptr == end) |
|
885 return -XML_TOK_PERCENT; |
|
886 switch (BYTE_TYPE(enc, ptr)) { |
|
887 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) |
|
888 case BT_S: case BT_LF: case BT_CR: case BT_PERCNT: |
|
889 *nextTokPtr = ptr; |
|
890 return XML_TOK_PERCENT; |
|
891 default: |
|
892 *nextTokPtr = ptr; |
|
893 return XML_TOK_INVALID; |
|
894 } |
|
895 while (ptr != end) { |
|
896 switch (BYTE_TYPE(enc, ptr)) { |
|
897 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) |
|
898 case BT_SEMI: |
|
899 *nextTokPtr = ptr + MINBPC(enc); |
|
900 return XML_TOK_PARAM_ENTITY_REF; |
|
901 default: |
|
902 *nextTokPtr = ptr; |
|
903 return XML_TOK_INVALID; |
|
904 } |
|
905 } |
|
906 return XML_TOK_PARTIAL; |
|
907 } |
|
908 |
|
909 static int PTRCALL |
|
910 PREFIX(scanPoundName)(const ENCODING *enc, const char *ptr, const char *end, |
|
911 const char **nextTokPtr) |
|
912 { |
|
913 if (ptr == end) |
|
914 return XML_TOK_PARTIAL; |
|
915 switch (BYTE_TYPE(enc, ptr)) { |
|
916 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) |
|
917 default: |
|
918 *nextTokPtr = ptr; |
|
919 return XML_TOK_INVALID; |
|
920 } |
|
921 while (ptr != end) { |
|
922 switch (BYTE_TYPE(enc, ptr)) { |
|
923 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) |
|
924 case BT_CR: case BT_LF: case BT_S: |
|
925 case BT_RPAR: case BT_GT: case BT_PERCNT: case BT_VERBAR: |
|
926 *nextTokPtr = ptr; |
|
927 return XML_TOK_POUND_NAME; |
|
928 default: |
|
929 *nextTokPtr = ptr; |
|
930 return XML_TOK_INVALID; |
|
931 } |
|
932 } |
|
933 return -XML_TOK_POUND_NAME; |
|
934 } |
|
935 |
|
936 static int PTRCALL |
|
937 PREFIX(scanLit)(int open, const ENCODING *enc, |
|
938 const char *ptr, const char *end, |
|
939 const char **nextTokPtr) |
|
940 { |
|
941 while (ptr != end) { |
|
942 int t = BYTE_TYPE(enc, ptr); |
|
943 switch (t) { |
|
944 INVALID_CASES(ptr, nextTokPtr) |
|
945 case BT_QUOT: |
|
946 case BT_APOS: |
|
947 ptr += MINBPC(enc); |
|
948 if (t != open) |
|
949 break; |
|
950 if (ptr == end) |
|
951 return -XML_TOK_LITERAL; |
|
952 *nextTokPtr = ptr; |
|
953 switch (BYTE_TYPE(enc, ptr)) { |
|
954 case BT_S: case BT_CR: case BT_LF: |
|
955 case BT_GT: case BT_PERCNT: case BT_LSQB: |
|
956 return XML_TOK_LITERAL; |
|
957 default: |
|
958 return XML_TOK_INVALID; |
|
959 } |
|
960 default: |
|
961 ptr += MINBPC(enc); |
|
962 break; |
|
963 } |
|
964 } |
|
965 return XML_TOK_PARTIAL; |
|
966 } |
|
967 |
|
968 static int PTRCALL |
|
969 PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, |
|
970 const char **nextTokPtr) |
|
971 { |
|
972 int tok; |
|
973 if (ptr == end) |
|
974 return XML_TOK_NONE; |
|
975 if (MINBPC(enc) > 1) { |
|
976 size_t n = end - ptr; |
|
977 if (n & (MINBPC(enc) - 1)) { |
|
978 n &= ~(MINBPC(enc) - 1); |
|
979 if (n == 0) |
|
980 return XML_TOK_PARTIAL; |
|
981 end = ptr + n; |
|
982 } |
|
983 } |
|
984 switch (BYTE_TYPE(enc, ptr)) { |
|
985 case BT_QUOT: |
|
986 return PREFIX(scanLit)(BT_QUOT, enc, ptr + MINBPC(enc), end, nextTokPtr); |
|
987 case BT_APOS: |
|
988 return PREFIX(scanLit)(BT_APOS, enc, ptr + MINBPC(enc), end, nextTokPtr); |
|
989 case BT_LT: |
|
990 { |
|
991 ptr += MINBPC(enc); |
|
992 if (ptr == end) |
|
993 return XML_TOK_PARTIAL; |
|
994 switch (BYTE_TYPE(enc, ptr)) { |
|
995 case BT_EXCL: |
|
996 return PREFIX(scanDecl)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
|
997 case BT_QUEST: |
|
998 return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
|
999 case BT_NMSTRT: |
|
1000 case BT_HEX: |
|
1001 case BT_NONASCII: |
|
1002 case BT_LEAD2: |
|
1003 case BT_LEAD3: |
|
1004 case BT_LEAD4: |
|
1005 *nextTokPtr = ptr - MINBPC(enc); |
|
1006 return XML_TOK_INSTANCE_START; |
|
1007 } |
|
1008 *nextTokPtr = ptr; |
|
1009 return XML_TOK_INVALID; |
|
1010 } |
|
1011 case BT_CR: |
|
1012 if (ptr + MINBPC(enc) == end) { |
|
1013 *nextTokPtr = end; |
|
1014 /* indicate that this might be part of a CR/LF pair */ |
|
1015 return -XML_TOK_PROLOG_S; |
|
1016 } |
|
1017 /* fall through */ |
|
1018 case BT_S: case BT_LF: |
|
1019 for (;;) { |
|
1020 ptr += MINBPC(enc); |
|
1021 if (ptr == end) |
|
1022 break; |
|
1023 switch (BYTE_TYPE(enc, ptr)) { |
|
1024 case BT_S: case BT_LF: |
|
1025 break; |
|
1026 case BT_CR: |
|
1027 /* don't split CR/LF pair */ |
|
1028 if (ptr + MINBPC(enc) != end) |
|
1029 break; |
|
1030 /* fall through */ |
|
1031 default: |
|
1032 *nextTokPtr = ptr; |
|
1033 return XML_TOK_PROLOG_S; |
|
1034 } |
|
1035 } |
|
1036 *nextTokPtr = ptr; |
|
1037 return XML_TOK_PROLOG_S; |
|
1038 case BT_PERCNT: |
|
1039 return PREFIX(scanPercent)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
|
1040 case BT_COMMA: |
|
1041 *nextTokPtr = ptr + MINBPC(enc); |
|
1042 return XML_TOK_COMMA; |
|
1043 case BT_LSQB: |
|
1044 *nextTokPtr = ptr + MINBPC(enc); |
|
1045 return XML_TOK_OPEN_BRACKET; |
|
1046 case BT_RSQB: |
|
1047 ptr += MINBPC(enc); |
|
1048 if (ptr == end) |
|
1049 return -XML_TOK_CLOSE_BRACKET; |
|
1050 if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) { |
|
1051 if (ptr + MINBPC(enc) == end) |
|
1052 return XML_TOK_PARTIAL; |
|
1053 if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_GT)) { |
|
1054 *nextTokPtr = ptr + 2*MINBPC(enc); |
|
1055 return XML_TOK_COND_SECT_CLOSE; |
|
1056 } |
|
1057 } |
|
1058 *nextTokPtr = ptr; |
|
1059 return XML_TOK_CLOSE_BRACKET; |
|
1060 case BT_LPAR: |
|
1061 *nextTokPtr = ptr + MINBPC(enc); |
|
1062 return XML_TOK_OPEN_PAREN; |
|
1063 case BT_RPAR: |
|
1064 ptr += MINBPC(enc); |
|
1065 if (ptr == end) |
|
1066 return -XML_TOK_CLOSE_PAREN; |
|
1067 switch (BYTE_TYPE(enc, ptr)) { |
|
1068 case BT_AST: |
|
1069 *nextTokPtr = ptr + MINBPC(enc); |
|
1070 return XML_TOK_CLOSE_PAREN_ASTERISK; |
|
1071 case BT_QUEST: |
|
1072 *nextTokPtr = ptr + MINBPC(enc); |
|
1073 return XML_TOK_CLOSE_PAREN_QUESTION; |
|
1074 case BT_PLUS: |
|
1075 *nextTokPtr = ptr + MINBPC(enc); |
|
1076 return XML_TOK_CLOSE_PAREN_PLUS; |
|
1077 case BT_CR: case BT_LF: case BT_S: |
|
1078 case BT_GT: case BT_COMMA: case BT_VERBAR: |
|
1079 case BT_RPAR: |
|
1080 *nextTokPtr = ptr; |
|
1081 return XML_TOK_CLOSE_PAREN; |
|
1082 } |
|
1083 *nextTokPtr = ptr; |
|
1084 return XML_TOK_INVALID; |
|
1085 case BT_VERBAR: |
|
1086 *nextTokPtr = ptr + MINBPC(enc); |
|
1087 return XML_TOK_OR; |
|
1088 case BT_GT: |
|
1089 *nextTokPtr = ptr + MINBPC(enc); |
|
1090 return XML_TOK_DECL_CLOSE; |
|
1091 case BT_NUM: |
|
1092 return PREFIX(scanPoundName)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
|
1093 #define LEAD_CASE(n) \ |
|
1094 case BT_LEAD ## n: \ |
|
1095 if (end - ptr < n) \ |
|
1096 return XML_TOK_PARTIAL_CHAR; \ |
|
1097 if (IS_NMSTRT_CHAR(enc, ptr, n)) { \ |
|
1098 ptr += n; \ |
|
1099 tok = XML_TOK_NAME; \ |
|
1100 break; \ |
|
1101 } \ |
|
1102 if (IS_NAME_CHAR(enc, ptr, n)) { \ |
|
1103 ptr += n; \ |
|
1104 tok = XML_TOK_NMTOKEN; \ |
|
1105 break; \ |
|
1106 } \ |
|
1107 *nextTokPtr = ptr; \ |
|
1108 return XML_TOK_INVALID; |
|
1109 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) |
|
1110 #undef LEAD_CASE |
|
1111 case BT_NMSTRT: |
|
1112 case BT_HEX: |
|
1113 tok = XML_TOK_NAME; |
|
1114 ptr += MINBPC(enc); |
|
1115 break; |
|
1116 case BT_DIGIT: |
|
1117 case BT_NAME: |
|
1118 case BT_MINUS: |
|
1119 #ifdef XML_NS |
|
1120 case BT_COLON: |
|
1121 #endif |
|
1122 tok = XML_TOK_NMTOKEN; |
|
1123 ptr += MINBPC(enc); |
|
1124 break; |
|
1125 case BT_NONASCII: |
|
1126 if (IS_NMSTRT_CHAR_MINBPC(enc, ptr)) { |
|
1127 ptr += MINBPC(enc); |
|
1128 tok = XML_TOK_NAME; |
|
1129 break; |
|
1130 } |
|
1131 if (IS_NAME_CHAR_MINBPC(enc, ptr)) { |
|
1132 ptr += MINBPC(enc); |
|
1133 tok = XML_TOK_NMTOKEN; |
|
1134 break; |
|
1135 } |
|
1136 /* fall through */ |
|
1137 default: |
|
1138 *nextTokPtr = ptr; |
|
1139 return XML_TOK_INVALID; |
|
1140 } |
|
1141 while (ptr != end) { |
|
1142 switch (BYTE_TYPE(enc, ptr)) { |
|
1143 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) |
|
1144 case BT_GT: case BT_RPAR: case BT_COMMA: |
|
1145 case BT_VERBAR: case BT_LSQB: case BT_PERCNT: |
|
1146 case BT_S: case BT_CR: case BT_LF: |
|
1147 *nextTokPtr = ptr; |
|
1148 return tok; |
|
1149 #ifdef XML_NS |
|
1150 case BT_COLON: |
|
1151 ptr += MINBPC(enc); |
|
1152 switch (tok) { |
|
1153 case XML_TOK_NAME: |
|
1154 if (ptr == end) |
|
1155 return XML_TOK_PARTIAL; |
|
1156 tok = XML_TOK_PREFIXED_NAME; |
|
1157 switch (BYTE_TYPE(enc, ptr)) { |
|
1158 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) |
|
1159 default: |
|
1160 tok = XML_TOK_NMTOKEN; |
|
1161 break; |
|
1162 } |
|
1163 break; |
|
1164 case XML_TOK_PREFIXED_NAME: |
|
1165 tok = XML_TOK_NMTOKEN; |
|
1166 break; |
|
1167 } |
|
1168 break; |
|
1169 #endif |
|
1170 case BT_PLUS: |
|
1171 if (tok == XML_TOK_NMTOKEN) { |
|
1172 *nextTokPtr = ptr; |
|
1173 return XML_TOK_INVALID; |
|
1174 } |
|
1175 *nextTokPtr = ptr + MINBPC(enc); |
|
1176 return XML_TOK_NAME_PLUS; |
|
1177 case BT_AST: |
|
1178 if (tok == XML_TOK_NMTOKEN) { |
|
1179 *nextTokPtr = ptr; |
|
1180 return XML_TOK_INVALID; |
|
1181 } |
|
1182 *nextTokPtr = ptr + MINBPC(enc); |
|
1183 return XML_TOK_NAME_ASTERISK; |
|
1184 case BT_QUEST: |
|
1185 if (tok == XML_TOK_NMTOKEN) { |
|
1186 *nextTokPtr = ptr; |
|
1187 return XML_TOK_INVALID; |
|
1188 } |
|
1189 *nextTokPtr = ptr + MINBPC(enc); |
|
1190 return XML_TOK_NAME_QUESTION; |
|
1191 default: |
|
1192 *nextTokPtr = ptr; |
|
1193 return XML_TOK_INVALID; |
|
1194 } |
|
1195 } |
|
1196 return -tok; |
|
1197 } |
|
1198 |
|
1199 static int PTRCALL |
|
1200 PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr, |
|
1201 const char *end, const char **nextTokPtr) |
|
1202 { |
|
1203 const char *start; |
|
1204 if (ptr == end) |
|
1205 return XML_TOK_NONE; |
|
1206 start = ptr; |
|
1207 while (ptr != end) { |
|
1208 switch (BYTE_TYPE(enc, ptr)) { |
|
1209 #define LEAD_CASE(n) \ |
|
1210 case BT_LEAD ## n: ptr += n; break; |
|
1211 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) |
|
1212 #undef LEAD_CASE |
|
1213 case BT_AMP: |
|
1214 if (ptr == start) |
|
1215 return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
|
1216 *nextTokPtr = ptr; |
|
1217 return XML_TOK_DATA_CHARS; |
|
1218 case BT_LT: |
|
1219 /* this is for inside entity references */ |
|
1220 *nextTokPtr = ptr; |
|
1221 return XML_TOK_INVALID; |
|
1222 case BT_LF: |
|
1223 if (ptr == start) { |
|
1224 *nextTokPtr = ptr + MINBPC(enc); |
|
1225 return XML_TOK_DATA_NEWLINE; |
|
1226 } |
|
1227 *nextTokPtr = ptr; |
|
1228 return XML_TOK_DATA_CHARS; |
|
1229 case BT_CR: |
|
1230 if (ptr == start) { |
|
1231 ptr += MINBPC(enc); |
|
1232 if (ptr == end) |
|
1233 return XML_TOK_TRAILING_CR; |
|
1234 if (BYTE_TYPE(enc, ptr) == BT_LF) |
|
1235 ptr += MINBPC(enc); |
|
1236 *nextTokPtr = ptr; |
|
1237 return XML_TOK_DATA_NEWLINE; |
|
1238 } |
|
1239 *nextTokPtr = ptr; |
|
1240 return XML_TOK_DATA_CHARS; |
|
1241 case BT_S: |
|
1242 if (ptr == start) { |
|
1243 *nextTokPtr = ptr + MINBPC(enc); |
|
1244 return XML_TOK_ATTRIBUTE_VALUE_S; |
|
1245 } |
|
1246 *nextTokPtr = ptr; |
|
1247 return XML_TOK_DATA_CHARS; |
|
1248 default: |
|
1249 ptr += MINBPC(enc); |
|
1250 break; |
|
1251 } |
|
1252 } |
|
1253 *nextTokPtr = ptr; |
|
1254 return XML_TOK_DATA_CHARS; |
|
1255 } |
|
1256 |
|
1257 static int PTRCALL |
|
1258 PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr, |
|
1259 const char *end, const char **nextTokPtr) |
|
1260 { |
|
1261 const char *start; |
|
1262 if (ptr == end) |
|
1263 return XML_TOK_NONE; |
|
1264 start = ptr; |
|
1265 while (ptr != end) { |
|
1266 switch (BYTE_TYPE(enc, ptr)) { |
|
1267 #define LEAD_CASE(n) \ |
|
1268 case BT_LEAD ## n: ptr += n; break; |
|
1269 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) |
|
1270 #undef LEAD_CASE |
|
1271 case BT_AMP: |
|
1272 if (ptr == start) |
|
1273 return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
|
1274 *nextTokPtr = ptr; |
|
1275 return XML_TOK_DATA_CHARS; |
|
1276 case BT_PERCNT: |
|
1277 if (ptr == start) { |
|
1278 int tok = PREFIX(scanPercent)(enc, ptr + MINBPC(enc), |
|
1279 end, nextTokPtr); |
|
1280 return (tok == XML_TOK_PERCENT) ? XML_TOK_INVALID : tok; |
|
1281 } |
|
1282 *nextTokPtr = ptr; |
|
1283 return XML_TOK_DATA_CHARS; |
|
1284 case BT_LF: |
|
1285 if (ptr == start) { |
|
1286 *nextTokPtr = ptr + MINBPC(enc); |
|
1287 return XML_TOK_DATA_NEWLINE; |
|
1288 } |
|
1289 *nextTokPtr = ptr; |
|
1290 return XML_TOK_DATA_CHARS; |
|
1291 case BT_CR: |
|
1292 if (ptr == start) { |
|
1293 ptr += MINBPC(enc); |
|
1294 if (ptr == end) |
|
1295 return XML_TOK_TRAILING_CR; |
|
1296 if (BYTE_TYPE(enc, ptr) == BT_LF) |
|
1297 ptr += MINBPC(enc); |
|
1298 *nextTokPtr = ptr; |
|
1299 return XML_TOK_DATA_NEWLINE; |
|
1300 } |
|
1301 *nextTokPtr = ptr; |
|
1302 return XML_TOK_DATA_CHARS; |
|
1303 default: |
|
1304 ptr += MINBPC(enc); |
|
1305 break; |
|
1306 } |
|
1307 } |
|
1308 *nextTokPtr = ptr; |
|
1309 return XML_TOK_DATA_CHARS; |
|
1310 } |
|
1311 |
|
1312 #ifdef XML_DTD |
|
1313 |
|
1314 static int PTRCALL |
|
1315 PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr, |
|
1316 const char *end, const char **nextTokPtr) |
|
1317 { |
|
1318 int level = 0; |
|
1319 if (MINBPC(enc) > 1) { |
|
1320 size_t n = end - ptr; |
|
1321 if (n & (MINBPC(enc) - 1)) { |
|
1322 n &= ~(MINBPC(enc) - 1); |
|
1323 end = ptr + n; |
|
1324 } |
|
1325 } |
|
1326 while (ptr != end) { |
|
1327 switch (BYTE_TYPE(enc, ptr)) { |
|
1328 INVALID_CASES(ptr, nextTokPtr) |
|
1329 case BT_LT: |
|
1330 if ((ptr += MINBPC(enc)) == end) |
|
1331 return XML_TOK_PARTIAL; |
|
1332 if (CHAR_MATCHES(enc, ptr, ASCII_EXCL)) { |
|
1333 if ((ptr += MINBPC(enc)) == end) |
|
1334 return XML_TOK_PARTIAL; |
|
1335 if (CHAR_MATCHES(enc, ptr, ASCII_LSQB)) { |
|
1336 ++level; |
|
1337 ptr += MINBPC(enc); |
|
1338 } |
|
1339 } |
|
1340 break; |
|
1341 case BT_RSQB: |
|
1342 if ((ptr += MINBPC(enc)) == end) |
|
1343 return XML_TOK_PARTIAL; |
|
1344 if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) { |
|
1345 if ((ptr += MINBPC(enc)) == end) |
|
1346 return XML_TOK_PARTIAL; |
|
1347 if (CHAR_MATCHES(enc, ptr, ASCII_GT)) { |
|
1348 ptr += MINBPC(enc); |
|
1349 if (level == 0) { |
|
1350 *nextTokPtr = ptr; |
|
1351 return XML_TOK_IGNORE_SECT; |
|
1352 } |
|
1353 --level; |
|
1354 } |
|
1355 } |
|
1356 break; |
|
1357 default: |
|
1358 ptr += MINBPC(enc); |
|
1359 break; |
|
1360 } |
|
1361 } |
|
1362 return XML_TOK_PARTIAL; |
|
1363 } |
|
1364 |
|
1365 #endif /* XML_DTD */ |
|
1366 |
|
1367 static int PTRCALL |
|
1368 PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end, |
|
1369 const char **badPtr) |
|
1370 { |
|
1371 ptr += MINBPC(enc); |
|
1372 end -= MINBPC(enc); |
|
1373 for (; ptr != end; ptr += MINBPC(enc)) { |
|
1374 switch (BYTE_TYPE(enc, ptr)) { |
|
1375 case BT_DIGIT: |
|
1376 case BT_HEX: |
|
1377 case BT_MINUS: |
|
1378 case BT_APOS: |
|
1379 case BT_LPAR: |
|
1380 case BT_RPAR: |
|
1381 case BT_PLUS: |
|
1382 case BT_COMMA: |
|
1383 case BT_SOL: |
|
1384 case BT_EQUALS: |
|
1385 case BT_QUEST: |
|
1386 case BT_CR: |
|
1387 case BT_LF: |
|
1388 case BT_SEMI: |
|
1389 case BT_EXCL: |
|
1390 case BT_AST: |
|
1391 case BT_PERCNT: |
|
1392 case BT_NUM: |
|
1393 #ifdef XML_NS |
|
1394 case BT_COLON: |
|
1395 #endif |
|
1396 break; |
|
1397 case BT_S: |
|
1398 if (CHAR_MATCHES(enc, ptr, ASCII_TAB)) { |
|
1399 *badPtr = ptr; |
|
1400 return 0; |
|
1401 } |
|
1402 break; |
|
1403 case BT_NAME: |
|
1404 case BT_NMSTRT: |
|
1405 if (!(BYTE_TO_ASCII(enc, ptr) & ~0x7f)) |
|
1406 break; |
|
1407 default: |
|
1408 switch (BYTE_TO_ASCII(enc, ptr)) { |
|
1409 case 0x24: /* $ */ |
|
1410 case 0x40: /* @ */ |
|
1411 break; |
|
1412 default: |
|
1413 *badPtr = ptr; |
|
1414 return 0; |
|
1415 } |
|
1416 break; |
|
1417 } |
|
1418 } |
|
1419 return 1; |
|
1420 } |
|
1421 |
|
1422 /* This must only be called for a well-formed start-tag or empty |
|
1423 element tag. Returns the number of attributes. Pointers to the |
|
1424 first attsMax attributes are stored in atts. |
|
1425 */ |
|
1426 |
|
1427 static int PTRCALL |
|
1428 PREFIX(getAtts)(const ENCODING *enc, const char *ptr, |
|
1429 int attsMax, ATTRIBUTE *atts) |
|
1430 { |
|
1431 enum { other, inName, inValue } state = inName; |
|
1432 int nAtts = 0; |
|
1433 int open = 0; /* defined when state == inValue; |
|
1434 initialization just to shut up compilers */ |
|
1435 |
|
1436 for (ptr += MINBPC(enc);; ptr += MINBPC(enc)) { |
|
1437 switch (BYTE_TYPE(enc, ptr)) { |
|
1438 #define START_NAME \ |
|
1439 if (state == other) { \ |
|
1440 if (nAtts < attsMax) { \ |
|
1441 atts[nAtts].name = ptr; \ |
|
1442 atts[nAtts].normalized = 1; \ |
|
1443 } \ |
|
1444 state = inName; \ |
|
1445 } |
|
1446 #define LEAD_CASE(n) \ |
|
1447 case BT_LEAD ## n: START_NAME ptr += (n - MINBPC(enc)); break; |
|
1448 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) |
|
1449 #undef LEAD_CASE |
|
1450 case BT_NONASCII: |
|
1451 case BT_NMSTRT: |
|
1452 case BT_HEX: |
|
1453 START_NAME |
|
1454 break; |
|
1455 #undef START_NAME |
|
1456 case BT_QUOT: |
|
1457 if (state != inValue) { |
|
1458 if (nAtts < attsMax) |
|
1459 atts[nAtts].valuePtr = ptr + MINBPC(enc); |
|
1460 state = inValue; |
|
1461 open = BT_QUOT; |
|
1462 } |
|
1463 else if (open == BT_QUOT) { |
|
1464 state = other; |
|
1465 if (nAtts < attsMax) |
|
1466 atts[nAtts].valueEnd = ptr; |
|
1467 nAtts++; |
|
1468 } |
|
1469 break; |
|
1470 case BT_APOS: |
|
1471 if (state != inValue) { |
|
1472 if (nAtts < attsMax) |
|
1473 atts[nAtts].valuePtr = ptr + MINBPC(enc); |
|
1474 state = inValue; |
|
1475 open = BT_APOS; |
|
1476 } |
|
1477 else if (open == BT_APOS) { |
|
1478 state = other; |
|
1479 if (nAtts < attsMax) |
|
1480 atts[nAtts].valueEnd = ptr; |
|
1481 nAtts++; |
|
1482 } |
|
1483 break; |
|
1484 case BT_AMP: |
|
1485 if (nAtts < attsMax) |
|
1486 atts[nAtts].normalized = 0; |
|
1487 break; |
|
1488 case BT_S: |
|
1489 if (state == inName) |
|
1490 state = other; |
|
1491 else if (state == inValue |
|
1492 && nAtts < attsMax |
|
1493 && atts[nAtts].normalized |
|
1494 && (ptr == atts[nAtts].valuePtr |
|
1495 || BYTE_TO_ASCII(enc, ptr) != ASCII_SPACE |
|
1496 || BYTE_TO_ASCII(enc, ptr + MINBPC(enc)) == ASCII_SPACE |
|
1497 || BYTE_TYPE(enc, ptr + MINBPC(enc)) == open)) |
|
1498 atts[nAtts].normalized = 0; |
|
1499 break; |
|
1500 case BT_CR: case BT_LF: |
|
1501 /* This case ensures that the first attribute name is counted |
|
1502 Apart from that we could just change state on the quote. */ |
|
1503 if (state == inName) |
|
1504 state = other; |
|
1505 else if (state == inValue && nAtts < attsMax) |
|
1506 atts[nAtts].normalized = 0; |
|
1507 break; |
|
1508 case BT_GT: |
|
1509 case BT_SOL: |
|
1510 if (state != inValue) |
|
1511 return nAtts; |
|
1512 break; |
|
1513 default: |
|
1514 break; |
|
1515 } |
|
1516 } |
|
1517 /* not reached */ |
|
1518 } |
|
1519 |
|
1520 static int PTRFASTCALL |
|
1521 PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr) |
|
1522 { |
|
1523 int result = 0; |
|
1524 /* skip &# */ |
|
1525 ptr += 2*MINBPC(enc); |
|
1526 if (CHAR_MATCHES(enc, ptr, ASCII_x)) { |
|
1527 for (ptr += MINBPC(enc); |
|
1528 !CHAR_MATCHES(enc, ptr, ASCII_SEMI); |
|
1529 ptr += MINBPC(enc)) { |
|
1530 int c = BYTE_TO_ASCII(enc, ptr); |
|
1531 switch (c) { |
|
1532 case ASCII_0: case ASCII_1: case ASCII_2: case ASCII_3: case ASCII_4: |
|
1533 case ASCII_5: case ASCII_6: case ASCII_7: case ASCII_8: case ASCII_9: |
|
1534 result <<= 4; |
|
1535 result |= (c - ASCII_0); |
|
1536 break; |
|
1537 case ASCII_A: case ASCII_B: case ASCII_C: |
|
1538 case ASCII_D: case ASCII_E: case ASCII_F: |
|
1539 result <<= 4; |
|
1540 result += 10 + (c - ASCII_A); |
|
1541 break; |
|
1542 case ASCII_a: case ASCII_b: case ASCII_c: |
|
1543 case ASCII_d: case ASCII_e: case ASCII_f: |
|
1544 result <<= 4; |
|
1545 result += 10 + (c - ASCII_a); |
|
1546 break; |
|
1547 } |
|
1548 if (result >= 0x110000) |
|
1549 return -1; |
|
1550 } |
|
1551 } |
|
1552 else { |
|
1553 for (; !CHAR_MATCHES(enc, ptr, ASCII_SEMI); ptr += MINBPC(enc)) { |
|
1554 int c = BYTE_TO_ASCII(enc, ptr); |
|
1555 result *= 10; |
|
1556 result += (c - ASCII_0); |
|
1557 if (result >= 0x110000) |
|
1558 return -1; |
|
1559 } |
|
1560 } |
|
1561 return checkCharRefNumber(result); |
|
1562 } |
|
1563 |
|
1564 static int PTRCALL |
|
1565 PREFIX(predefinedEntityName)(const ENCODING *enc, const char *ptr, |
|
1566 const char *end) |
|
1567 { |
|
1568 switch ((end - ptr)/MINBPC(enc)) { |
|
1569 case 2: |
|
1570 if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_t)) { |
|
1571 switch (BYTE_TO_ASCII(enc, ptr)) { |
|
1572 case ASCII_l: |
|
1573 return ASCII_LT; |
|
1574 case ASCII_g: |
|
1575 return ASCII_GT; |
|
1576 } |
|
1577 } |
|
1578 break; |
|
1579 case 3: |
|
1580 if (CHAR_MATCHES(enc, ptr, ASCII_a)) { |
|
1581 ptr += MINBPC(enc); |
|
1582 if (CHAR_MATCHES(enc, ptr, ASCII_m)) { |
|
1583 ptr += MINBPC(enc); |
|
1584 if (CHAR_MATCHES(enc, ptr, ASCII_p)) |
|
1585 return ASCII_AMP; |
|
1586 } |
|
1587 } |
|
1588 break; |
|
1589 case 4: |
|
1590 switch (BYTE_TO_ASCII(enc, ptr)) { |
|
1591 case ASCII_q: |
|
1592 ptr += MINBPC(enc); |
|
1593 if (CHAR_MATCHES(enc, ptr, ASCII_u)) { |
|
1594 ptr += MINBPC(enc); |
|
1595 if (CHAR_MATCHES(enc, ptr, ASCII_o)) { |
|
1596 ptr += MINBPC(enc); |
|
1597 if (CHAR_MATCHES(enc, ptr, ASCII_t)) |
|
1598 return ASCII_QUOT; |
|
1599 } |
|
1600 } |
|
1601 break; |
|
1602 case ASCII_a: |
|
1603 ptr += MINBPC(enc); |
|
1604 if (CHAR_MATCHES(enc, ptr, ASCII_p)) { |
|
1605 ptr += MINBPC(enc); |
|
1606 if (CHAR_MATCHES(enc, ptr, ASCII_o)) { |
|
1607 ptr += MINBPC(enc); |
|
1608 if (CHAR_MATCHES(enc, ptr, ASCII_s)) |
|
1609 return ASCII_APOS; |
|
1610 } |
|
1611 } |
|
1612 break; |
|
1613 } |
|
1614 } |
|
1615 return 0; |
|
1616 } |
|
1617 |
|
1618 static int PTRCALL |
|
1619 PREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2) |
|
1620 { |
|
1621 for (;;) { |
|
1622 switch (BYTE_TYPE(enc, ptr1)) { |
|
1623 #define LEAD_CASE(n) \ |
|
1624 case BT_LEAD ## n: \ |
|
1625 if (*ptr1++ != *ptr2++) \ |
|
1626 return 0; |
|
1627 LEAD_CASE(4) LEAD_CASE(3) LEAD_CASE(2) |
|
1628 #undef LEAD_CASE |
|
1629 /* fall through */ |
|
1630 if (*ptr1++ != *ptr2++) |
|
1631 return 0; |
|
1632 break; |
|
1633 case BT_NONASCII: |
|
1634 case BT_NMSTRT: |
|
1635 #ifdef XML_NS |
|
1636 case BT_COLON: |
|
1637 #endif |
|
1638 case BT_HEX: |
|
1639 case BT_DIGIT: |
|
1640 case BT_NAME: |
|
1641 case BT_MINUS: |
|
1642 if (*ptr2++ != *ptr1++) |
|
1643 return 0; |
|
1644 if (MINBPC(enc) > 1) { |
|
1645 if (*ptr2++ != *ptr1++) |
|
1646 return 0; |
|
1647 if (MINBPC(enc) > 2) { |
|
1648 if (*ptr2++ != *ptr1++) |
|
1649 return 0; |
|
1650 if (MINBPC(enc) > 3) { |
|
1651 if (*ptr2++ != *ptr1++) |
|
1652 return 0; |
|
1653 } |
|
1654 } |
|
1655 } |
|
1656 break; |
|
1657 default: |
|
1658 if (MINBPC(enc) == 1 && *ptr1 == *ptr2) |
|
1659 return 1; |
|
1660 switch (BYTE_TYPE(enc, ptr2)) { |
|
1661 case BT_LEAD2: |
|
1662 case BT_LEAD3: |
|
1663 case BT_LEAD4: |
|
1664 case BT_NONASCII: |
|
1665 case BT_NMSTRT: |
|
1666 #ifdef XML_NS |
|
1667 case BT_COLON: |
|
1668 #endif |
|
1669 case BT_HEX: |
|
1670 case BT_DIGIT: |
|
1671 case BT_NAME: |
|
1672 case BT_MINUS: |
|
1673 return 0; |
|
1674 default: |
|
1675 return 1; |
|
1676 } |
|
1677 } |
|
1678 } |
|
1679 /* not reached */ |
|
1680 } |
|
1681 |
|
1682 static int PTRCALL |
|
1683 PREFIX(nameMatchesAscii)(const ENCODING *enc, const char *ptr1, |
|
1684 const char *end1, const char *ptr2) |
|
1685 { |
|
1686 for (; *ptr2; ptr1 += MINBPC(enc), ptr2++) { |
|
1687 if (ptr1 == end1) |
|
1688 return 0; |
|
1689 if (!CHAR_MATCHES(enc, ptr1, *ptr2)) |
|
1690 return 0; |
|
1691 } |
|
1692 return ptr1 == end1; |
|
1693 } |
|
1694 |
|
1695 static int PTRFASTCALL |
|
1696 PREFIX(nameLength)(const ENCODING *enc, const char *ptr) |
|
1697 { |
|
1698 const char *start = ptr; |
|
1699 for (;;) { |
|
1700 switch (BYTE_TYPE(enc, ptr)) { |
|
1701 #define LEAD_CASE(n) \ |
|
1702 case BT_LEAD ## n: ptr += n; break; |
|
1703 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) |
|
1704 #undef LEAD_CASE |
|
1705 case BT_NONASCII: |
|
1706 case BT_NMSTRT: |
|
1707 #ifdef XML_NS |
|
1708 case BT_COLON: |
|
1709 #endif |
|
1710 case BT_HEX: |
|
1711 case BT_DIGIT: |
|
1712 case BT_NAME: |
|
1713 case BT_MINUS: |
|
1714 ptr += MINBPC(enc); |
|
1715 break; |
|
1716 default: |
|
1717 return (int)(ptr - start); |
|
1718 } |
|
1719 } |
|
1720 } |
|
1721 |
|
1722 static const char * PTRFASTCALL |
|
1723 PREFIX(skipS)(const ENCODING *enc, const char *ptr) |
|
1724 { |
|
1725 for (;;) { |
|
1726 switch (BYTE_TYPE(enc, ptr)) { |
|
1727 case BT_LF: |
|
1728 case BT_CR: |
|
1729 case BT_S: |
|
1730 ptr += MINBPC(enc); |
|
1731 break; |
|
1732 default: |
|
1733 return ptr; |
|
1734 } |
|
1735 } |
|
1736 } |
|
1737 |
|
1738 static void PTRCALL |
|
1739 PREFIX(updatePosition)(const ENCODING *enc, |
|
1740 const char *ptr, |
|
1741 const char *end, |
|
1742 POSITION *pos) |
|
1743 { |
|
1744 while (ptr != end) { |
|
1745 switch (BYTE_TYPE(enc, ptr)) { |
|
1746 #define LEAD_CASE(n) \ |
|
1747 case BT_LEAD ## n: \ |
|
1748 ptr += n; \ |
|
1749 break; |
|
1750 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) |
|
1751 #undef LEAD_CASE |
|
1752 case BT_LF: |
|
1753 pos->columnNumber = (XML_Size)-1; |
|
1754 pos->lineNumber++; |
|
1755 ptr += MINBPC(enc); |
|
1756 break; |
|
1757 case BT_CR: |
|
1758 pos->lineNumber++; |
|
1759 ptr += MINBPC(enc); |
|
1760 if (ptr != end && BYTE_TYPE(enc, ptr) == BT_LF) |
|
1761 ptr += MINBPC(enc); |
|
1762 pos->columnNumber = (XML_Size)-1; |
|
1763 break; |
|
1764 default: |
|
1765 ptr += MINBPC(enc); |
|
1766 break; |
|
1767 } |
|
1768 pos->columnNumber++; |
|
1769 } |
|
1770 } |
|
1771 |
|
1772 #undef DO_LEAD_CASE |
|
1773 #undef MULTIBYTE_CASES |
|
1774 #undef INVALID_CASES |
|
1775 #undef CHECK_NAME_CASE |
|
1776 #undef CHECK_NAME_CASES |
|
1777 #undef CHECK_NMSTRT_CASE |
|
1778 #undef CHECK_NMSTRT_CASES |
|
1779 |