|
1 /**************************************************************************** |
|
2 ** |
|
3 ** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies). |
|
4 ** All rights reserved. |
|
5 ** Contact: Nokia Corporation (qt-info@nokia.com) |
|
6 ** |
|
7 ** This file is part of the QtXmlPatterns module of the Qt Toolkit. |
|
8 ** |
|
9 ** $QT_BEGIN_LICENSE:LGPL$ |
|
10 ** No Commercial Usage |
|
11 ** This file contains pre-release code and may not be distributed. |
|
12 ** You may use this file in accordance with the terms and conditions |
|
13 ** contained in the Technology Preview License Agreement accompanying |
|
14 ** this package. |
|
15 ** |
|
16 ** GNU Lesser General Public License Usage |
|
17 ** Alternatively, this file may be used under the terms of the GNU Lesser |
|
18 ** General Public License version 2.1 as published by the Free Software |
|
19 ** Foundation and appearing in the file LICENSE.LGPL included in the |
|
20 ** packaging of this file. Please review the following information to |
|
21 ** ensure the GNU Lesser General Public License version 2.1 requirements |
|
22 ** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html. |
|
23 ** |
|
24 ** In addition, as a special exception, Nokia gives you certain additional |
|
25 ** rights. These rights are described in the Nokia Qt LGPL Exception |
|
26 ** version 1.1, included in the file LGPL_EXCEPTION.txt in this package. |
|
27 ** |
|
28 ** If you have questions regarding the use of this file, please contact |
|
29 ** Nokia at qt-info@nokia.com. |
|
30 ** |
|
31 ** |
|
32 ** |
|
33 ** |
|
34 ** |
|
35 ** |
|
36 ** |
|
37 ** |
|
38 ** $QT_END_LICENSE$ |
|
39 ** |
|
40 ****************************************************************************/ |
|
41 |
|
42 #include <QByteArray> |
|
43 |
|
44 #include "qquerytransformparser_p.h" |
|
45 |
|
46 #include "qxquerytokenizer_p.h" |
|
47 |
|
48 #include "qtokenlookup.cpp" |
|
49 |
|
50 QT_BEGIN_NAMESPACE |
|
51 |
|
52 namespace QPatternist |
|
53 { |
|
54 |
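/* Consumes whitespace and comments on behalf of nextToken(). On anything but
 * SUCCESS (for instance an unterminated comment, or end of input) it returns
 * the corresponding Token from the enclosing function, which is why this is a
 * macro rather than a regular function. */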
|
55 #define handleWhitespace() \ |
|
56 { \ |
|
57 const TokenType t = consumeWhitespace(); \ |
|
58 if(t != SUCCESS) \ |
|
59 return Token(t); \ |
|
60 } |
|
61 |
|
62 XQueryTokenizer::XQueryTokenizer(const QString &query, |
|
63 const QUrl &location, |
|
64 const State startingState) : Tokenizer(location) |
|
65 , m_data(query) |
|
66 , m_length(query.length()) |
|
67 , m_state(startingState) |
|
68 , m_pos(0) |
|
69 , m_line(1) |
|
70 , m_columnOffset(0) |
|
71 , m_scanOnly(false) |
|
72 { |
|
73 Q_ASSERT(location.isValid() || location.isEmpty()); |
|
74 } |
|
75 |
|
76 const QChar XQueryTokenizer::current() const |
|
77 { |
|
78 if(m_pos < m_length) |
|
79 return m_data.at(m_pos); |
|
80 else |
|
81 return QChar(); |
|
82 } |
|
83 |
|
84 char XQueryTokenizer::peekCurrent() const |
|
85 { |
|
86 return current().toAscii(); |
|
87 } |
|
88 |
|
89 int XQueryTokenizer::peekForColonColon() const |
|
90 { |
|
91 /* Note, we don't modify m_pos in this function, so we need to do offset |
|
92 * calculations. */ |
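/* For instance (illustrative): with m_pos at the space in "child ::name" the
 * function returns 1, the distance from m_pos to the first colon, whereas for
 * "child :name" or "child name" it returns -1. */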
|
93 int pos = m_pos; |
|
94 |
|
95 while(pos < m_length) |
|
96 { |
|
97 switch(m_data.at(pos).toAscii()) |
|
98 { |
|
99 /* Fallthrough these four. */ |
|
100 case ' ': |
|
101 case '\t': |
|
102 case '\n': |
|
103 case '\r': |
|
104 break; |
|
105 case ':': |
|
106 { |
|
107 if(peekAhead((pos - m_pos) + 1) == ':') |
|
108 return pos - m_pos; |
|
109 /* Fallthrough. */ |
|
110 } |
|
111 default: |
|
112 return -1; |
|
113 } |
|
114 ++pos; |
|
115 } |
|
116 |
|
117 return -1; |
|
118 } |
|
119 |
|
120 Tokenizer::Token XQueryTokenizer::tokenAndChangeState(const TokenType code, |
|
121 const State s, |
|
122 const int advance) |
|
123 { |
|
124 Q_ASSERT(advance >= 0); |
|
125 m_pos += advance; |
|
126 setState(s); |
|
127 return Token(code); |
|
128 } |
|
129 |
|
130 Tokenizer::Token XQueryTokenizer::tokenAndChangeState(const TokenType code, |
|
131 const QString &value, |
|
132 const State s) |
|
133 { |
|
134 setState(s); |
|
135 return Token(code, value); |
|
136 } |
|
137 |
|
138 Tokenizer::Token XQueryTokenizer::tokenAndAdvance(const TokenType code, |
|
139 const int advance) |
|
140 { |
|
141 Q_ASSERT(advance >= 0); |
|
142 m_pos += advance; |
|
143 return Token(code); |
|
144 } |
|
145 |
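/* Maps the line endings "\r\n" and "\r" to a single "\n", leaving alone the
 * positions listed in characterSkips (those that stem from character
 * references). Illustrative example: normalizeEOL(QString::fromLatin1("a\r\nb\rc"),
 * CharacterSkips()) evaluates to "a\nb\nc". */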
|
146 QString XQueryTokenizer::normalizeEOL(const QString &input, |
|
147 const CharacterSkips &characterSkips) |
|
148 { |
|
149 const int len = input.count(); |
|
150 QString result; |
|
151 |
|
152 /* The likelihood is rather high that it'll be the same content. */
|
153 result.reserve(len); |
|
154 |
|
155 for(int i = 0; i < len; ++i) |
|
156 { |
|
157 const QChar &at = input.at(i); |
|
158 |
|
159 if(characterSkips.contains(i)) |
|
160 { |
|
161 result.append(at); |
|
162 continue; |
|
163 } |
|
164 switch(input.at(i).unicode()) |
|
165 { |
|
166 case '\r': |
|
167 { |
|
168 if(i + 1 < len && input.at(i + 1) == QLatin1Char('\n')) |
|
169 ++i; |
|
170 |
|
171 /* Else, fallthrough. */ |
|
172 } |
|
173 case '\n': |
|
174 { |
|
175 result.append(QLatin1Char('\n')); |
|
176 continue; |
|
177 } |
|
178 default: |
|
179 { |
|
180 result.append(at); |
|
181 } |
|
182 } |
|
183 } |
|
184 |
|
185 return result; |
|
186 } |
|
187 |
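/* XQuery comments are delimited by "(:" and ":)" and may nest, as in
 * "(: outer (: inner :) outer :)"; nesting is handled by the recursive call
 * below. SUCCESS is returned only when the comment is properly terminated. */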
|
188 Tokenizer::TokenType XQueryTokenizer::consumeComment() |
|
189 { |
|
190 /* Below, we return ERROR instead of END_OF_FILE such that the parser |
|
191 * sees an invalid comment. */ |
|
192 while(m_pos < m_length) |
|
193 { |
|
194 switch(peekCurrent()) |
|
195 { |
|
196 case ':': |
|
197 { |
|
198 ++m_pos; /* Consume ':' */ |
|
199 if(atEnd()) |
|
200 return ERROR; |
|
201 |
|
202 if(peekCurrent() == ')') |
|
203 { |
|
204 ++m_pos; /* Consume ')' */ |
|
205 return SUCCESS; /* The comment closed nicely. */ |
|
206 } |
|
207 continue; /* We don't want to increment m_pos twice. */ |
|
208 } |
|
209 case '(': |
|
210 { /* It looks like the start of a nested comment. */
|
211 ++m_pos; |
|
212 |
|
213 if(atEnd()) |
|
214 return END_OF_FILE; |
|
215 else if(peekCurrent() == ':') |
|
216 { |
|
217 /* And it is a nested comment -- parse it. */ |
|
218 const TokenType retval = consumeComment(); |
|
219 if(retval == SUCCESS) |
|
220 continue; /* Continue with our "own" comment. */ |
|
221 else |
|
222 return retval; /* Return the error in the nested comment. */ |
|
223 } |
|
224 break; |
|
225 } |
|
226 case '\n': |
|
227 /* Fallthrough. */ |
|
228 case '\r': |
|
229 { |
|
230 /* We want to count \r\n as a single line break. */ |
|
231 if(peekAhead() == '\n') |
|
232 ++m_pos; |
|
233 |
|
234 m_columnOffset = m_pos; |
|
235 ++m_line; |
|
236 |
|
237 break; |
|
238 } |
|
239 } |
|
240 ++m_pos; |
|
241 } |
|
242 |
|
243 return ERROR; /* Error: we reached the end while inside a comment. */ |
|
244 } |
|
245 |
|
246 bool XQueryTokenizer::consumeRawWhitespace() |
|
247 { |
|
248 while(m_pos < m_length) |
|
249 { |
|
250 switch(peekCurrent()) |
|
251 { |
|
252 case ' ': |
|
253 case '\t': |
|
254 break; |
|
255 case '\n': |
|
256 case '\r': |
|
257 { |
|
258 if(peekAhead() == '\n') |
|
259 ++m_pos; |
|
260 |
|
261 m_columnOffset = m_pos; |
|
262 ++m_line; |
|
263 |
|
264 break; |
|
265 } |
|
266 default: |
|
267 return false; |
|
268 } |
|
269 ++m_pos; |
|
270 } |
|
271 return true; |
|
272 } |
|
273 |
|
274 Tokenizer::TokenType XQueryTokenizer::consumeWhitespace() |
|
275 { |
|
276 while(m_pos < m_length) |
|
277 { |
|
278 switch(peekCurrent()) |
|
279 { |
|
280 case ' ': |
|
281 case '\t': |
|
282 break; |
|
283 case '\n': |
|
284 case '\r': |
|
285 { |
|
286 /* We want to count \r\n as a single line break. */ |
|
287 if(peekAhead() == '\n') |
|
288 ++m_pos; |
|
289 |
|
290 m_columnOffset = m_pos; |
|
291 ++m_line; |
|
292 |
|
293 break; |
|
294 } |
|
295 case '(': |
|
296 { |
|
297 if(peekAhead() == ':') |
|
298 { |
|
299 m_pos += 2; /* Consume "(:" */ |
|
300 |
|
301 const TokenType comment = consumeComment(); |
|
302 if(comment == SUCCESS) |
|
303 continue; |
|
304 else |
|
305 return comment; |
|
306 } |
|
307 } |
|
308 default: |
|
309 return SUCCESS; |
|
310 } |
|
311 ++m_pos; |
|
312 } |
|
313 |
|
314 return END_OF_FILE; |
|
315 } |
|
316 |
|
317 char XQueryTokenizer::peekAhead(const int length) const |
|
318 { |
|
319 if(m_pos + length < m_length) |
|
320 return m_data.at(m_pos + length).toAscii(); |
|
321 else |
|
322 return 0; |
|
323 } |
|
324 |
|
325 Tokenizer::Token XQueryTokenizer::error() |
|
326 { |
|
327 return Token(ERROR); |
|
328 } |
|
329 |
|
330 bool XQueryTokenizer::isDigit(const char ch) |
|
331 { |
|
332 return ch >= '0' && ch <= '9'; |
|
333 } |
|
334 |
|
335 /* Replace with function in QXmlUtils. Write test cases for this. */ |
|
336 bool XQueryTokenizer::isNCNameStart(const QChar ch) |
|
337 { |
|
338 if(ch == QLatin1Char('_')) |
|
339 return true; |
|
340 |
|
341 switch(ch.category()) |
|
342 { |
|
343 case QChar::Letter_Lowercase: |
|
344 case QChar::Letter_Uppercase: |
|
345 case QChar::Letter_Other: |
|
346 case QChar::Letter_Titlecase: |
|
347 case QChar::Number_Letter: |
|
348 return true; |
|
349 default: |
|
350 return false; |
|
351 } |
|
352 } |
|
353 |
|
354 bool XQueryTokenizer::isNCNameBody(const QChar ch) |
|
355 { |
|
356 switch(ch.unicode()) |
|
357 { |
|
358 case '.': |
|
359 case '_': |
|
360 case '-': |
|
361 return true; |
|
362 } |
|
363 |
|
364 switch(ch.category()) |
|
365 { |
|
366 case QChar::Letter_Lowercase: |
|
367 case QChar::Letter_Uppercase: |
|
368 case QChar::Letter_Other: |
|
369 case QChar::Letter_Titlecase: |
|
370 case QChar::Number_Letter: |
|
371 case QChar::Mark_SpacingCombining: |
|
372 case QChar::Mark_Enclosing: |
|
373 case QChar::Mark_NonSpacing: |
|
374 case QChar::Letter_Modifier: |
|
375 case QChar::Number_DecimalDigit: |
|
376 return true; |
|
377 default: |
|
378 return false; |
|
379 } |
|
380 } |
|
381 |
|
382 bool XQueryTokenizer::isPhraseKeyword(const TokenType code) |
|
383 { |
|
384 switch(code) |
|
385 { |
|
386 /* Fallthrough all these. */ |
|
387 case CASTABLE: |
|
388 case CAST: |
|
389 case COPY_NAMESPACES: |
|
390 case DECLARE: |
|
391 case EMPTY: |
|
392 case MODULE: |
|
393 case IMPORT: |
|
394 case INSTANCE: |
|
395 case ORDER: |
|
396 case ORDERING: |
|
397 case XQUERY: |
|
398 case STABLE: |
|
399 case TREAT: |
|
400 return true; |
|
401 default: |
|
402 return false; |
|
403 } |
|
404 } |
|
405 |
|
406 bool XQueryTokenizer::isOperatorKeyword(const TokenType code) |
|
407 { |
|
408 switch(code) |
|
409 { |
|
410 /* Fallthrough all these. */ |
|
411 case AS: |
|
412 case ASCENDING: |
|
413 case AT: |
|
414 case CASE: |
|
415 case CAST: |
|
416 case CASTABLE: |
|
417 case EQ: |
|
418 case EXTERNAL: |
|
419 case GE: |
|
420 case G_EQ: |
|
421 case G_GT: |
|
422 case G_LT: |
|
423 case G_NE: |
|
424 case GT: |
|
425 case IN: |
|
426 case INHERIT: |
|
427 case INSTANCE: |
|
428 case IS: |
|
429 case ITEM: |
|
430 case LE: |
|
431 case LT: |
|
432 case NE: |
|
433 case NO_INHERIT: |
|
434 case NO_PRESERVE: |
|
435 case OF: |
|
436 case PRESERVE: |
|
437 case RETURN: |
|
438 case STABLE: |
|
439 case TO: |
|
440 case TREAT: |
|
441 return true; |
|
442 default: |
|
443 return false; |
|
444 }; |
|
445 } |
|
446 |
|
447 bool XQueryTokenizer::isTypeToken(const TokenType t) |
|
448 { |
|
449 switch(t) |
|
450 { |
|
451 /* Fallthrough all these. */ |
|
452 case ATTRIBUTE: |
|
453 case COMMENT: |
|
454 case DOCUMENT: |
|
455 case DOCUMENT_NODE: |
|
456 case ELEMENT: |
|
457 case ITEM: |
|
458 case NODE: |
|
459 case PROCESSING_INSTRUCTION: |
|
460 case SCHEMA_ATTRIBUTE: |
|
461 case SCHEMA_ELEMENT: |
|
462 case TEXT: |
|
463 return true; |
|
464 default: |
|
465 return false; |
|
466 } |
|
467 } |
|
468 |
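/* Tokenizes either a single NCName or a prefix:localName QName. The check for
 * ':' followed by '=' ensures that input such as "$pos:=3" (illustrative) is
 * tokenized as an NCName, "pos", followed by ASSIGN, rather than as the start
 * of a QName. */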
|
469 Tokenizer::Token XQueryTokenizer::tokenizeNCNameOrQName() |
|
470 { |
|
471 const int start = m_pos; |
|
472 |
|
473 const Token t1 = tokenizeNCName(); |
|
474 if(t1.hasError()) |
|
475 return t1; |
|
476 |
|
477 if(peekCurrent() != ':' || peekAhead() == '=') |
|
478 return t1; |
|
479 |
|
480 ++m_pos; |
|
481 |
|
482 const Token t2 = tokenizeNCName(); |
|
483 if(t2.hasError()) |
|
484 return t2; |
|
485 else |
|
486 return Token(QNAME, m_data.mid(start, m_pos - start)); |
|
487 } |
|
488 |
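/* Tokenizes integer and decimal literals as NUMBER, and literals carrying an
 * exponent as XPATH2_NUMBER. For instance (illustrative), "3.14" yields
 * NUMBER while "3.14e0" yields XPATH2_NUMBER. */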
|
489 Tokenizer::Token XQueryTokenizer::tokenizeNumberLiteral() |
|
490 { |
|
491 setState(Operator); |
|
492 const int startPos = m_pos; |
|
493 bool hasDot = false; |
|
494 bool isXPath20 = false; |
|
495 |
|
496 for(; m_pos < m_length; ++m_pos) |
|
497 { |
|
498 QChar ch(current()); |
|
499 |
|
500 char cell = ch.cell(); |
|
501 |
|
502 if(cell == 'e' || cell == 'E') |
|
503 { |
|
504 isXPath20 = true; |
|
505 ++m_pos; |
|
506 ch = current(); |
|
507 |
|
508 if(ch.row() != 0) |
|
509 break; |
|
510 |
|
511 cell = ch.cell(); |
|
512 |
|
513 if(cell == '+' || cell == '-') |
|
514 continue; |
|
515 } |
|
516 |
|
517 if(isNCNameStart(ch)) |
|
518 return error(); |
|
519 |
|
520 if(cell < '0' || cell > '9') |
|
521 { |
|
522 if(cell == '.' && !hasDot) |
|
523 hasDot = true; |
|
524 else |
|
525 break; |
|
526 } |
|
527 } |
|
528 |
|
529 return Token(isXPath20 ? XPATH2_NUMBER : NUMBER, m_data.mid(startPos, m_pos - startPos)); |
|
530 } |
|
531 |
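/* Resolves a character or entity reference that starts at '&'. The five
 * predefined entities (for instance "&lt;" and "&amp;") as well as decimal
 * and hexadecimal references (for instance "&#10;" and "&#xA;", both U+000A)
 * are handled; a null QString signals a syntax error to the caller. */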
|
532 QString XQueryTokenizer::tokenizeCharacterReference() |
|
533 { |
|
534 Q_ASSERT(peekCurrent() == '&'); |
|
535 |
|
536 const int theEnd = m_data.indexOf(QLatin1Char(';'), m_pos + 1); |
|
537 |
|
538 if(theEnd == -1) /* No ';' found, a syntax error. i18n. */ |
|
539 return QString(); |
|
540 |
|
541 QString content(m_data.mid(m_pos + 1, (theEnd - m_pos) - 1)); |
|
542 m_pos = theEnd; |
|
543 |
|
544 const QChar charRef(charForReference(content)); |
|
545 |
|
546 if(!charRef.isNull()) |
|
547 return charRef; |
|
548 else if(content.startsWith(QLatin1Char('#'))) |
|
549 { |
|
550 int base; |
|
551 |
|
552 /* It is only '#' or '#x'. */ |
|
553 if(content.length() < 2) |
|
554 return QString(); |
|
555 |
|
556 /* We got a hex number if it starts with 'x', otherwise it's a decimal. */ |
|
557 if(content.at(1) == QLatin1Char('x')) |
|
558 { |
|
559 base = 16; |
|
560 content = content.mid(2); /* Remove "#x". */ |
|
561 } |
|
562 else |
|
563 { |
|
564 base = 10; |
|
565 content = content.mid(1); /* Remove "#". */ |
|
566 } |
|
567 |
|
568 bool conversionOK = false; |
|
569 const int codepoint = content.toInt(&conversionOK, base); |
|
570 |
|
571 if(conversionOK) |
|
572 { |
|
573 const QChar ch(codepoint); |
|
574 |
|
575 if(ch.isNull()) |
|
576 { |
|
577 /* We likely have something which requires surrogate pairs. */
|
578 QString result; |
|
579 result += QChar(QChar::highSurrogate(codepoint)); |
|
580 result += QChar(QChar::lowSurrogate(codepoint)); |
|
581 return result; |
|
582 } |
|
583 else |
|
584 return ch; |
|
585 } |
|
586 else |
|
587 return QString(); |
|
588 } |
|
589 else |
|
590 return QString(); |
|
591 } |
|
592 |
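/* Advances m_pos to the first character of the next occurrence of content and
 * returns the number of characters skipped, or -1 if content does not occur
 * in the remaining input. */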
|
593 int XQueryTokenizer::scanUntil(const char *const content) |
|
594 { |
|
595 const int end = m_data.indexOf(QString::fromLatin1(content), m_pos); |
|
596 |
|
597 if(end == -1) |
|
598 return -1; |
|
599 else |
|
600 { |
|
601 const int len = end - m_pos; |
|
602 m_pos += len; |
|
603 return len; |
|
604 } |
|
605 } |
|
606 |
|
607 QChar XQueryTokenizer::charForReference(const QString &reference) |
|
608 { |
|
609 if(m_charRefs.isEmpty()) |
|
610 { |
|
611 /* Initialize. */ |
|
612 m_charRefs.reserve(5); |
|
613 m_charRefs.insert(QLatin1String("lt"), QLatin1Char('<')); |
|
614 m_charRefs.insert(QLatin1String("gt"), QLatin1Char('>')); |
|
615 m_charRefs.insert(QLatin1String("amp"), QLatin1Char('&')); |
|
616 m_charRefs.insert(QLatin1String("quot"), QLatin1Char('"')); |
|
617 m_charRefs.insert(QLatin1String("apos"), QLatin1Char('\'')); |
|
618 } |
|
619 |
|
620 return m_charRefs.value(reference); |
|
621 } |
|
622 |
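/* Tokenizes a string literal delimited by the quote character at the current
 * position. A doubled delimiter escapes itself (for instance, 'I''m' has the
 * value I'm), character references are resolved, and line endings are
 * normalized through normalizeEOL(). */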
|
623 Tokenizer::Token XQueryTokenizer::tokenizeStringLiteral() |
|
624 { |
|
625 const QChar delimiter(current()); |
|
626 /* Unfortunately, we cannot just scan and then do mid(),

627 * since we may encounter character references. */
|
628 QString result; |
|
629 |
|
630 /* This is more likely than QString's default allocation. */ |
|
631 result.reserve(8); |
|
632 |
|
633 CharacterSkips skipEOLNormalization; |
|
634 |
|
635 /* Advance over the initial quote character. */ |
|
636 ++m_pos; |
|
637 |
|
638 for(; m_pos < m_length; ++m_pos) |
|
639 { |
|
640 const QChar c(current()); |
|
641 |
|
642 if(c == QLatin1Char('&')) |
|
643 { |
|
644 const QString charRef(tokenizeCharacterReference()); |
|
645 |
|
646 if(charRef.isNull()) |
|
647 return error(); |
|
648 else |
|
649 { |
|
650 skipEOLNormalization.insert(result.count()); |
|
651 result.append(charRef); |
|
652 } |
|
653 |
|
654 } |
|
655 else if(c == delimiter) |
|
656 { |
|
657 /* Maybe the escaping mechanism is used. For instance, "s""s" |
|
658 * has the value `s"s'. */ |
|
659 ++m_pos; |
|
660 |
|
661 if(current() == delimiter) /* Double quote. */ |
|
662 result += delimiter; |
|
663 else |
|
664 return Token(STRING_LITERAL, normalizeEOL(result, skipEOLNormalization)); |
|
665 } |
|
666 else |
|
667 result += c; |
|
668 } |
|
669 |
|
670 return error(); |
|
671 } |
|
672 |
|
673 Tokenizer::Token XQueryTokenizer::tokenizeNCName() |
|
674 { |
|
675 const int startPos = m_pos; |
|
676 |
|
677 if(m_pos < m_length && isNCNameStart(current())) |
|
678 { |
|
679 ++m_pos; |
|
680 |
|
681 for(; m_pos < m_length; ++m_pos) |
|
682 { |
|
683 if(!isNCNameBody(current())) |
|
684 break; |
|
685 } |
|
686 |
|
687 return Token(NCNAME, m_data.mid(startPos, m_pos - startPos)); |
|
688 } |
|
689 else |
|
690 return error(); |
|
691 } |
|
692 |
|
693 bool XQueryTokenizer::aheadEquals(const char *const chs, |
|
694 const int len, |
|
695 const int offset) const |
|
696 { |
|
697 Q_ASSERT(len > 0); |
|
698 Q_ASSERT(qstrlen(chs) == uint(len)); |
|
699 |
|
700 if(m_pos + len >= m_length) |
|
701 return false; |
|
702 |
|
703 for(int i = offset; i < (len + offset); ++i) |
|
704 { |
|
705 if(m_data.at(m_pos + i).toAscii() != chs[i - offset]) |
|
706 return false; |
|
707 } |
|
708 |
|
709 return true; |
|
710 } |
|
711 |
|
712 const TokenMap *XQueryTokenizer::lookupKeyword(const QString &keyword) |
|
713 { |
|
714 return TokenLookup::value(keyword.toAscii().constData(), keyword.length()); |
|
715 } |
|
716 |
|
717 XQueryTokenizer::State XQueryTokenizer::state() const |
|
718 { |
|
719 return m_state; |
|
720 } |
|
721 |
|
722 void XQueryTokenizer::setState(const State s) |
|
723 { |
|
724 m_state = s; |
|
725 } |
|
726 |
|
727 void XQueryTokenizer::pushState(const State s) |
|
728 { |
|
729 m_stateStack.push(s); |
|
730 } |
|
731 |
|
732 void XQueryTokenizer::pushState() |
|
733 { |
|
734 m_stateStack.push(m_state); |
|
735 } |
|
736 |
|
737 void XQueryTokenizer::popState() |
|
738 { |
|
739 /* QStack::pop() asserts if it's empty, so we need to check |
|
740 * it first, since we might receive unbalanced curly braces. */
|
741 if(!m_stateStack.isEmpty()) |
|
742 m_state = m_stateStack.pop(); |
|
743 } |
|
744 |
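/* The heart of the tokenizer: a hand-written state machine that dispatches on
 * the current State (Default, Operator, StartTag, ElementContent, and so on)
 * and on the upcoming characters. States are switched with setState(), while
 * pushState() and popState() maintain a stack for nested constructs such as
 * enclosed expressions and direct element constructors. */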
|
745 Tokenizer::Token XQueryTokenizer::nextToken() |
|
746 { |
|
747 switch(state()) |
|
748 { |
|
749 /* We want to skip or do special whitespace handling for these |
|
750 * states. So fallthrough all of the following. */ |
|
751 case AposAttributeContent: |
|
752 case Axis: |
|
753 case ElementContent: |
|
754 case EndTag: |
|
755 case Pragma: |
|
756 case PragmaContent: |
|
757 case ProcessingInstructionName: |
|
758 case QuotAttributeContent: |
|
759 case StartTag: |
|
760 case XMLComment: |
|
761 break; |
|
762 default: |
|
763 handleWhitespace(); |
|
764 } |
|
765 |
|
766 switch(state()) |
|
767 { |
|
768 case XMLSpaceDecl: |
|
769 /* Fallthrough. */ |
|
770 case NamespaceKeyword: |
|
771 { |
|
772 switch(peekCurrent()) |
|
773 { |
|
774 case ',': |
|
775 return tokenAndAdvance(COMMA); |
|
776 case '"': |
|
777 /* Fallthrough. */ |
|
778 case '\'': |
|
779 { |
|
780 setState(NamespaceDecl); |
|
781 return tokenizeStringLiteral(); |
|
782 } |
|
783 } |
|
784 |
|
785 const Token id(tokenizeNCName()); |
|
786 |
|
787 if(id.type != NCNAME) |
|
788 return id; |
|
789 |
|
790 const TokenMap *const keyword = lookupKeyword(id.value); |
|
791 if(keyword) |
|
792 { |
|
793 switch(keyword->token) |
|
794 { |
|
795 case INHERIT: |
|
796 /* Fallthrough. */ |
|
797 case NO_INHERIT: |
|
798 { |
|
799 setState(Default); |
|
800 break; |
|
801 } |
|
802 case NAMESPACE: |
|
803 { |
|
804 setState(NamespaceDecl); |
|
805 break; |
|
806 } |
|
807 case ORDERED: |
|
808 /* Fallthrough. */ |
|
809 case UNORDERED: |
|
810 /* Fallthrough. */ |
|
811 case STRIP: |
|
812 { |
|
813 setState(Default); |
|
814 break; |
|
815 } |
|
816 case PRESERVE: |
|
817 { |
|
818 if(state() != NamespaceKeyword) |
|
819 setState(Default); |
|
820 } |
|
821 default: |
|
822 break; |
|
823 } |
|
824 |
|
825 return Token(keyword->token); |
|
826 } |
|
827 else |
|
828 return id; |
|
829 |
|
830 Q_ASSERT(false); |
|
831 } |
|
832 case NamespaceDecl: |
|
833 { |
|
834 switch(peekCurrent()) |
|
835 { |
|
836 case '=': |
|
837 return tokenAndAdvance(G_EQ); |
|
838 case ';': |
|
839 return tokenAndChangeState(SEMI_COLON, Default); |
|
840 case '\'': |
|
841 /* Fallthrough. */ |
|
842 case '\"': |
|
843 return tokenizeStringLiteral(); |
|
844 } |
|
845 |
|
846 const Token nc(tokenizeNCName()); |
|
847 |
|
848 handleWhitespace(); |
|
849 |
|
850 const char pc = peekCurrent(); |
|
851 const TokenMap* const t = lookupKeyword(nc.value); |
|
852 |
|
853 if((pc == '\'' || pc == '"') && t)

854 return tokenAndChangeState(t->token, Default, 0);
|
855 else |
|
856 return nc; |
|
857 |
|
858 Q_ASSERT(false); |
|
859 } |
|
860 case Axis: |
|
861 { |
|
862 if(peekCurrent() == ':') |
|
863 { |
|
864 Q_ASSERT(peekAhead() == ':'); |
|
865 m_pos += 2; |
|
866 setState(AfterAxisSeparator); |
|
867 return Token(COLONCOLON); |
|
868 } |
|
869 /* Fallthrough. */ |
|
870 } |
|
871 case AfterAxisSeparator: |
|
872 /* Fallthrough. */ |
|
873 case Default: |
|
874 /* State Operator and state Default have a lot of tokens in common except |
|
875 * for minor differences. So we treat them the same way, and sprinkle logic
|
876 * here and there to handle the small differences. */ |
|
877 /* Fallthrough. */ |
|
878 case Operator: |
|
879 { |
|
880 switch(peekCurrent()) |
|
881 { |
|
882 case '=': |
|
883 return tokenAndChangeState(G_EQ, Default); |
|
884 case '-': |
|
885 return tokenAndChangeState(MINUS, Default); |
|
886 case '+': |
|
887 return tokenAndChangeState(PLUS, Default); |
|
888 case '[': |
|
889 return tokenAndChangeState(LBRACKET, Default); |
|
890 case ']': |
|
891 return tokenAndChangeState(RBRACKET, Operator); |
|
892 case ',': |
|
893 return tokenAndChangeState(COMMA, Default); |
|
894 case ';': |
|
895 return tokenAndChangeState(SEMI_COLON, Default); |
|
896 case '$': |
|
897 return tokenAndChangeState(DOLLAR, VarName); |
|
898 case '|': |
|
899 return tokenAndChangeState(BAR, Default); |
|
900 case '?': |
|
901 return tokenAndChangeState(QUESTION, Operator); |
|
902 case ')': |
|
903 return tokenAndChangeState(RPAREN, Operator); |
|
904 case '@': |
|
905 return tokenAndChangeState(AT_SIGN, Default); |
|
906 /* Fallthrough all these. */ |
|
907 case '1': |
|
908 case '2': |
|
909 case '3': |
|
910 case '4': |
|
911 case '5': |
|
912 case '6': |
|
913 case '7': |
|
914 case '8': |
|
915 case '9': |
|
916 case '0': |
|
917 return tokenizeNumberLiteral(); |
|
918 case '.': |
|
919 { |
|
920 const char next = peekAhead(); |
|
921 if(next == '.') |
|
922 return tokenAndChangeState(DOTDOT, Operator, 2); |
|
923 /* .5 is allowed, as a short form for 0.5:
|
924 * <tt>[142] DecimalLiteral ::= ("." Digits) | (Digits "." [0-9]*)</tt> |
|
925 */ |
|
926 else if(isDigit(next)) |
|
927 return tokenizeNumberLiteral(); |
|
928 else |
|
929 return tokenAndChangeState(DOT, Operator); |
|
930 } |
|
931 case '\'': |
|
932 /* Fallthrough. */ |
|
933 case '"': |
|
934 { |
|
935 setState(Operator); |
|
936 return tokenizeStringLiteral(); |
|
937 |
|
938 } |
|
939 case '(': |
|
940 { |
|
941 if(peekAhead() == '#') |
|
942 return tokenAndChangeState(PRAGMA_START, Pragma, 2); |
|
943 else |
|
944 return tokenAndChangeState(LPAREN, Default); |
|
945 } |
|
946 case '*': |
|
947 { |
|
948 if(peekAhead() == ':') |
|
949 { |
|
950 m_pos += 2; /* Consume *:. */ |
|
951 const Token nc = tokenizeNCName(); |
|
952 |
|
953 if(nc.hasError()) |
|
954 return error(); |
|
955 else |
|
956 return tokenAndChangeState(ANY_PREFIX, nc.value, Operator); |
|
957 } |
|
958 else |
|
959 return tokenAndChangeState(STAR, state() == Default ? Operator : Default); |
|
960 } |
|
961 case ':': |
|
962 { |
|
963 switch(peekAhead()) |
|
964 { |
|
965 case '=': |
|
966 return tokenAndChangeState(ASSIGN, Default, 2); |
|
967 case ':': |
|
968 return tokenAndChangeState(COLONCOLON, Default, 2); |
|
969 default: |
|
970 return error(); |
|
971 } |
|
972 } |
|
973 case '!': |
|
974 { |
|
975 if(peekAhead() == '=') |
|
976 return tokenAndChangeState(G_NE, Default, 2); |
|
977 else |
|
978 return error(); |
|
979 } |
|
980 case '<': |
|
981 { |
|
982 switch(peekAhead()) |
|
983 { |
|
984 case '=': |
|
985 return tokenAndChangeState(G_LE, Default, 2); |
|
986 case '<': |
|
987 return tokenAndChangeState(PRECEDES, Default, 2); |
|
988 case '?': |
|
989 { |
|
990 pushState(Operator); |
|
991 return tokenAndChangeState(PI_START, ProcessingInstructionName, 2); |
|
992 } |
|
993 case '!': |
|
994 { |
|
995 if(aheadEquals("!--", 3)) |
|
996 { |
|
997 m_pos += 3; /* Consume "!--". */ |
|
998 pushState(Operator); |
|
999 return tokenAndChangeState(COMMENT_START, XMLComment); |
|
1000 } |
|
1001 /* Fallthrough. It's a syntax error, and this is a good way to report it. */ |
|
1002 } |
|
1003 default: |
|
1004 { |
|
1005 if((m_pos + 1) < m_length && isNCNameStart(m_data.at(m_pos + 1))) |
|
1006 { |
|
1007 /* We assume it's an element constructor. */ |
|
1008 pushState(Operator); |
|
1009 } |
|
1010 |
|
1011 return tokenAndChangeState(G_LT, state() == Operator ? Default : StartTag); |
|
1012 } |
|
1013 } |
|
1014 } |
|
1015 case '>': |
|
1016 { |
|
1017 switch(peekAhead()) |
|
1018 { |
|
1019 case '=': |
|
1020 return tokenAndChangeState(G_GE, Default, 2); |
|
1021 case '>': |
|
1022 return tokenAndChangeState(FOLLOWS, Default, 2); |
|
1023 default: |
|
1024 return tokenAndChangeState(G_GT, Default); |
|
1025 } |
|
1026 } |
|
1027 case '/': |
|
1028 { |
|
1029 if(peekAhead() == '/') |
|
1030 return tokenAndChangeState(SLASHSLASH, Default, 2); |
|
1031 else |
|
1032 return tokenAndChangeState(SLASH, Default); |
|
1033 } |
|
1034 case '{': |
|
1035 { |
|
1036 pushState(Operator); |
|
1037 return tokenAndChangeState(CURLY_LBRACE, Default); |
|
1038 } |
|
1039 case '}': |
|
1040 { |
|
1041 popState(); |
|
1042 |
|
1043 return tokenAndAdvance(CURLY_RBRACE); |
|
1044 } |
|
1045 } |
|
1046 |
|
1047 /* Ok. We're in state Default or Operator, and it wasn't a simple |
|
1048 * character. */ |
|
1049 |
|
1050 const Token id(tokenizeNCName()); |
|
1051 |
|
1052 if(id.type != NCNAME) |
|
1053 return id; |
|
1054 |
|
1055 const TokenMap *const keyword = lookupKeyword(id.value); |
|
1056 |
|
1057 if(state() == Operator) |
|
1058 { |
|
1059 if(keyword) |
|
1060 { |
|
1061 if(keyword->token == DEFAULT || keyword->token == ASCENDING || keyword->token == DESCENDING) |
|
1062 setState(Operator); |
|
1063 else if(keyword->token == RETURN) |
|
1064 setState(Default); |
|
1065 else if(isPhraseKeyword(keyword->token)) |
|
1066 { |
|
1067 const TokenType ws = consumeWhitespace(); |
|
1068 if(ws == ERROR) |
|
1069 return error(); |
|
1070 |
|
1071 const Token id2(tokenizeNCName()); |
|
1072 const TokenMap *const keyword2 = lookupKeyword(id2.value); |
|
1073 |
|
1074 if(keyword2) |
|
1075 { |
|
1076 if(keyword->token == TREAT && keyword2->token == AS) |
|
1077 setState(ItemType); |
|
1078 else if (keyword->token == CAST || (keyword->token == CASTABLE && keyword2->token == AS) || keyword2->token == BY) |
|
1079 setState(Default); |
|
1080 |
|
1081 m_tokenStack.push(Token(keyword2->token)); |
|
1082 } |
|
1083 else |
|
1084 m_tokenStack.push(id2); |
|
1085 |
|
1086 return Token(keyword->token); |
|
1087 } |
|
1088 else |
|
1089 { |
|
1090 /* Such that we tokenize the second token in "empty greatest". */ |
|
1091 if(keyword->token != EMPTY) |
|
1092 setState(Default); |
|
1093 } |
|
1094 |
|
1095 if(keyword->token == AS || keyword->token == CASE) |
|
1096 setState(ItemType); |
|
1097 |
|
1098 return Token(keyword->token); |
|
1099 } |
|
1100 else |
|
1101 return id; |
|
1102 } |
|
1103 |
|
1104 Q_ASSERT(state() == Default || state() == Axis || state() == AfterAxisSeparator); |
|
1105 |
|
1106 /* |
|
1107 * This is hard. Consider this: |
|
1108 * |
|
1109 * Valid: child ::nameTest |
|
1110 * Valid: child:: nameTest |
|
1111 * Syntax Error: child :localName |
|
1112 * Syntax Error: child: localName |
|
1113 * |
|
1114 * Consider "child ::name". Right now, we're here: |
|
1115 * ^ |
|
1116 * We don't know whether "child" is a prefix and hence the whitespace is invalid, |
|
1117 * or whether it's an axis and hence skippable. */ |
|
1118 { |
|
1119 const int wsLength = peekForColonColon(); |
|
1120 /* We cannot call handleWhitespace() because it returns on |
|
1121 * END_OF_FILE, and we have already parsed a keyword that we

1122 * still need to deal with.
|
1123 * |
|
1124 * If we have a colon colon, which means the whitespace is |
|
1125 * allowed, we skip it. */ |
|
1126 if(wsLength != -1) |
|
1127 m_pos += wsLength; |
|
1128 } |
|
1129 |
|
1130 /* Handle name tests. */ |
|
1131 if(peekCurrent() == ':') |
|
1132 { |
|
1133 switch(peekAhead()) |
|
1134 { |
|
1135 case '=': |
|
1136 return id; |
|
1137 case '*': |
|
1138 { |
|
1139 m_pos += 2; |
|
1140 return tokenAndChangeState(ANY_LOCAL_NAME, id.value, Operator); |
|
1141 } |
|
1142 case ':': |
|
1143 { |
|
1144 /* We have an axis. */ |
|
1145 setState(Axis); |
|
1146 return keyword ? Token(keyword->token) : id; |
|
1147 } |
|
1148 default: |
|
1149 { |
|
1150 /* It's a QName. */ |
|
1151 ++m_pos; /* Consume the colon. */ |
|
1152 |
|
1153 const Token id2(tokenizeNCName()); |
|
1154 |
|
1155 if(id2.type != NCNAME) |
|
1156 { |
|
1157 --m_pos; |
|
1158 return id; |
|
1159 } |
|
1160 |
|
1161 setState(Operator); |
|
1162 const int qNameLen = id.value.length() + id2.value.length() + 1; |
|
1163 return Token(QNAME, m_data.mid(m_pos - qNameLen, qNameLen)); |
|
1164 } |
|
1165 } |
|
1166 } |
|
1167 |
|
1168 if(!keyword || isOperatorKeyword(keyword->token)) |
|
1169 { |
|
1170 setState(Operator); |
|
1171 return id; |
|
1172 } |
|
1173 |
|
1174 const TokenType ws = consumeWhitespace(); |
|
1175 if(ws == ERROR) // TODO this should test for success. Write test. |
|
1176 return Token(ERROR); |
|
1177 |
|
1178 if(atEnd()) |
|
1179 { |
|
1180 setState(Operator); |
|
1181 return id; |
|
1182 } |
|
1183 |
|
1184 /* Let the if-body apply to constructors and node type tests. */
|
1185 if(isTypeToken(keyword->token) || |
|
1186 keyword->token == TYPESWITCH || |
|
1187 keyword->token == ORDERED || |
|
1188 keyword->token == UNORDERED || |
|
1189 keyword->token == IF) |
|
1190 { |
|
1191 switch(peekCurrent()) |
|
1192 { |
|
1193 case '(': |
|
1194 { |
|
1195 // TODO See if we can remove DOCUMENT from isTypeToken. |
|
1196 if(isTypeToken(keyword->token) && keyword->token != DOCUMENT) |
|
1197 { |
|
1198 m_tokenStack.push(Token(LPAREN)); |
|
1199 ++m_pos; /* Consume '('. */ |
|
1200 pushState(Operator); |
|
1201 |
|
1202 if(keyword->token == PROCESSING_INSTRUCTION) |
|
1203 setState(KindTestForPI); |
|
1204 else |
|
1205 setState(KindTest); |
|
1206 |
|
1207 return Token(keyword->token); |
|
1208 } |
|
1209 else if(keyword->token == TYPESWITCH || keyword->token == IF) |
|
1210 return Token(keyword->token); |
|
1211 else /* It's a function call. */ |
|
1212 return id; |
|
1213 } |
|
1214 case '{': |
|
1215 { |
|
1216 m_tokenStack.push(Token(CURLY_LBRACE)); |
|
1217 ++m_pos; /* Consume '{'. */ |
|
1218 pushState(Operator); |
|
1219 /* Stay in state Default. */ |
|
1220 return Token(keyword->token); |
|
1221 } |
|
1222 default: |
|
1223 { |
|
1224 /* We have read in a token which is, for instance,

1225 * "element", and now it can be an element

1226 * test("element"), a node kind test("element()"), or a

1227 * computed element constructor("element name {...").

1228 * We need to do a two-token lookahead here, because

1229 * "element return" can be an element test followed by

1230 * the return keyword, but it can also be an element

1231 * constructor("element return {"). */
|
1232 if(isNCNameStart(current())) |
|
1233 { |
|
1234 const int currentPos = m_pos; |
|
1235 const Token token2 = tokenizeNCNameOrQName(); |
|
1236 |
|
1237 if(token2.hasError()) |
|
1238 return token2; |
|
1239 |
|
1240 handleWhitespace(); |
|
1241 |
|
1242 if(peekCurrent() == '{') |
|
1243 { |
|
1244 /* An element constructor. */ |
|
1245 m_tokenStack.push(token2); |
|
1246 return Token(keyword->token); |
|
1247 } |
|
1248 |
|
1249 /* We jump back in the stream, we need to tokenize token2 according |
|
1250 * to the state. */ |
|
1251 m_pos = currentPos; |
|
1252 setState(Operator); |
|
1253 return Token(NCNAME, QLatin1String(keyword->name)); |
|
1254 } |
|
1255 } |
|
1256 } |
|
1257 } |
|
1258 |
|
1259 if(peekCurrent() == '$') |
|
1260 { |
|
1261 setState(VarName); |
|
1262 return Token(keyword->token); |
|
1263 } |
|
1264 |
|
1265 /* It's not a node type and not the typeswitch expression, but it is a function call site. */
|
1266 if(peekCurrent() == '(') |
|
1267 return id; |
|
1268 else if(peekCurrent() == '{' && keyword->token == VALIDATE) |
|
1269 return Token(keyword->token); |
|
1270 |
|
1271 if(!isNCNameStart(current())) |
|
1272 { |
|
1273 setState(Operator); |
|
1274 return id; |
|
1275 } |
|
1276 |
|
1277 const Token id2(tokenizeNCName()); |
|
1278 const TokenMap *const keyword2 = lookupKeyword(id2.value); |
|
1279 |
|
1280 if(!keyword2) |
|
1281 { |
|
1282 /* It's a syntax error: all cases of two subsequent NCNames are keywords (e.g., declarations). */
|
1283 setState(Operator); |
|
1284 return id; |
|
1285 } |
|
1286 |
|
1287 switch(keyword->token) |
|
1288 { |
|
1289 case DECLARE: |
|
1290 { |
|
1291 switch(keyword2->token) |
|
1292 { |
|
1293 case VARIABLE: |
|
1294 /* Fallthrough. */ |
|
1295 case FUNCTION: |
|
1296 { |
|
1297 m_tokenStack.push(Token(keyword2->token)); |
|
1298 setState(Default); |
|
1299 return Token(keyword->token); |
|
1300 } |
|
1301 case OPTION: |
|
1302 { |
|
1303 m_tokenStack.push(Token(keyword2->token)); |
|
1304 setState(Default); |
|
1305 return Token(keyword->token); |
|
1306 } |
|
1307 case COPY_NAMESPACES: |
|
1308 /* Fallthrough. */ |
|
1309 case ORDERING: |
|
1310 { |
|
1311 m_tokenStack.push(Token(keyword2->token)); |
|
1312 setState(NamespaceKeyword); |
|
1313 return Token(keyword->token); |
|
1314 } |
|
1315 case CONSTRUCTION: |
|
1316 { |
|
1317 // TODO identical to CONSTRUCTION? |
|
1318 m_tokenStack.push(Token(keyword2->token)); |
|
1319 setState(Operator); |
|
1320 return Token(keyword->token); |
|
1321 } |
|
1322 case NAMESPACE: |
|
1323 /* Fallthrough. */ |
|
1324 case BASEURI: |
|
1325 { |
|
1326 m_tokenStack.push(Token(keyword2->token)); |
|
1327 setState(NamespaceDecl); |
|
1328 return Token(keyword->token); |
|
1329 } |
|
1330 case BOUNDARY_SPACE: |
|
1331 { |
|
1332 m_tokenStack.push(Token(keyword2->token)); |
|
1333 setState(XMLSpaceDecl); |
|
1334 return Token(keyword->token); |
|
1335 } |
|
1336 case DEFAULT: |
|
1337 { |
|
1338 m_tokenStack.push(Token(keyword2->token)); |
|
1339 |
|
1340 const TokenType ws2 = consumeWhitespace(); |
|
1341 if(ws2 != SUCCESS) |
|
1342 { |
|
1343 m_tokenStack.prepend(Token(ws2)); |
|
1344 return Token(keyword->token); |
|
1345 } |
|
1346 |
|
1347 const Token id3(tokenizeNCName()); |
|
1348 |
|
1349 if(id3.type != NCNAME) |
|
1350 { |
|
1351 m_tokenStack.prepend(id3); |
|
1352 return Token(keyword->token); |
|
1353 } |
|
1354 |
|
1355 const TokenMap *const keyword3 = lookupKeyword(id3.value); |
|
1356 if(!keyword3) |
|
1357 { |
|
1358 m_tokenStack.prepend(id3); |
|
1359 return Token(keyword->token); |
|
1360 } |
|
1361 else |
|
1362 { |
|
1363 m_tokenStack.prepend(Token(keyword3->token)); |
|
1364 |
|
1365 if(keyword3->token == ORDER) |
|
1366 setState(Operator); |
|
1367 else |
|
1368 setState(NamespaceDecl); |
|
1369 } |
|
1370 |
|
1371 return Token(keyword->token); |
|
1372 } |
|
1373 default: |
|
1374 { |
|
1375 m_tokenStack.push(Token(keyword2->token)); |
|
1376 setState(Default); |
|
1377 return id; |
|
1378 } |
|
1379 } |
|
1380 } |
|
1381 case XQUERY: |
|
1382 { |
|
1383 m_tokenStack.push(Token(keyword2->token)); |
|
1384 |
|
1385 if(keyword2->token == VERSION) |
|
1386 { |
|
1387 setState(NamespaceDecl); |
|
1388 return Token(keyword->token); |
|
1389 } |
|
1390 else |
|
1391 { |
|
1392 setState(Operator); |
|
1393 return id; |
|
1394 } |
|
1395 } |
|
1396 case IMPORT: |
|
1397 { |
|
1398 m_tokenStack.push(Token(keyword2->token)); |
|
1399 |
|
1400 switch(keyword2->token) |
|
1401 { |
|
1402 case SCHEMA: |
|
1403 /* Fallthrough. */ |
|
1404 case MODULE: |
|
1405 { |
|
1406 setState(NamespaceKeyword); |
|
1407 return Token(keyword->token); |
|
1408 } |
|
1409 default: |
|
1410 { |
|
1411 setState(Operator); |
|
1412 return id; |
|
1413 } |
|
1414 } |
|
1415 } |
|
1416 case VALIDATE: |
|
1417 { |
|
1418 m_tokenStack.push(Token(keyword2->token)); |
|
1419 |
|
1420 switch(keyword2->token) |
|
1421 { |
|
1422 case LAX: |
|
1423 case STRICT: |
|
1424 { |
|
1425 pushState(Operator); |
|
1426 return Token(keyword->token); |
|
1427 } |
|
1428 default: |
|
1429 { |
|
1430 setState(Operator); |
|
1431 return id; |
|
1432 } |
|
1433 } |
|
1434 } |
|
1435 default: |
|
1436 { |
|
1437 m_tokenStack.push(Token(keyword2->token)); |
|
1438 setState(Operator); |
|
1439 return id; |
|
1440 } |
|
1441 } |
|
1442 |
|
1443 Q_ASSERT(false); |
|
1444 |
|
1445 } |
|
1446 case VarName: |
|
1447 { |
|
1448 if(peekCurrent() == '$') |
|
1449 return tokenAndAdvance(DOLLAR); |
|
1450 |
|
1451 setState(Operator); |
|
1452 return tokenizeNCNameOrQName(); |
|
1453 Q_ASSERT(false); |
|
1454 } |
|
1455 case ItemType: |
|
1456 { |
|
1457 switch(peekCurrent()) |
|
1458 { |
|
1459 case '(': |
|
1460 return tokenAndChangeState(LPAREN, KindTest); |
|
1461 case '$': |
|
1462 return tokenAndChangeState(DOLLAR, VarName); |
|
1463 } |
|
1464 |
|
1465 const Token name(tokenizeNCNameOrQName()); |
|
1466 |
|
1467 if(name.hasError()) |
|
1468 return error(); |
|
1469 |
|
1470 else if(name.type == QNAME) |
|
1471 { |
|
1472 setState(OccurrenceIndicator); |
|
1473 return name; |
|
1474 } |
|
1475 else |
|
1476 { |
|
1477 const TokenMap *const keyword = lookupKeyword(name.value); |
|
1478 |
|
1479 if(keyword) |
|
1480 { |
|
1481 pushState(OccurrenceIndicator); |
|
1482 return Token(keyword->token); |
|
1483 } |
|
1484 else |
|
1485 { |
|
1486 setState(Default); |
|
1487 return name; |
|
1488 } |
|
1489 } |
|
1490 Q_ASSERT(false); |
|
1491 } |
|
1492 case KindTest: |
|
1493 { |
|
1494 switch(peekCurrent()) |
|
1495 { |
|
1496 case ')': |
|
1497 { |
|
1498 popState(); |
|
1499 return tokenAndAdvance(RPAREN); |
|
1500 } |
|
1501 case '(': |
|
1502 return tokenAndAdvance(LPAREN); |
|
1503 case ',': |
|
1504 return tokenAndAdvance(COMMA); |
|
1505 case '*': |
|
1506 return tokenAndAdvance(STAR); |
|
1507 case '?': |
|
1508 return tokenAndAdvance(QUESTION); |
|
1509 case '\'': |
|
1510 /* Fallthrough. */ |
|
1511 case '"': |
|
1512 return tokenizeStringLiteral(); |
|
1513 } |
|
1514 |
|
1515 const Token nc(tokenizeNCNameOrQName()); |
|
1516 if(nc.hasError()) |
|
1517 return nc; |
|
1518 |
|
1519 const TokenType ws = consumeWhitespace(); |
|
1520 if(ws == ERROR) |
|
1521 return error(); |
|
1522 |
|
1523 if(peekCurrent() == '(') |
|
1524 { |
|
1525 const TokenMap *const keyword = lookupKeyword(nc.value); |
|
1526 if(keyword) |
|
1527 { |
|
1528 pushState(KindTest); |
|
1529 return Token(keyword->token); |
|
1530 } |
|
1531 else |
|
1532 return nc; |
|
1533 } |
|
1534 else |
|
1535 return nc; |
|
1536 Q_ASSERT(false); |
|
1537 } |
|
1538 case KindTestForPI: |
|
1539 { |
|
1540 switch(peekCurrent()) |
|
1541 { |
|
1542 case ')': |
|
1543 { |
|
1544 popState(); |
|
1545 return tokenAndAdvance(RPAREN); |
|
1546 } |
|
1547 case '\'': |
|
1548 /* Fallthrough. */ |
|
1549 case '"': |
|
1550 return tokenizeStringLiteral(); |
|
1551 default: |
|
1552 return tokenizeNCName(); |
|
1553 } |
|
1554 Q_ASSERT(false); |
|
1555 } |
|
1556 case OccurrenceIndicator: |
|
1557 { |
|
1558 switch(peekCurrent()) |
|
1559 { |
|
1560 case '?': |
|
1561 return tokenAndChangeState(QUESTION, Operator); |
|
1562 case '*': |
|
1563 return tokenAndChangeState(STAR, Operator); |
|
1564 case '+': |
|
1565 return tokenAndChangeState(PLUS, Operator); |
|
1566 default: |
|
1567 { |
|
1568 setState(Operator); |
|
1569 return nextToken(); |
|
1570 } |
|
1571 } |
|
1572 Q_ASSERT(false); |
|
1573 } |
|
1574 case XQueryVersion: |
|
1575 { |
|
1576 switch(peekCurrent()) |
|
1577 { |
|
1578 case '\'': |
|
1579 /* Fallthrough. */ |
|
1580 case '"': |
|
1581 return tokenizeStringLiteral(); |
|
1582 case ';': |
|
1583 return tokenAndChangeState(SEMI_COLON, Default); |
|
1584 } |
|
1585 |
|
1586 const Token id(tokenizeNCName()); |
|
1587 |
|
1588 if(id.type != NCNAME) |
|
1589 return id; |
|
1590 |
|
1591 const TokenMap *const keyword = lookupKeyword(id.value); |
|
1592 if(keyword) |
|
1593 return tokenAndChangeState(keyword->token, Default); |
|
1594 else |
|
1595 return id; |
|
1596 Q_ASSERT(false); |
|
1597 } |
|
1598 case StartTag: |
|
1599 { |
|
1600 if(peekAhead(-1) == '<') |
|
1601 { |
|
1602 if(current().isSpace()) |
|
1603 return Token(ERROR); |
|
1604 } |
|
1605 else |
|
1606 { |
|
1607 if(consumeRawWhitespace()) |
|
1608 return Token(END_OF_FILE); |
|
1609 } |
|
1610 |
|
1611 switch(peekCurrent()) |
|
1612 { |
|
1613 case '/': |
|
1614 { |
|
1615 if(peekAhead() == '>') |
|
1616 { |
|
1617 m_pos += 2; |
|
1618 |
|
1619 if(m_scanOnly) |
|
1620 return Token(POSITION_SET); |
|
1621 else |
|
1622 { |
|
1623 popState(); |
|
1624 return Token(QUICK_TAG_END); |
|
1625 } |
|
1626 } |
|
1627 else |
|
1628 return error(); |
|
1629 } |
|
1630 case '>': |
|
1631 { |
|
1632 if(m_scanOnly) |
|
1633 return tokenAndChangeState(POSITION_SET, StartTag); |
|
1634 else |
|
1635 return tokenAndChangeState(G_GT, ElementContent); |
|
1636 } |
|
1637 case '=': |
|
1638 return tokenAndAdvance(G_EQ); |
|
1639 case '\'': |
|
1640 return tokenAndChangeState(APOS, AposAttributeContent); |
|
1641 case '"': |
|
1642 return tokenAndChangeState(QUOTE, QuotAttributeContent); |
|
1643 default: |
|
1644 return tokenizeNCNameOrQName(); |
|
1645 } |
|
1646 Q_ASSERT(false); |
|
1647 } |
|
1648 case AposAttributeContent: |
|
1649 /* Fallthrough. */ |
|
1650 case QuotAttributeContent: |
|
1651 { |
|
1652 const QChar sep(state() == AposAttributeContent ? QLatin1Char('\'') : QLatin1Char('"')); |
|
1653 QString result; |
|
1654 result.reserve(20); |
|
1655 |
|
1656 if(m_scanOnly) |
|
1657 { |
|
1658 int stack = 0; |
|
1659 return attributeAsRaw(sep, stack, m_pos, true, result); |
|
1660 } |
|
1661 |
|
1662 Q_ASSERT(!m_scanOnly); |
|
1663 while(true) |
|
1664 { |
|
1665 if(atEnd()) |
|
1666 { |
|
1667 /* In the case that the XSL-T tokenizer invokes us with |
|
1668 * default state QuotAttributeContent, we need to be able |
|
1669 * to return a single string, in case that is all we have |
|
1670 * accumulated. */ |
|
1671 if(result.isEmpty()) |
|
1672 return Token(END_OF_FILE); |
|
1673 else |
|
1674 return Token(STRING_LITERAL, result); |
|
1675 } |
|
1676 |
|
1677 const QChar curr(current()); |
|
1678 |
|
1679 if(curr == sep) |
|
1680 { |
|
1681 if(m_pos + 1 == m_length) |
|
1682 return Token(END_OF_FILE); |
|
1683 |
|
1684 if(m_data.at(m_pos + 1) == sep) |
|
1685 { |
|
1686 /* The quoting mechanism was used. */ |
|
1687 m_pos += 2; |
|
1688 result.append(sep); |
|
1689 continue; |
|
1690 } |
|
1691 |
|
1692 const QChar next(m_data.at(m_pos + 1)); |
|
1693 if(!next.isSpace() && next != QLatin1Char('/') && next != QLatin1Char('>')) |
|
1694 return Token(ERROR); // i18n Space must separate attributes |
|
1695 else if(result.isEmpty()) |
|
1696 { |
|
1697 return tokenAndChangeState(state() == AposAttributeContent ? APOS : QUOTE, |
|
1698 StartTag, 1); |
|
1699 } |
|
1700 else |
|
1701 { |
|
1702 /* Don't consume the sep; leave it so that we return a token for it next time. */
|
1703 return Token(STRING_LITERAL, result); |
|
1704 } |
|
1705 |
|
1706 ++m_pos; |
|
1707 continue; |
|
1708 } |
|
1709 else if(curr == QLatin1Char('{')) |
|
1710 { |
|
1711 if(m_pos + 1 == m_length) |
|
1712 return Token(END_OF_FILE); |
|
1713 else if(peekAhead() == '{') |
|
1714 { |
|
1715 ++m_pos; |
|
1716 result.append(QLatin1Char('{')); |
|
1717 } |
|
1718 else |
|
1719 { |
|
1720 if(result.isEmpty()) |
|
1721 { |
|
1722 /* The Attribute Value Template appeared directly in the attribute. */ |
|
1723 pushState(); |
|
1724 return tokenAndChangeState(CURLY_LBRACE, Default); |
|
1725 } |
|
1726 else |
|
1727 { |
|
1728 /* We don't advance, keep '{' as next token. */ |
|
1729 return Token(STRING_LITERAL, result); |
|
1730 } |
|
1731 } |
|
1732 } |
|
1733 else if(curr == QLatin1Char('}')) |
|
1734 { |
|
1735 if(m_pos + 1 == m_length) |
|
1736 return Token(END_OF_FILE); |
|
1737 else if(peekAhead() == '}') |
|
1738 { |
|
1739 ++m_pos; |
|
1740 result.append(QLatin1Char('}')); |
|
1741 } |
|
1742 else |
|
1743 return Token(ERROR); |
|
1744 } |
|
1745 else if(curr == QLatin1Char('&')) |
|
1746 { |
|
1747 const QString ret(tokenizeCharacterReference()); |
|
1748 if(ret.isNull()) |
|
1749 return Token(ERROR); |
|
1750 else |
|
1751 result.append(ret); |
|
1752 } |
|
1753 else if(curr == QLatin1Char('<')) |
|
1754 return Token(STRING_LITERAL, result); |
|
1755 else |
|
1756 { |
|
1757 /* See Extensible Markup Language (XML) 1.0 (Fourth Edition), |
|
1758 * 3.3.3 Attribute-Value Normalization. |
|
1759 * |
|
1760 * However, it is complicated a bit by the fact that AVN is defined on top

1761 * of EOL normalization, and we do the two in one go here. */
|
1762 switch(curr.unicode()) |
|
1763 { |
|
1764 case 0xD: |
|
1765 { |
|
1766 if(peekAhead() == '\n') |
|
1767 { |
|
1768 result.append(QLatin1Char(' ')); |
|
1769 ++m_pos; |
|
1770 break; |
|
1771 } |
|
1772 } |
|
1773 case 0xA: |
|
1774 /* Fallthrough. */ |
|
1775 case 0x9: |
|
1776 { |
|
1777 result.append(QLatin1Char(' ')); |
|
1778 break; |
|
1779 } |
|
1780 default: |
|
1781 result.append(curr); |
|
1782 } |
|
1783 } |
|
1784 |
|
1785 ++m_pos; |
|
1786 } |
|
1787 Q_ASSERT(false); |
|
1788 } |
|
1789 case ElementContent: |
|
1790 { |
|
1791 QString result; |
|
1792 result.reserve(20); |
|
1793 |
|
1794 /* Whether the text node, result, may be whitespace only. Character references |
|
1795 * and CDATA sections disable that. */
|
1796 bool mayBeWS = true; |
|
1797 |
|
1798 CharacterSkips skipEOLNormalization; |
|
1799 |
|
1800 while(true) |
|
1801 { |
|
1802 if(atEnd()) |
|
1803 return Token(END_OF_FILE); |
|
1804 |
|
1805 switch(peekCurrent()) |
|
1806 { |
|
1807 case '<': |
|
1808 { |
|
1809 if(!result.isEmpty() && peekAhead(2) != '[') |
|
1810 { |
|
1811 /* We encountered the end, and it was not a CDATA section. */ |
|
1812 /* We don't advance. Next time we'll handle the <... stuff. */ |
|
1813 return Token(mayBeWS ? STRING_LITERAL : NON_BOUNDARY_WS, normalizeEOL(result, skipEOLNormalization)); |
|
1814 } |
|
1815 |
|
1816 ++m_pos; |
|
1817 if(atEnd()) |
|
1818 return Token(END_OF_FILE); |
|
1819 |
|
1820 const QChar ahead(current()); |
|
1821 if(ahead.isSpace()) |
|
1822 return error(); |
|
1823 else if(ahead == QLatin1Char('/')) |
|
1824 { |
|
1825 if(m_pos + 1 == m_length) |
|
1826 return Token(END_OF_FILE); |
|
1827 else if(m_data.at(m_pos + 1).isSpace()) |
|
1828 return error(); |
|
1829 else |
|
1830 return tokenAndChangeState(BEGIN_END_TAG, EndTag); |
|
1831 } |
|
1832 else if(isNCNameStart(ahead)) |
|
1833 { |
|
1834 pushState(); |
|
1835 return tokenAndChangeState(G_LT, StartTag, 0); |
|
1836 } |
|
1837 else if(aheadEquals("!--", 3, 0)) |
|
1838 { |
|
1839 pushState(); |
|
1840 m_pos += 3; |
|
1841 return tokenAndChangeState(COMMENT_START, XMLComment, 0); |
|
1842 } |
|
1843 else if(aheadEquals("![CDATA[", 8, 0)) |
|
1844 { |
|
1845 mayBeWS = false; |
|
1846 m_pos += 8; |
|
1847 const int start = m_pos; |
|
1848 const int len = scanUntil("]]>"); |
|
1849 |
|
1850 if(len == -1) |
|
1851 return Token(END_OF_FILE); |
|
1852 |
|
1853 m_pos += 2; /* Consume "]]>". Note that m_pos is on '!'. */ |
|
1854 result.append(m_data.mid(start, len)); |
|
1855 break; |
|
1856 } |
|
1857 else if(ahead == QLatin1Char('?')) |
|
1858 { |
|
1859 pushState(); |
|
1860 return tokenAndChangeState(PI_START, ProcessingInstructionName); |
|
1861 } |
|
1862 else |
|
1863 return Token(G_LT); |
|
1864 } |
|
1865 case '&': |
|
1866 { |
|
1867 const QString ret(tokenizeCharacterReference()); |
|
1868 if(ret.isNull()) |
|
1869 return Token(ERROR); |
|
1870 else |
|
1871 { |
|
1872 skipEOLNormalization.insert(result.count()); |
|
1873 result.append(ret); |
|
1874 mayBeWS = false; |
|
1875 break; |
|
1876 } |
|
1877 } |
|
1878 case '{': |
|
1879 { |
|
1880 // TODO remove this check, also below. |
|
1881 if(m_pos + 1 == m_length) |
|
1882 return Token(END_OF_FILE); |
|
1883 else if(peekAhead() == '{') |
|
1884 { |
|
1885 ++m_pos; |
|
1886 result.append(QLatin1Char('{')); |
|
1887 } |
|
1888 else |
|
1889 { |
|
1890 if(result.isEmpty()) |
|
1891 { |
|
1892 pushState(); |
|
1893 return tokenAndChangeState(CURLY_LBRACE, Default); |
|
1894 } |
|
1895 else |
|
1896 { |
|
1897 /* We don't advance here. */ |
|
1898 return Token(mayBeWS ? STRING_LITERAL : NON_BOUNDARY_WS, normalizeEOL(result, skipEOLNormalization)); |
|
1899 } |
|
1900 } |
|
1901 break; |
|
1902 } |
|
1903 case '}': |
|
1904 { |
|
1905 if(m_pos + 1 == m_length) |
|
1906 return Token(END_OF_FILE); |
|
1907 else if(peekAhead() == '}') |
|
1908 { |
|
1909 ++m_pos; |
|
1910 result.append(QLatin1Char('}')); |
|
1911 } |
|
1912 else |
|
1913 { |
|
1914 /* This is a parse error, and the grammar won't be able |
|
1915 * to reduce this CURLY_RBRACE. */ |
|
1916 return tokenAndChangeState(CURLY_RBRACE, Default); |
|
1917 } |
|
1918 break; |
|
1919 } |
|
1920 case '\n': |
|
1921 { |
|
1922 /* We want to translate \r\n into \n. */ |
|
1923 if(peekAhead(-1) == '\r') |
|
1924 break; |
|
1925 /* else, fallthrough. */ |
|
1926 } |
|
1927 case '\r': |
|
1928 { |
|
1929 result.append(QLatin1Char('\n')); |
|
1930 break; |
|
1931 } |
|
1932 default: |
|
1933 { |
|
1934 result.append(current()); |
|
1935 break; |
|
1936 } |
|
1937 } |
|
1938 ++m_pos; |
|
1939 } |
|
1940 Q_ASSERT(false); |
|
1941 } |
|
1942 case ProcessingInstructionName: |
|
1943 { |
|
1944 const int start = m_pos; |
|
1945 |
|
1946 while(true) |
|
1947 { |
|
1948 ++m_pos; |
|
1949 if(m_pos >= m_length) |
|
1950 return Token(END_OF_FILE); |
|
1951 |
|
1952 const QChar next(current()); |
|
1953 if(next.isSpace() || next == QLatin1Char('?')) |
|
1954 { |
|
1955 return tokenAndChangeState(PI_TARGET, m_data.mid(start, m_pos - start), |
|
1956 ProcessingInstructionContent); |
|
1957 } |
|
1958 } |
|
1959 Q_ASSERT(false); |
|
1960 } |
|
1961 case ProcessingInstructionContent: |
|
1962 { |
|
1963 /* Consume whitespace between the name and the content. */ |
|
1964 if(consumeRawWhitespace()) |
|
1965 return Token(END_OF_FILE); |
|
1966 |
|
1967 const int start = m_pos; |
|
1968 const int len = scanUntil("?>"); |
|
1969 |
|
1970 if(len == -1) |
|
1971 return Token(END_OF_FILE); |
|
1972 else |
|
1973 { |
|
1974 m_pos += 2; /* Consume "?>" */ |
|
1975 popState(); |
|
1976 return Token(PI_CONTENT, normalizeEOL(m_data.mid(start, len), CharacterSkips())); |
|
1977 } |
|
1978 Q_ASSERT(false); |
|
1979 } |
|
1980 case EndTag: |
|
1981 { |
|
1982 if(consumeRawWhitespace()) |
|
1983 return END_OF_FILE; |
|
1984 |
|
1985 if(peekCurrent() == '>') |
|
1986 { |
|
1987 popState(); |
|
1988 return tokenAndAdvance(G_GT); |
|
1989 } |
|
1990 else |
|
1991 return tokenizeNCNameOrQName(); |
|
1992 Q_ASSERT(false); |
|
1993 } |
|
1994 case XMLComment: |
|
1995 { |
|
1996 const int start = m_pos; |
|
1997 const int len = scanUntil("--"); |
|
1998 |
|
1999 if(len == -1) |
|
2000 return END_OF_FILE; |
|
2001 else |
|
2002 { |
|
2003 m_pos += 2; /* Consume "--". */ |
|
2004 popState(); |
|
2005 |
|
2006 if(peekCurrent() == '>') |
|
2007 { |
|
2008 ++m_pos; |
|
2009 return Token(COMMENT_CONTENT, normalizeEOL(m_data.mid(start, len), CharacterSkips())); |
|
2010 } |
|
2011 else |
|
2012 return error(); |
|
2013 } |
|
2014 Q_ASSERT(false); |
|
2015 } |
|
2016 case Pragma: |
|
2017 { |
|
2018 /* Consume whitespace. */ |
|
2019 if(consumeRawWhitespace()) |
|
2020 return Token(END_OF_FILE); |
|
2021 |
|
2022 setState(PragmaContent); |
|
2023 return tokenizeNCNameOrQName(); |
|
2024 } |
|
2025 case PragmaContent: |
|
2026 { |
|
2027 QString result; |
|
2028 result.reserve(20); |
|
2029 |
|
2030 const bool hasWS = m_pos < m_length && current().isSpace(); |
|
2031 |
|
2032 /* Consume all whitespace up to the pragma content(if any). */ |
|
2033 if(consumeRawWhitespace()) |
|
2034 return Token(END_OF_FILE); |
|
2035 |
|
2036 if(peekCurrent() == '#' && peekAhead() == ')') |
|
2037 { |
|
2038 /* We reached the end, and there's no pragma content. */ |
|
2039 return tokenAndChangeState(PRAGMA_END, Default, 2); |
|
2040 } |
|
2041 else if(!hasWS) |
|
2042 { |
|
2043 /* A separating space is required if there's pragma content. */ |
|
2044 return error(); /* i18n */ |
|
2045 } |
|
2046 |
|
2047 const int start = m_pos; |
|
2048 const int len = scanUntil("#)"); |
|
2049 if(len == -1) |
|
2050 return Token(END_OF_FILE); |
|
2051 |
|
2052 return Token(STRING_LITERAL, m_data.mid(start, len)); |
|
2053 Q_ASSERT(false); |
|
2054 } |
|
2055 } |
|
2056 |
|
2057 Q_ASSERT(false); |
|
2058 return error(); |
|
2059 } |
|
2060 |
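/* Used when only scanning (see commenceScanOnly()): appends the attribute
 * value verbatim to result, honouring the doubled-quote escaping mechanism
 * and tracking nested curly braces through sepStack, instead of tokenizing
 * any embedded expressions. */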
|
2061 Tokenizer::Token XQueryTokenizer::attributeAsRaw(const QChar sep, |
|
2062 int &sepStack, |
|
2063 const int startPos, |
|
2064 const bool aInLiteral, |
|
2065 QString &result) |
|
2066 { |
|
2067 bool inLiteral = aInLiteral; |
|
2068 const char otherSep = (sep == QLatin1Char('"') ? '\'' : '"'); |
|
2069 |
|
2070 while(true) |
|
2071 { |
|
2072 if(atEnd()) |
|
2073 return END_OF_FILE; |
|
2074 |
|
2075 if(peekCurrent() == sep.unicode()) |
|
2076 { |
|
2077 if(inLiteral) |
|
2078 inLiteral = false; |
|
2079 else |
|
2080 inLiteral = true; |
|
2081 |
|
2082 if(peekAhead() == sep.unicode()) |
|
2083 { |
|
2084 /* The quoting mechanism was used. */ |
|
2085 result.append(current()); |
|
2086 m_pos += 2; |
|
2087 continue; |
|
2088 } |
|
2089 else |
|
2090 { |
|
2091 /* Don't consume the separator, such that we |
|
2092 * return a token for it next time. */ |
|
2093 if(m_pos == startPos) |
|
2094 { |
|
2095 ++m_pos; |
|
2096 setState(StartTag); |
|
2097 return Token(sep == QLatin1Char('"') ? QUOTE : APOS); |
|
2098 } |
|
2099 |
|
2100 |
|
2101 if(sepStack == 0) |
|
2102 { |
|
2103 return Token(STRING_LITERAL, result); |
|
2104 } |
|
2105 else |
|
2106 { |
|
2107 result.append(current()); |
|
2108 ++m_pos; |
|
2109 continue; |
|
2110 } |
|
2111 } |
|
2112 } |
|
2113 else if(peekCurrent() == '&') |
|
2114 { |
|
2115 const QString ret(tokenizeCharacterReference()); |
|
2116 if(ret.isNull()) |
|
2117 return Token(ERROR); |
|
2118 else |
|
2119 { |
|
2120 result.append(ret); |
|
2121 ++m_pos; |
|
2122 continue; |
|
2123 } |
|
2124 } |
|
2125 else if(peekCurrent() == otherSep) |
|
2126 { |
|
2127 result.append(current()); |
|
2128 ++m_pos; |
|
2129 |
|
2130 if(peekCurrent() == otherSep) |
|
2131 ++m_pos; |
|
2132 |
|
2133 if(inLiteral) |
|
2134 inLiteral = false; |
|
2135 else |
|
2136 inLiteral = true; |
|
2137 |
|
2138 continue; |
|
2139 } |
|
2140 else if(peekCurrent() == '{') |
|
2141 { |
|
2142 result.append(current()); |
|
2143 |
|
2144 if(peekAhead() == '{') |
|
2145 { |
|
2146 m_pos += 2; |
|
2147 continue; |
|
2148 } |
|
2149 else |
|
2150 { |
|
2151 ++m_pos; |
|
2152 ++sepStack; |
|
2153 const Token t(attributeAsRaw(sep, sepStack, startPos, false, result)); |
|
2154 if(t.type != SUCCESS) |
|
2155 return t; |
|
2156 } |
|
2157 |
|
2158 } |
|
2159 else if(peekCurrent() == '}') |
|
2160 { |
|
2161 if(inLiteral && peekAhead() == '}') |
|
2162 { |
|
2163 result.append(current()); |
|
2164 m_pos += 2; |
|
2165 continue; |
|
2166 } |
|
2167 else |
|
2168 { |
|
2169 ++m_pos; |
|
2170 --sepStack; |
|
2171 return Token(SUCCESS); /* The return value is arbitrary. */ |
|
2172 } |
|
2173 } |
|
2174 else |
|
2175 { |
|
2176 result.append(current()); |
|
2177 ++m_pos; |
|
2178 } |
|
2179 } |
|
2180 } |
|
2181 |
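/* The overload the parser calls: records the token's position in
 * sourceLocator and hands out tokens queued on m_tokenStack (pushed by the
 * lookahead logic above) before tokenizing fresh input, adjusting the state
 * according to the popped token. */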
|
2182 Tokenizer::Token XQueryTokenizer::nextToken(YYLTYPE *const sourceLocator) |
|
2183 { |
|
2184 sourceLocator->first_line = m_line; |
|
2185 sourceLocator->first_column = m_pos - m_columnOffset + 1; /* Plus 1, since m_pos is 0-based. */ |
|
2186 |
|
2187 if(m_tokenStack.isEmpty()) |
|
2188 return nextToken(); |
|
2189 else |
|
2190 { |
|
2191 const Token retval(m_tokenStack.pop()); |
|
2192 |
|
2193 switch(retval.type) |
|
2194 { |
|
2195 case MODULE: |
|
2196 /* Fallthrough.*/ |
|
2197 case SCHEMA: |
|
2198 /* Fallthrough.*/ |
|
2199 case COPY_NAMESPACES: |
|
2200 { |
|
2201 setState(NamespaceKeyword); |
|
2202 break; |
|
2203 } |
|
2204 case VERSION: |
|
2205 { |
|
2206 setState(XQueryVersion); |
|
2207 break; |
|
2208 } |
|
2209 case AS: |
|
2210 /* Fallthrough. */ |
|
2211 case OF: |
|
2212 { |
|
2213 setState(ItemType); |
|
2214 break; |
|
2215 } |
|
2216 default: |
|
2217 { |
|
2218 if(isOperatorKeyword(retval.type)) |
|
2219 setState(Default); |
|
2220 |
|
2221 break; |
|
2222 } |
|
2223 }; |
|
2224 |
|
2225 return retval; |
|
2226 } |
|
2227 } |
|
2228 |
|
2229 int XQueryTokenizer::commenceScanOnly() |
|
2230 { |
|
2231 m_scanOnly = true; |
|
2232 return m_pos; |
|
2233 } |
|
2234 |
|
2235 void XQueryTokenizer::resumeTokenizationFrom(const int pos) |
|
2236 { |
|
2237 m_scanOnly = false; |
|
2238 m_pos = pos; |
|
2239 } |
|
2240 |
|
2241 void XQueryTokenizer::setParserContext(const ParserContext::Ptr &) |
|
2242 { |
|
2243 } |
|
2244 |
|
2245 #undef handleWhitespace |
|
2246 |
|
2247 } // namespace QPatternist |
|
2248 |
|
2249 QT_END_NAMESPACE |