|
1 /* This file is part of QJson |
|
2 * |
|
3 * Copyright (C) 2008 Flavio Castelli <flavio.castelli@gmail.com> |
|
4 * |
|
5 * This library is free software; you can redistribute it and/or |
|
6 * modify it under the terms of the GNU Library General Public |
|
7 * License as published by the Free Software Foundation; either |
|
8 * version 2 of the License, or (at your option) any later version. |
|
9 * |
|
10 * This library is distributed in the hope that it will be useful, |
|
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
13 * Library General Public License for more details. |
|
14 * |
|
15 * You should have received a copy of the GNU Library General Public License |
|
16 * along with this library; see the file COPYING.LIB. If not, write to |
|
17 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, |
|
18 * Boston, MA 02110-1301, USA. |
|
19 */ |
|
20 |
|
21 #include "qjson_debug.h" |
|
22 #include "json_scanner.h" |
|
23 #include "json_parser.hh" |
|
24 |
|
25 #include <ctype.h> |
|
26 |
|
27 #include <QtCore/QDebug> |
|
28 #include <QtCore/QRegExp> |
|
29 |
|
30 #include <cassert> |
|
31 |
|
32 bool ishexnstring(const QString& string) { |
|
33 for (int i = 0; i < string.length(); i++) { |
|
34 if (isxdigit(string[i] == 0)) |
|
35 return false; |
|
36 } |
|
37 return true; |
|
38 } |
|
39 |
|
40 JSonScanner::JSonScanner(QIODevice* io) |
|
41 : m_io (io) |
|
42 { |
|
43 m_quotmarkClosed = true; |
|
44 m_quotmarkCount = 0; |
|
45 } |
|
46 |
|
47 static QString unescape( const QByteArray& ba, bool* ok ) { |
|
48 assert( ok ); |
|
49 *ok = false; |
|
50 QString res; |
|
51 QByteArray seg; |
|
52 bool bs = false; |
|
53 for ( int i = 0, size = ba.size(); i < size; ++i ) { |
|
54 const char ch = ba[i]; |
|
55 if ( !bs ) { |
|
56 if ( ch == '\\' ) |
|
57 bs = true; |
|
58 else |
|
59 seg += ch; |
|
60 } else { |
|
61 bs = false; |
|
62 switch ( ch ) { |
|
63 case 'b': |
|
64 seg += '\b'; |
|
65 break; |
|
66 case 'f': |
|
67 seg += '\f'; |
|
68 break; |
|
69 case 'n': |
|
70 seg += '\n'; |
|
71 break; |
|
72 case 'r': |
|
73 seg += '\r'; |
|
74 break; |
|
75 case 't': |
|
76 seg += '\t'; |
|
77 break; |
|
78 case 'u': |
|
79 { |
|
80 res += QString::fromUtf8( seg ); |
|
81 seg.clear(); |
|
82 |
|
83 if ( i > size - 5 ) { |
|
84 //error |
|
85 return QString(); |
|
86 } |
|
87 |
|
88 const QString hex_digit1 = QString::fromUtf8( ba.mid( i + 1, 2 ) ); |
|
89 const QString hex_digit2 = QString::fromUtf8( ba.mid( i + 3, 2 ) ); |
|
90 i += 4; |
|
91 |
|
92 if ( !ishexnstring( hex_digit1 ) || !ishexnstring( hex_digit2 ) ) { |
|
93 qCritical() << "Not an hex string:" << hex_digit1 << hex_digit2; |
|
94 return QString(); |
|
95 } |
|
96 bool hexOk; |
|
97 const ushort hex_code1 = hex_digit1.toShort( &hexOk, 16 ); |
|
98 if (!hexOk) { |
|
99 qCritical() << "error converting hex value to short:" << hex_digit1; |
|
100 return QString(); |
|
101 } |
|
102 const ushort hex_code2 = hex_digit2.toShort( &hexOk, 16 ); |
|
103 if (!hexOk) { |
|
104 qCritical() << "error converting hex value to short:" << hex_digit2; |
|
105 return QString(); |
|
106 } |
|
107 |
|
108 res += QChar(hex_code2, hex_code1); |
|
109 break; |
|
110 } |
|
111 case '\\': |
|
112 seg += '\\'; |
|
113 break; |
|
114 default: |
|
115 seg += ch; |
|
116 break; |
|
117 } |
|
118 } |
|
119 } |
|
120 res += QString::fromUtf8( seg ); |
|
121 *ok = true; |
|
122 return res; |
|
123 } |
|
124 |
|
125 int JSonScanner::yylex(YYSTYPE* yylval, yy::location *yylloc) |
|
126 { |
|
127 char ch; |
|
128 |
|
129 if (!m_io->isOpen()) { |
|
130 qCritical() << "JSonScanner::yylex - io device is not open"; |
|
131 return -1; |
|
132 } |
|
133 |
|
134 yylloc->step(); |
|
135 |
|
136 do { |
|
137 bool ret; |
|
138 if (m_io->atEnd()) { |
|
139 qjsonDebug() << "JSonScanner::yylex - yy::json_parser::token::END"; |
|
140 return yy::json_parser::token::END; |
|
141 } |
|
142 else |
|
143 ret = m_io->getChar(&ch); |
|
144 |
|
145 if (!ret) { |
|
146 qCritical() << "JSonScanner::yylex - error reading from io device"; |
|
147 return -1; |
|
148 } |
|
149 |
|
150 qjsonDebug() << "JSonScanner::yylex - got |" << ch << "|"; |
|
151 |
|
152 yylloc->columns(); |
|
153 |
|
154 if (ch == '\n' || ch == '\r') |
|
155 yylloc->lines(); |
|
156 |
|
157 } while (m_quotmarkClosed && (isspace(ch) != 0)); |
|
158 |
|
159 if (m_quotmarkClosed && ((ch == 't') || (ch == 'T') |
|
160 || (ch == 'n') || (ch == 'N'))) { |
|
161 // check true & null value |
|
162 const QByteArray buf = m_io->peek(3).toLower(); |
|
163 |
|
164 if (buf.length() == 3) { |
|
165 if (buf == "rue") { |
|
166 m_io->read (3); |
|
167 yylloc->columns(3); |
|
168 qjsonDebug() << "JSonScanner::yylex - TRUE_VAL"; |
|
169 return yy::json_parser::token::TRUE_VAL; |
|
170 } |
|
171 else if (buf == "ull") { |
|
172 m_io->read (3); |
|
173 yylloc->columns(3); |
|
174 qjsonDebug() << "JSonScanner::yylex - NULL_VAL"; |
|
175 return yy::json_parser::token::NULL_VAL; |
|
176 } |
|
177 } |
|
178 } |
|
179 else if (m_quotmarkClosed && ((ch == 'f') || (ch == 'F'))) { |
|
180 // check false value |
|
181 const QByteArray buf = m_io->peek(4).toLower(); |
|
182 if (buf.length() == 4) { |
|
183 if (buf == "alse") { |
|
184 m_io->read (4); |
|
185 yylloc->columns(4); |
|
186 qjsonDebug() << "JSonScanner::yylex - FALSE_VAL"; |
|
187 return yy::json_parser::token::FALSE_VAL; |
|
188 } |
|
189 } |
|
190 } |
|
191 else if (m_quotmarkClosed && ((ch == 'e') || (ch == 'E'))) { |
|
192 QByteArray ret(1, ch); |
|
193 const QByteArray buf = m_io->peek(1); |
|
194 if (!buf.isEmpty()) { |
|
195 if ((buf[0] == '+' ) || (buf[0] == '-' )) { |
|
196 ret += m_io->read (1); |
|
197 yylloc->columns(); |
|
198 } |
|
199 } |
|
200 *yylval = QVariant(QString::fromUtf8(ret)); |
|
201 return yy::json_parser::token::E; |
|
202 } |
|
203 |
|
204 if (ch != '"' && !m_quotmarkClosed) { |
|
205 // we're inside a " " block |
|
206 QByteArray raw; |
|
207 raw += ch; |
|
208 char prevCh = ch; |
|
209 bool escape_on = (ch == '\\') ? true : false; |
|
210 |
|
211 while ( true ) { |
|
212 char nextCh; |
|
213 qint64 ret = m_io->peek(&nextCh, 1); |
|
214 if (ret != 1) { |
|
215 if (m_io->atEnd()) |
|
216 return yy::json_parser::token::END; |
|
217 else |
|
218 return -1; |
|
219 } else if ( !escape_on && nextCh == '\"' ) { |
|
220 bool ok; |
|
221 const QString str = unescape( raw, &ok ); |
|
222 *yylval = ok ? str : QString(); |
|
223 return ok ? yy::json_parser::token::STRING : -1; |
|
224 } |
|
225 #if 0 |
|
226 if ( prevCh == '\\' && nextCh != '"' && nextCh != '\\' && nextCh != '/' && |
|
227 nextCh != 'b' && nextCh != 'f' && nextCh != 'n' && |
|
228 nextCh != 'r' && nextCh != 't' && nextCh != 'u') { |
|
229 qjsonDebug() << "Just read" << nextCh; |
|
230 qjsonDebug() << "JSonScanner::yylex - error decoding escaped sequence"; |
|
231 return -1; |
|
232 } |
|
233 #endif |
|
234 m_io->read(1); // consume |
|
235 raw += nextCh; |
|
236 prevCh = nextCh; |
|
237 if (escape_on) |
|
238 escape_on = false; |
|
239 else |
|
240 escape_on = (prevCh == '\\') ? true : false; |
|
241 #if 0 |
|
242 if (nextCh == '\\') { |
|
243 char buf; |
|
244 if (m_io->getChar (&buf)) { |
|
245 yylloc->columns(); |
|
246 if (((buf != '"') && (buf != '\\') && (buf != '/') && |
|
247 (buf != 'b') && (buf != 'f') && (buf != 'n') && |
|
248 (buf != 'r') && (buf != 't') && (buf != 'u'))) { |
|
249 qjsonDebug() << "Just read" << buf; |
|
250 qjsonDebug() << "JSonScanner::yylex - error decoding escaped sequence"; |
|
251 return -1; |
|
252 } |
|
253 } else { |
|
254 qCritical() << "JSonScanner::yylex - error decoding escaped sequence : io error"; |
|
255 return -1; |
|
256 } |
|
257 } |
|
258 #endif |
|
259 } |
|
260 } |
|
261 else if (isdigit(ch) != 0 && m_quotmarkClosed) { |
|
262 *yylval = QVariant(QString::fromLatin1(QByteArray(&ch,1))); |
|
263 qjsonDebug() << "JSonScanner::yylex - yy::json_parser::token::DIGIT"; |
|
264 return yy::json_parser::token::DIGIT; |
|
265 } |
|
266 else if (isalnum(ch) != 0) { |
|
267 *yylval = QVariant(QString(QChar::fromLatin1(ch))); |
|
268 qjsonDebug() << "JSonScanner::yylex - yy::json_parser::token::WORD (" |
|
269 << ch << ")"; |
|
270 return yy::json_parser::token::STRING; |
|
271 } |
|
272 else if (ch == ':') { |
|
273 // set yylval |
|
274 qjsonDebug() << "JSonScanner::yylex - yy::json_parser::token::COLON"; |
|
275 return yy::json_parser::token::COLON; |
|
276 } |
|
277 else if (ch == '"') { |
|
278 // yy::json_parser::token::QUOTMARK (") |
|
279 |
|
280 // set yylval |
|
281 m_quotmarkCount++; |
|
282 if (m_quotmarkCount %2 == 0) { |
|
283 m_quotmarkClosed = true; |
|
284 m_quotmarkCount = 0; |
|
285 qjsonDebug() << "JSonScanner::yylex - yy::json_parser::token::QUOTMARKCLOSE"; |
|
286 return yy::json_parser::token::QUOTMARKCLOSE; |
|
287 } |
|
288 else { |
|
289 m_quotmarkClosed = false; |
|
290 qjsonDebug() << "JSonScanner::yylex - yy::json_parser::token::QUOTMARKOPEN"; |
|
291 return yy::json_parser::token::QUOTMARKOPEN; |
|
292 } |
|
293 } |
|
294 else if (ch == ',') { |
|
295 qjsonDebug() << "JSonScanner::yylex - yy::json_parser::token::COMMA"; |
|
296 return yy::json_parser::token::COMMA; |
|
297 } |
|
298 else if (ch == '.') { |
|
299 qjsonDebug() << "JSonScanner::yylex - yy::json_parser::token::DOT"; |
|
300 return yy::json_parser::token::DOT; |
|
301 } |
|
302 else if (ch == '-') { |
|
303 qjsonDebug() << "JSonScanner::yylex - yy::json_parser::token::MINUS"; |
|
304 return yy::json_parser::token::MINUS; |
|
305 } |
|
306 else if (ch == '[') { |
|
307 qjsonDebug() << "JSonScanner::yylex - yy::json_parser::token::SQUARE_BRACKET_OPEN"; |
|
308 return yy::json_parser::token::SQUARE_BRACKET_OPEN; |
|
309 } |
|
310 else if (ch == ']') { |
|
311 qjsonDebug() << "JSonScanner::yylex - yy::json_parser::token::SQUARE_BRACKET_CLOSE"; |
|
312 return yy::json_parser::token::SQUARE_BRACKET_CLOSE; |
|
313 } |
|
314 else if (ch == '{') { |
|
315 qjsonDebug() << "JSonScanner::yylex - yy::json_parser::token::CURLY_BRACKET_OPEN"; |
|
316 return yy::json_parser::token::CURLY_BRACKET_OPEN; |
|
317 } |
|
318 else if (ch == '}') { |
|
319 qjsonDebug() << "JSonScanner::yylex - yy::json_parser::token::CURLY_BRACKET_CLOSE"; |
|
320 return yy::json_parser::token::CURLY_BRACKET_CLOSE; |
|
321 } |
|
322 |
|
323 //unknown char! |
|
324 //TODO yyerror? |
|
325 qCritical() << "JSonScanner::yylex - unknown char, returning -1"; |
|
326 return -1; |
|
327 } |
|
328 |
|
329 |