|
/*
** 2001 September 15
**
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** A tokenizer for SQL
**
** This file contains C code that splits an SQL input string up into
** individual tokens and sends those tokens one-by-one over to the
** parser for analysis.
**
** $Id: tokenize.cpp 1282 2008-11-13 09:31:33Z LarsPson $
*/
|
20 #include "sqliteInt.h" |
|
21 #include <ctype.h> |
|
22 #include <stdlib.h> |
|
23 |
|
/*
** The charMap() macro maps alphabetic characters into their
** lower-case ASCII equivalent.  On ASCII machines, this is just
** an upper-to-lower case map.  On EBCDIC machines we also need
** to adjust the encoding.  Only alphabetic characters and underscores
** need to be translated.
**
** The macro argument is parenthesized so that the (unsigned char) cast
** applies to the whole argument expression, not just to its first
** operand, when charMap() is invoked with something other than a
** simple identifier.
*/
#ifdef SQLITE_ASCII
# define charMap(X) sqlite3UpperToLower[(unsigned char)(X)]
#endif
#ifdef SQLITE_EBCDIC
# define charMap(X) ebcdicToAscii[(unsigned char)(X)]
/*
** Translation table indexed by EBCDIC code point.  Letters (either case)
** map to the lower-case ASCII letter, code point 0x6D maps to ASCII
** underscore (95), and every other entry is 0.
*/
const unsigned char ebcdicToAscii[] = {
/* 0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F */
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 0x */
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 1x */
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 2x */
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 3x */
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 4x */
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 5x */
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 95,  0,  0,  /* 6x */
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 7x */
   0, 97, 98, 99,100,101,102,103,104,105,  0,  0,  0,  0,  0,  0,  /* 8x */
   0,106,107,108,109,110,111,112,113,114,  0,  0,  0,  0,  0,  0,  /* 9x */
   0,  0,115,116,117,118,119,120,121,122,  0,  0,  0,  0,  0,  0,  /* Ax */
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* Bx */
   0, 97, 98, 99,100,101,102,103,104,105,  0,  0,  0,  0,  0,  0,  /* Cx */
   0,106,107,108,109,110,111,112,113,114,  0,  0,  0,  0,  0,  0,  /* Dx */
   0,  0,115,116,117,118,119,120,121,122,  0,  0,  0,  0,  0,  0,  /* Ex */
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* Fx */
};
#endif
|
56 |
|
/*
** The sqlite3KeywordCode function looks up an identifier to determine if
** it is a keyword.  If it is a keyword, the token code of that keyword is
** returned.  If the input is not a keyword, TK_ID is returned.
**
** The implementation of this routine was generated by a program,
** mkkeywordhash.c, located in the tool subdirectory of the distribution.
** The output of the mkkeywordhash.c program is written into a file
** named keywordhash.h and then included into this source file by
** the #include below.
*/
|
68 #include "keywordhash.h" |
|
69 |
|
70 |
|
71 |
|
72 |
|
73 /* |
|
74 ** Return the length of the token that begins at z[0]. |
|
75 ** Store the token type in *tokenType before returning. |
|
76 */ |
|
77 static int getToken(const unsigned char *z, int *tokenType){ |
|
78 int i, c; |
|
79 switch( *z ){ |
|
80 case ' ': case '\t': case '\n': case '\f': case '\r': { |
|
81 for(i=1; isspace(z[i]); i++){} |
|
82 *tokenType = TK_SPACE; |
|
83 return i; |
|
84 } |
|
85 case '-': { |
|
86 if( z[1]=='-' ){ |
|
87 for(i=2; (c=z[i])!=0 && c!='\n'; i++){} |
|
88 *tokenType = TK_COMMENT; |
|
89 return i; |
|
90 } |
|
91 *tokenType = TK_MINUS; |
|
92 return 1; |
|
93 } |
|
94 case '(': { |
|
95 *tokenType = TK_LP; |
|
96 return 1; |
|
97 } |
|
98 case ')': { |
|
99 *tokenType = TK_RP; |
|
100 return 1; |
|
101 } |
|
102 case ';': { |
|
103 *tokenType = TK_SEMI; |
|
104 return 1; |
|
105 } |
|
106 case '+': { |
|
107 *tokenType = TK_PLUS; |
|
108 return 1; |
|
109 } |
|
110 case '*': { |
|
111 *tokenType = TK_STAR; |
|
112 return 1; |
|
113 } |
|
114 case '/': { |
|
115 if( z[1]!='*' || z[2]==0 ){ |
|
116 *tokenType = TK_SLASH; |
|
117 return 1; |
|
118 } |
|
119 for(i=3, c=z[2]; (c!='*' || z[i]!='/') && (c=z[i])!=0; i++){} |
|
120 if( c ) i++; |
|
121 *tokenType = TK_COMMENT; |
|
122 return i; |
|
123 } |
|
124 case '%': { |
|
125 *tokenType = TK_REM; |
|
126 return 1; |
|
127 } |
|
128 case '=': { |
|
129 *tokenType = TK_EQ; |
|
130 return 1 + (z[1]=='='); |
|
131 } |
|
132 case '<': { |
|
133 if( (c=z[1])=='=' ){ |
|
134 *tokenType = TK_LE; |
|
135 return 2; |
|
136 }else if( c=='>' ){ |
|
137 *tokenType = TK_NE; |
|
138 return 2; |
|
139 }else if( c=='<' ){ |
|
140 *tokenType = TK_LSHIFT; |
|
141 return 2; |
|
142 }else{ |
|
143 *tokenType = TK_LT; |
|
144 return 1; |
|
145 } |
|
146 } |
|
147 case '>': { |
|
148 if( (c=z[1])=='=' ){ |
|
149 *tokenType = TK_GE; |
|
150 return 2; |
|
151 }else if( c=='>' ){ |
|
152 *tokenType = TK_RSHIFT; |
|
153 return 2; |
|
154 }else{ |
|
155 *tokenType = TK_GT; |
|
156 return 1; |
|
157 } |
|
158 } |
|
159 case '!': { |
|
160 if( z[1]!='=' ){ |
|
161 *tokenType = TK_ILLEGAL; |
|
162 return 2; |
|
163 }else{ |
|
164 *tokenType = TK_NE; |
|
165 return 2; |
|
166 } |
|
167 } |
|
168 case '|': { |
|
169 if( z[1]!='|' ){ |
|
170 *tokenType = TK_BITOR; |
|
171 return 1; |
|
172 }else{ |
|
173 *tokenType = TK_CONCAT; |
|
174 return 2; |
|
175 } |
|
176 } |
|
177 case ',': { |
|
178 *tokenType = TK_COMMA; |
|
179 return 1; |
|
180 } |
|
181 case '&': { |
|
182 *tokenType = TK_BITAND; |
|
183 return 1; |
|
184 } |
|
185 case '~': { |
|
186 *tokenType = TK_BITNOT; |
|
187 return 1; |
|
188 } |
|
189 case '`': |
|
190 case '\'': |
|
191 case '"': { |
|
192 int delim = z[0]; |
|
193 for(i=1; (c=z[i])!=0; i++){ |
|
194 if( c==delim ){ |
|
195 if( z[i+1]==delim ){ |
|
196 i++; |
|
197 }else{ |
|
198 break; |
|
199 } |
|
200 } |
|
201 } |
|
202 if( c ){ |
|
203 *tokenType = TK_STRING; |
|
204 return i+1; |
|
205 }else{ |
|
206 *tokenType = TK_ILLEGAL; |
|
207 return i; |
|
208 } |
|
209 } |
|
210 case '.': { |
|
211 #ifndef SQLITE_OMIT_FLOATING_POINT |
|
212 if( !isdigit(z[1]) ) |
|
213 #endif |
|
214 { |
|
215 *tokenType = TK_DOT; |
|
216 return 1; |
|
217 } |
|
218 /* If the next character is a digit, this is a floating point |
|
219 ** number that begins with ".". Fall thru into the next case */ |
|
220 } |
|
221 case '0': case '1': case '2': case '3': case '4': |
|
222 case '5': case '6': case '7': case '8': case '9': { |
|
223 *tokenType = TK_INTEGER; |
|
224 for(i=0; isdigit(z[i]); i++){} |
|
225 #ifndef SQLITE_OMIT_FLOATING_POINT |
|
226 if( z[i]=='.' ){ |
|
227 i++; |
|
228 while( isdigit(z[i]) ){ i++; } |
|
229 *tokenType = TK_FLOAT; |
|
230 } |
|
231 if( (z[i]=='e' || z[i]=='E') && |
|
232 ( isdigit(z[i+1]) |
|
233 || ((z[i+1]=='+' || z[i+1]=='-') && isdigit(z[i+2])) |
|
234 ) |
|
235 ){ |
|
236 i += 2; |
|
237 while( isdigit(z[i]) ){ i++; } |
|
238 *tokenType = TK_FLOAT; |
|
239 } |
|
240 #endif |
|
241 while( IdChar(z[i]) ){ |
|
242 *tokenType = TK_ILLEGAL; |
|
243 i++; |
|
244 } |
|
245 return i; |
|
246 } |
|
247 case '[': { |
|
248 for(i=1, c=z[0]; c!=']' && (c=z[i])!=0; i++){} |
|
249 *tokenType = TK_ID; |
|
250 return i; |
|
251 } |
|
252 case '?': { |
|
253 *tokenType = TK_VARIABLE; |
|
254 for(i=1; isdigit(z[i]); i++){} |
|
255 return i; |
|
256 } |
|
257 case '#': { |
|
258 for(i=1; isdigit(z[i]); i++){} |
|
259 if( i>1 ){ |
|
260 /* Parameters of the form #NNN (where NNN is a number) are used |
|
261 ** internally by sqlite3NestedParse. */ |
|
262 *tokenType = TK_REGISTER; |
|
263 return i; |
|
264 } |
|
265 /* Fall through into the next case if the '#' is not followed by |
|
266 ** a digit. Try to match #AAAA where AAAA is a parameter name. */ |
|
267 } |
|
268 #ifndef SQLITE_OMIT_TCL_VARIABLE |
|
269 case '$': |
|
270 #endif |
|
271 case '@': /* For compatibility with MS SQL Server */ |
|
272 case ':': { |
|
273 int n = 0; |
|
274 *tokenType = TK_VARIABLE; |
|
275 for(i=1; (c=z[i])!=0; i++){ |
|
276 if( IdChar(c) ){ |
|
277 n++; |
|
278 #ifndef SQLITE_OMIT_TCL_VARIABLE |
|
279 }else if( c=='(' && n>0 ){ |
|
280 do{ |
|
281 i++; |
|
282 }while( (c=z[i])!=0 && !isspace(c) && c!=')' ); |
|
283 if( c==')' ){ |
|
284 i++; |
|
285 }else{ |
|
286 *tokenType = TK_ILLEGAL; |
|
287 } |
|
288 break; |
|
289 }else if( c==':' && z[i+1]==':' ){ |
|
290 i++; |
|
291 #endif |
|
292 }else{ |
|
293 break; |
|
294 } |
|
295 } |
|
296 if( n==0 ) *tokenType = TK_ILLEGAL; |
|
297 return i; |
|
298 } |
|
299 #ifndef SQLITE_OMIT_BLOB_LITERAL |
|
300 case 'x': case 'X': { |
|
301 if( (c=z[1])=='\'' || c=='"' ){ |
|
302 int delim = c; |
|
303 *tokenType = TK_BLOB; |
|
304 for(i=2; (c=z[i])!=0; i++){ |
|
305 if( c==delim ){ |
|
306 if( i%2 ) *tokenType = TK_ILLEGAL; |
|
307 break; |
|
308 } |
|
309 if( !isxdigit(c) ){ |
|
310 *tokenType = TK_ILLEGAL; |
|
311 return i; |
|
312 } |
|
313 } |
|
314 if( c ) i++; |
|
315 return i; |
|
316 } |
|
317 /* Otherwise fall through to the next case */ |
|
318 } |
|
319 #endif |
|
320 default: { |
|
321 if( !IdChar(*z) ){ |
|
322 break; |
|
323 } |
|
324 for(i=1; IdChar(z[i]); i++){} |
|
325 *tokenType = keywordCode((char*)z, i); |
|
326 return i; |
|
327 } |
|
328 } |
|
329 *tokenType = TK_ILLEGAL; |
|
330 return 1; |
|
331 } |
|
/*
** Externally visible wrapper around the file-local getToken() routine.
** Returns the length of the token that begins at z[0] and stores the
** token type in *tokenType, exactly as getToken() does.
*/
int sqlite3GetToken(const unsigned char *z, int *tokenType){
  int nToken = getToken(z, tokenType);
  return nToken;
}
|
335 |
|
336 /* |
|
337 ** Run the parser on the given SQL string. The parser structure is |
|
338 ** passed in. An SQLITE_ status code is returned. If an error occurs |
|
339 ** and pzErrMsg!=NULL then an error message might be written into |
|
340 ** memory obtained from sqlite3_malloc() and *pzErrMsg made to point to that |
|
341 ** error message. Or maybe not. |
|
342 */ |
|
343 int sqlite3RunParser(Parse *pParse, const char *zSql, char **pzErrMsg){ |
|
344 int nErr = 0; |
|
345 int i; |
|
346 void *pEngine; |
|
347 int tokenType; |
|
348 int lastTokenParsed = -1; |
|
349 sqlite3 *db = pParse->db; |
|
350 |
|
351 if( db->activeVdbeCnt==0 ){ |
|
352 db->u1.isInterrupted = 0; |
|
353 } |
|
354 pParse->rc = SQLITE_OK; |
|
355 i = 0; |
|
356 pEngine = sqlite3ParserAlloc((void*(*)(size_t))sqlite3_malloc); |
|
357 if( pEngine==0 ){ |
|
358 db->mallocFailed = 1; |
|
359 return SQLITE_NOMEM; |
|
360 } |
|
361 assert( pParse->sLastToken.dyn==0 ); |
|
362 assert( pParse->pNewTable==0 ); |
|
363 assert( pParse->pNewTrigger==0 ); |
|
364 assert( pParse->nVar==0 ); |
|
365 assert( pParse->nVarExpr==0 ); |
|
366 assert( pParse->nVarExprAlloc==0 ); |
|
367 assert( pParse->apVarExpr==0 ); |
|
368 pParse->zTail = pParse->zSql = zSql; |
|
369 while( !db->mallocFailed && zSql[i]!=0 ){ |
|
370 assert( i>=0 ); |
|
371 pParse->sLastToken.z = (u8*)&zSql[i]; |
|
372 assert( pParse->sLastToken.dyn==0 ); |
|
373 pParse->sLastToken.n = getToken((unsigned char*)&zSql[i],&tokenType); |
|
374 i += pParse->sLastToken.n; |
|
375 if( i>SQLITE_MAX_SQL_LENGTH ){ |
|
376 pParse->rc = SQLITE_TOOBIG; |
|
377 break; |
|
378 } |
|
379 switch( tokenType ){ |
|
380 case TK_SPACE: |
|
381 case TK_COMMENT: { |
|
382 if( db->u1.isInterrupted ){ |
|
383 pParse->rc = SQLITE_INTERRUPT; |
|
384 sqlite3SetString(pzErrMsg, "interrupt", (char*)0); |
|
385 goto abort_parse; |
|
386 } |
|
387 break; |
|
388 } |
|
389 case TK_ILLEGAL: { |
|
390 if( pzErrMsg ){ |
|
391 sqlite3_free(*pzErrMsg); |
|
392 *pzErrMsg = sqlite3MPrintf(db, "unrecognized token: \"%T\"", |
|
393 &pParse->sLastToken); |
|
394 } |
|
395 nErr++; |
|
396 goto abort_parse; |
|
397 } |
|
398 case TK_SEMI: { |
|
399 pParse->zTail = &zSql[i]; |
|
400 /* Fall thru into the default case */ |
|
401 } |
|
402 default: { |
|
403 sqlite3Parser(pEngine, tokenType, pParse->sLastToken, pParse); |
|
404 lastTokenParsed = tokenType; |
|
405 if( pParse->rc!=SQLITE_OK ){ |
|
406 goto abort_parse; |
|
407 } |
|
408 break; |
|
409 } |
|
410 } |
|
411 } |
|
412 abort_parse: |
|
413 if( zSql[i]==0 && nErr==0 && pParse->rc==SQLITE_OK ){ |
|
414 if( lastTokenParsed!=TK_SEMI ){ |
|
415 sqlite3Parser(pEngine, TK_SEMI, pParse->sLastToken, pParse); |
|
416 pParse->zTail = &zSql[i]; |
|
417 } |
|
418 sqlite3Parser(pEngine, 0, pParse->sLastToken, pParse); |
|
419 } |
|
420 sqlite3ParserFree(pEngine, sqlite3_free); |
|
421 if( db->mallocFailed ){ |
|
422 pParse->rc = SQLITE_NOMEM; |
|
423 } |
|
424 if( pParse->rc!=SQLITE_OK && pParse->rc!=SQLITE_DONE && pParse->zErrMsg==0 ){ |
|
425 sqlite3SetString(&pParse->zErrMsg, sqlite3ErrStr(pParse->rc), (char*)0); |
|
426 } |
|
427 if( pParse->zErrMsg ){ |
|
428 if( pzErrMsg && *pzErrMsg==0 ){ |
|
429 *pzErrMsg = pParse->zErrMsg; |
|
430 }else{ |
|
431 sqlite3_free(pParse->zErrMsg); |
|
432 } |
|
433 pParse->zErrMsg = 0; |
|
434 if( !nErr ) nErr++; |
|
435 } |
|
436 if( pParse->pVdbe && pParse->nErr>0 && pParse->nested==0 ){ |
|
437 sqlite3VdbeDelete(pParse->pVdbe); |
|
438 pParse->pVdbe = 0; |
|
439 } |
|
440 #ifndef SQLITE_OMIT_SHARED_CACHE |
|
441 if( pParse->nested==0 ){ |
|
442 sqlite3_free(pParse->aTableLock); |
|
443 pParse->aTableLock = 0; |
|
444 pParse->nTableLock = 0; |
|
445 } |
|
446 #endif |
|
447 |
|
448 if( !IN_DECLARE_VTAB ){ |
|
449 /* If the pParse->declareVtab flag is set, do not delete any table |
|
450 ** structure built up in pParse->pNewTable. The calling code (see vtab.c) |
|
451 ** will take responsibility for freeing the Table structure. |
|
452 */ |
|
453 sqlite3DeleteTable(pParse->pNewTable); |
|
454 } |
|
455 |
|
456 sqlite3DeleteTrigger(pParse->pNewTrigger); |
|
457 sqlite3_free(pParse->apVarExpr); |
|
458 if( nErr>0 && (pParse->rc==SQLITE_OK || pParse->rc==SQLITE_DONE) ){ |
|
459 pParse->rc = SQLITE_ERROR; |
|
460 } |
|
461 return nErr; |
|
462 } |