|
1 /* |
|
2 ** 2001 September 15 |
|
3 ** |
|
4 ** The author disclaims copyright to this source code. In place of |
|
5 ** a legal notice, here is a blessing: |
|
6 ** |
|
7 ** May you do good and not evil. |
|
8 ** May you find forgiveness for yourself and forgive others. |
|
9 ** May you share freely, never taking more than you give. |
|
10 ** |
|
11 ************************************************************************* |
|
12 ** A tokenizer for SQL |
|
13 ** |
|
14 ** This file contains C code that splits an SQL input string up into |
|
15 ** individual tokens and sends those tokens one-by-one over to the |
|
16 ** parser for analysis. |
|
17 ** |
|
18 ** $Id: tokenize.c,v 1.124 2006/08/12 12:33:14 drh Exp $ |
|
19 */ |
|
20 #include "sqliteInt.h" |
|
21 #include "os.h" |
|
22 #include <ctype.h> |
|
23 #include <stdlib.h> |
|
24 |
|
25 /* |
|
26 ** The charMap() macro maps alphabetic characters into their |
|
27 ** lower-case ASCII equivalent. On ASCII machines, this is just |
|
28 ** an upper-to-lower case map. On EBCDIC machines we also need |
|
29 ** to adjust the encoding. Only alphabetic characters and underscores |
|
30 ** need to be translated. |
|
31 */ |
|
#ifdef SQLITE_ASCII
/* ASCII build: translate through the sqlite3UpperToLower[] table, which
** is declared outside this block.  The macro argument is parenthesized so
** that the (unsigned char) cast applies to the entire expression passed
** in, not just to its first operand. */
# define charMap(X) sqlite3UpperToLower[(unsigned char)(X)]
#endif
#ifdef SQLITE_EBCDIC
/* EBCDIC build: translate through ebcdicToAscii[] below.  Entries that
** are zero mark characters that have no translation in this table; the
** non-zero entries are 95 ('_' in ASCII) and 97..122 ('a'..'z' in ASCII).
** Rows 8x/9x/Ax and rows Cx/Dx/Ex hold the same values, so both letter
** ranges map onto the same lower-case ASCII codes. */
# define charMap(X) ebcdicToAscii[(unsigned char)(X)]
const unsigned char ebcdicToAscii[] = {
/* 0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F */
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 0x */
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 1x */
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 2x */
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 3x */
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 4x */
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 5x */
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 95,  0,  0,  /* 6x */
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 7x */
   0, 97, 98, 99,100,101,102,103,104,105,  0,  0,  0,  0,  0,  0,  /* 8x */
   0,106,107,108,109,110,111,112,113,114,  0,  0,  0,  0,  0,  0,  /* 9x */
   0,  0,115,116,117,118,119,120,121,122,  0,  0,  0,  0,  0,  0,  /* Ax */
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* Bx */
   0, 97, 98, 99,100,101,102,103,104,105,  0,  0,  0,  0,  0,  0,  /* Cx */
   0,106,107,108,109,110,111,112,113,114,  0,  0,  0,  0,  0,  0,  /* Dx */
   0,  0,115,116,117,118,119,120,121,122,  0,  0,  0,  0,  0,  0,  /* Ex */
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* Fx */
};
#endif
|
57 |
|
58 /* |
|
59 ** The sqlite3KeywordCode function looks up an identifier to determine if |
|
60 ** it is a keyword. If it is a keyword, the token code of that keyword is |
|
61 ** returned. If the input is not a keyword, TK_ID is returned. |
|
62 ** |
|
63 ** The implementation of this routine was generated by a program, |
|
64 ** mkkeywordhash.c, located in the tool subdirectory of the distribution. |
|
65 ** The output of the mkkeywordhash.c program is written into a file |
|
66 ** named keywordhash.h and then included into this source file by |
|
67 ** the #include below. |
|
68 */ |
|
69 #include "keywordhash.h" |
|
70 |
|
71 |
|
72 /* |
|
73 ** If X is a character that can be used in an identifier then |
|
74 ** IdChar(X) will be true. Otherwise it is false. |
|
75 ** |
|
76 ** For ASCII, any character with the high-order bit set is |
|
77 ** allowed in an identifier. For 7-bit characters, |
|
78 ** sqlite3IsIdChar[X] must be 1. |
|
79 ** |
|
80 ** For EBCDIC, the rules are more complex but have the same |
|
81 ** end result. |
|
82 ** |
|
83 ** Ticket #1066. the SQL standard does not allow '$' in the |
|
84 ** middle of identifiers. But many SQL implementations do. |
|
85 ** SQLite will allow '$' in identifiers for compatibility. |
|
86 ** But the feature is undocumented. |
|
87 */ |
|
#ifdef SQLITE_ASCII
/* One entry per 7-bit character in the range 0x20..0x7f; the table is
** indexed by (character - 0x20).  A non-zero entry means the character
** may appear in an identifier: '$', '0'..'9', 'A'..'Z', '_', 'a'..'z'. */
const char sqlite3IsIdChar[] = {
/* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */
    0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 2x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,  /* 3x */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 4x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,  /* 5x */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 6x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,  /* 7x */
};
/* NOTE: IdChar() stores its argument in a variable named "c" that must
** already be declared (as an integer type) in the scope where the macro
** is used.  The argument is evaluated exactly once and is parenthesized
** so that any expression may be passed safely. */
#define IdChar(C)  (((c=(C))&0x80)!=0 || (c>0x1f && sqlite3IsIdChar[c-0x20]))
#endif
#ifdef SQLITE_EBCDIC
/* One entry per character in the range 0x40..0xff; the table is indexed
** by (character - 0x40).  A non-zero entry means the character may appear
** in an identifier. */
const char sqlite3IsIdChar[] = {
/* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */
    0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,  /* 4x */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0,  /* 5x */
    0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0,  /* 6x */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,  /* 7x */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0,  /* 8x */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0,  /* 9x */
    1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0,  /* Ax */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* Bx */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,  /* Cx */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,  /* Dx */
    0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,  /* Ex */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0,  /* Fx */
};
/* NOTE: as in the ASCII variant, IdChar() assigns its argument to a
** variable named "c" that must be in scope at the point of use. */
#define IdChar(C)  (((c=(C))>=0x42 && sqlite3IsIdChar[c-0x40]))
#endif
|
118 |
|
119 |
|
120 /* |
|
121 ** Return the length of the token that begins at z[0]. |
|
122 ** Store the token type in *tokenType before returning. |
|
123 */ |
|
124 static int getToken(const unsigned char *z, int *tokenType){ |
|
125 int i, c; |
|
126 switch( *z ){ |
|
127 case ' ': case '\t': case '\n': case '\f': case '\r': { |
|
128 for(i=1; isspace(z[i]); i++){} |
|
129 *tokenType = TK_SPACE; |
|
130 return i; |
|
131 } |
|
132 case '-': { |
|
133 if( z[1]=='-' ){ |
|
134 for(i=2; (c=z[i])!=0 && c!='\n'; i++){} |
|
135 *tokenType = TK_COMMENT; |
|
136 return i; |
|
137 } |
|
138 *tokenType = TK_MINUS; |
|
139 return 1; |
|
140 } |
|
141 case '(': { |
|
142 *tokenType = TK_LP; |
|
143 return 1; |
|
144 } |
|
145 case ')': { |
|
146 *tokenType = TK_RP; |
|
147 return 1; |
|
148 } |
|
149 case ';': { |
|
150 *tokenType = TK_SEMI; |
|
151 return 1; |
|
152 } |
|
153 case '+': { |
|
154 *tokenType = TK_PLUS; |
|
155 return 1; |
|
156 } |
|
157 case '*': { |
|
158 *tokenType = TK_STAR; |
|
159 return 1; |
|
160 } |
|
161 case '/': { |
|
162 if( z[1]!='*' || z[2]==0 ){ |
|
163 *tokenType = TK_SLASH; |
|
164 return 1; |
|
165 } |
|
166 for(i=3, c=z[2]; (c!='*' || z[i]!='/') && (c=z[i])!=0; i++){} |
|
167 if( c ) i++; |
|
168 *tokenType = TK_COMMENT; |
|
169 return i; |
|
170 } |
|
171 case '%': { |
|
172 *tokenType = TK_REM; |
|
173 return 1; |
|
174 } |
|
175 case '=': { |
|
176 *tokenType = TK_EQ; |
|
177 return 1 + (z[1]=='='); |
|
178 } |
|
179 case '<': { |
|
180 if( (c=z[1])=='=' ){ |
|
181 *tokenType = TK_LE; |
|
182 return 2; |
|
183 }else if( c=='>' ){ |
|
184 *tokenType = TK_NE; |
|
185 return 2; |
|
186 }else if( c=='<' ){ |
|
187 *tokenType = TK_LSHIFT; |
|
188 return 2; |
|
189 }else{ |
|
190 *tokenType = TK_LT; |
|
191 return 1; |
|
192 } |
|
193 } |
|
194 case '>': { |
|
195 if( (c=z[1])=='=' ){ |
|
196 *tokenType = TK_GE; |
|
197 return 2; |
|
198 }else if( c=='>' ){ |
|
199 *tokenType = TK_RSHIFT; |
|
200 return 2; |
|
201 }else{ |
|
202 *tokenType = TK_GT; |
|
203 return 1; |
|
204 } |
|
205 } |
|
206 case '!': { |
|
207 if( z[1]!='=' ){ |
|
208 *tokenType = TK_ILLEGAL; |
|
209 return 2; |
|
210 }else{ |
|
211 *tokenType = TK_NE; |
|
212 return 2; |
|
213 } |
|
214 } |
|
215 case '|': { |
|
216 if( z[1]!='|' ){ |
|
217 *tokenType = TK_BITOR; |
|
218 return 1; |
|
219 }else{ |
|
220 *tokenType = TK_CONCAT; |
|
221 return 2; |
|
222 } |
|
223 } |
|
224 case ',': { |
|
225 *tokenType = TK_COMMA; |
|
226 return 1; |
|
227 } |
|
228 case '&': { |
|
229 *tokenType = TK_BITAND; |
|
230 return 1; |
|
231 } |
|
232 case '~': { |
|
233 *tokenType = TK_BITNOT; |
|
234 return 1; |
|
235 } |
|
236 case '`': |
|
237 case '\'': |
|
238 case '"': { |
|
239 int delim = z[0]; |
|
240 for(i=1; (c=z[i])!=0; i++){ |
|
241 if( c==delim ){ |
|
242 if( z[i+1]==delim ){ |
|
243 i++; |
|
244 }else{ |
|
245 break; |
|
246 } |
|
247 } |
|
248 } |
|
249 if( c ){ |
|
250 *tokenType = TK_STRING; |
|
251 return i+1; |
|
252 }else{ |
|
253 *tokenType = TK_ILLEGAL; |
|
254 return i; |
|
255 } |
|
256 } |
|
257 case '.': { |
|
258 #ifndef SQLITE_OMIT_FLOATING_POINT |
|
259 if( !isdigit(z[1]) ) |
|
260 #endif |
|
261 { |
|
262 *tokenType = TK_DOT; |
|
263 return 1; |
|
264 } |
|
265 /* If the next character is a digit, this is a floating point |
|
266 ** number that begins with ".". Fall thru into the next case */ |
|
267 } |
|
268 case '0': case '1': case '2': case '3': case '4': |
|
269 case '5': case '6': case '7': case '8': case '9': { |
|
270 *tokenType = TK_INTEGER; |
|
271 for(i=0; isdigit(z[i]); i++){} |
|
272 #ifndef SQLITE_OMIT_FLOATING_POINT |
|
273 if( z[i]=='.' ){ |
|
274 i++; |
|
275 while( isdigit(z[i]) ){ i++; } |
|
276 *tokenType = TK_FLOAT; |
|
277 } |
|
278 if( (z[i]=='e' || z[i]=='E') && |
|
279 ( isdigit(z[i+1]) |
|
280 || ((z[i+1]=='+' || z[i+1]=='-') && isdigit(z[i+2])) |
|
281 ) |
|
282 ){ |
|
283 i += 2; |
|
284 while( isdigit(z[i]) ){ i++; } |
|
285 *tokenType = TK_FLOAT; |
|
286 } |
|
287 #endif |
|
288 while( IdChar(z[i]) ){ |
|
289 *tokenType = TK_ILLEGAL; |
|
290 i++; |
|
291 } |
|
292 return i; |
|
293 } |
|
294 case '[': { |
|
295 for(i=1, c=z[0]; c!=']' && (c=z[i])!=0; i++){} |
|
296 *tokenType = TK_ID; |
|
297 return i; |
|
298 } |
|
299 case '?': { |
|
300 *tokenType = TK_VARIABLE; |
|
301 for(i=1; isdigit(z[i]); i++){} |
|
302 return i; |
|
303 } |
|
304 case '#': { |
|
305 for(i=1; isdigit(z[i]); i++){} |
|
306 if( i>1 ){ |
|
307 /* Parameters of the form #NNN (where NNN is a number) are used |
|
308 ** internally by sqlite3NestedParse. */ |
|
309 *tokenType = TK_REGISTER; |
|
310 return i; |
|
311 } |
|
312 /* Fall through into the next case if the '#' is not followed by |
|
313 ** a digit. Try to match #AAAA where AAAA is a parameter name. */ |
|
314 } |
|
315 #ifndef SQLITE_OMIT_TCL_VARIABLE |
|
316 case '$': |
|
317 #endif |
|
318 case '@': /* For compatibility with MS SQL Server */ |
|
319 case ':': { |
|
320 int n = 0; |
|
321 *tokenType = TK_VARIABLE; |
|
322 for(i=1; (c=z[i])!=0; i++){ |
|
323 if( IdChar(c) ){ |
|
324 n++; |
|
325 #ifndef SQLITE_OMIT_TCL_VARIABLE |
|
326 }else if( c=='(' && n>0 ){ |
|
327 do{ |
|
328 i++; |
|
329 }while( (c=z[i])!=0 && !isspace(c) && c!=')' ); |
|
330 if( c==')' ){ |
|
331 i++; |
|
332 }else{ |
|
333 *tokenType = TK_ILLEGAL; |
|
334 } |
|
335 break; |
|
336 }else if( c==':' && z[i+1]==':' ){ |
|
337 i++; |
|
338 #endif |
|
339 }else{ |
|
340 break; |
|
341 } |
|
342 } |
|
343 if( n==0 ) *tokenType = TK_ILLEGAL; |
|
344 return i; |
|
345 } |
|
346 #ifndef SQLITE_OMIT_BLOB_LITERAL |
|
347 case 'x': case 'X': { |
|
348 if( (c=z[1])=='\'' || c=='"' ){ |
|
349 int delim = c; |
|
350 *tokenType = TK_BLOB; |
|
351 for(i=2; (c=z[i])!=0; i++){ |
|
352 if( c==delim ){ |
|
353 if( i%2 ) *tokenType = TK_ILLEGAL; |
|
354 break; |
|
355 } |
|
356 if( !isxdigit(c) ){ |
|
357 *tokenType = TK_ILLEGAL; |
|
358 return i; |
|
359 } |
|
360 } |
|
361 if( c ) i++; |
|
362 return i; |
|
363 } |
|
364 /* Otherwise fall through to the next case */ |
|
365 } |
|
366 #endif |
|
367 default: { |
|
368 if( !IdChar(*z) ){ |
|
369 break; |
|
370 } |
|
371 for(i=1; IdChar(z[i]); i++){} |
|
372 *tokenType = keywordCode((char*)z, i); |
|
373 return i; |
|
374 } |
|
375 } |
|
376 *tokenType = TK_ILLEGAL; |
|
377 return 1; |
|
378 } |
|
/*
** Externally visible wrapper around the file-local getToken() routine.
** The arguments are passed through unchanged: the token type is written
** to *tokenType and the value returned by getToken() is returned.
*/
int sqlite3GetToken(const unsigned char *z, int *tokenType){
  int nToken = getToken(z, tokenType);
  return nToken;
}
|
382 |
|
383 /* |
|
384 ** Run the parser on the given SQL string. The parser structure is |
|
385 ** passed in. An SQLITE_ status code is returned. If an error occurs |
|
386 ** and pzErrMsg!=NULL then an error message might be written into |
|
387 ** memory obtained from malloc() and *pzErrMsg made to point to that |
|
388 ** error message. Or maybe not. |
|
389 */ |
|
390 int sqlite3RunParser(Parse *pParse, const char *zSql, char **pzErrMsg){ |
|
391 int nErr = 0; |
|
392 int i; |
|
393 void *pEngine; |
|
394 int tokenType; |
|
395 int lastTokenParsed = -1; |
|
396 sqlite3 *db = pParse->db; |
|
397 extern void *sqlite3ParserAlloc(void*(*)(int)); |
|
398 extern void sqlite3ParserFree(void*, void(*)(void*)); |
|
399 extern int sqlite3Parser(void*, int, Token, Parse*); |
|
400 |
|
401 if( db->activeVdbeCnt==0 ){ |
|
402 db->u1.isInterrupted = 0; |
|
403 } |
|
404 pParse->rc = SQLITE_OK; |
|
405 i = 0; |
|
406 pEngine = sqlite3ParserAlloc((void*(*)(int))sqlite3MallocX); |
|
407 if( pEngine==0 ){ |
|
408 return SQLITE_NOMEM; |
|
409 } |
|
410 assert( pParse->sLastToken.dyn==0 ); |
|
411 assert( pParse->pNewTable==0 ); |
|
412 assert( pParse->pNewTrigger==0 ); |
|
413 assert( pParse->nVar==0 ); |
|
414 assert( pParse->nVarExpr==0 ); |
|
415 assert( pParse->nVarExprAlloc==0 ); |
|
416 assert( pParse->apVarExpr==0 ); |
|
417 pParse->zTail = pParse->zSql = zSql; |
|
418 while( !sqlite3MallocFailed() && zSql[i]!=0 ){ |
|
419 assert( i>=0 ); |
|
420 pParse->sLastToken.z = (u8*)&zSql[i]; |
|
421 assert( pParse->sLastToken.dyn==0 ); |
|
422 pParse->sLastToken.n = getToken((unsigned char*)&zSql[i],&tokenType); |
|
423 i += pParse->sLastToken.n; |
|
424 switch( tokenType ){ |
|
425 case TK_SPACE: |
|
426 case TK_COMMENT: { |
|
427 if( db->u1.isInterrupted ){ |
|
428 pParse->rc = SQLITE_INTERRUPT; |
|
429 sqlite3SetString(pzErrMsg, "interrupt", (char*)0); |
|
430 goto abort_parse; |
|
431 } |
|
432 break; |
|
433 } |
|
434 case TK_ILLEGAL: { |
|
435 if( pzErrMsg ){ |
|
436 sqliteFree(*pzErrMsg); |
|
437 *pzErrMsg = sqlite3MPrintf("unrecognized token: \"%T\"", |
|
438 &pParse->sLastToken); |
|
439 } |
|
440 nErr++; |
|
441 goto abort_parse; |
|
442 } |
|
443 case TK_SEMI: { |
|
444 pParse->zTail = &zSql[i]; |
|
445 /* Fall thru into the default case */ |
|
446 } |
|
447 default: { |
|
448 sqlite3Parser(pEngine, tokenType, pParse->sLastToken, pParse); |
|
449 lastTokenParsed = tokenType; |
|
450 if( pParse->rc!=SQLITE_OK ){ |
|
451 goto abort_parse; |
|
452 } |
|
453 break; |
|
454 } |
|
455 } |
|
456 } |
|
457 abort_parse: |
|
458 if( zSql[i]==0 && nErr==0 && pParse->rc==SQLITE_OK ){ |
|
459 if( lastTokenParsed!=TK_SEMI ){ |
|
460 sqlite3Parser(pEngine, TK_SEMI, pParse->sLastToken, pParse); |
|
461 pParse->zTail = &zSql[i]; |
|
462 } |
|
463 sqlite3Parser(pEngine, 0, pParse->sLastToken, pParse); |
|
464 } |
|
465 sqlite3ParserFree(pEngine, sqlite3FreeX); |
|
466 if( sqlite3MallocFailed() ){ |
|
467 pParse->rc = SQLITE_NOMEM; |
|
468 } |
|
469 if( pParse->rc!=SQLITE_OK && pParse->rc!=SQLITE_DONE && pParse->zErrMsg==0 ){ |
|
470 sqlite3SetString(&pParse->zErrMsg, sqlite3ErrStr(pParse->rc), (char*)0); |
|
471 } |
|
472 if( pParse->zErrMsg ){ |
|
473 if( pzErrMsg && *pzErrMsg==0 ){ |
|
474 *pzErrMsg = pParse->zErrMsg; |
|
475 }else{ |
|
476 sqliteFree(pParse->zErrMsg); |
|
477 } |
|
478 pParse->zErrMsg = 0; |
|
479 if( !nErr ) nErr++; |
|
480 } |
|
481 if( pParse->pVdbe && pParse->nErr>0 && pParse->nested==0 ){ |
|
482 sqlite3VdbeDelete(pParse->pVdbe); |
|
483 pParse->pVdbe = 0; |
|
484 } |
|
485 #ifndef SQLITE_OMIT_SHARED_CACHE |
|
486 if( pParse->nested==0 ){ |
|
487 sqliteFree(pParse->aTableLock); |
|
488 pParse->aTableLock = 0; |
|
489 pParse->nTableLock = 0; |
|
490 } |
|
491 #endif |
|
492 |
|
493 if( !IN_DECLARE_VTAB ){ |
|
494 /* If the pParse->declareVtab flag is set, do not delete any table |
|
495 ** structure built up in pParse->pNewTable. The calling code (see vtab.c) |
|
496 ** will take responsibility for freeing the Table structure. |
|
497 */ |
|
498 sqlite3DeleteTable(pParse->db, pParse->pNewTable); |
|
499 } |
|
500 |
|
501 sqlite3DeleteTrigger(pParse->pNewTrigger); |
|
502 sqliteFree(pParse->apVarExpr); |
|
503 if( nErr>0 && (pParse->rc==SQLITE_OK || pParse->rc==SQLITE_DONE) ){ |
|
504 pParse->rc = SQLITE_ERROR; |
|
505 } |
|
506 return nErr; |
|
507 } |