webengine/webkitutils/SqliteSymbian/tokenize.c
changeset 0 dd21522fd290
equal deleted inserted replaced
-1:000000000000 0:dd21522fd290
       
     1 /*
       
     2 ** 2001 September 15
       
     3 **
       
     4 ** The author disclaims copyright to this source code.  In place of
       
     5 ** a legal notice, here is a blessing:
       
     6 **
       
     7 **    May you do good and not evil.
       
     8 **    May you find forgiveness for yourself and forgive others.
       
     9 **    May you share freely, never taking more than you give.
       
    10 **
       
    11 *************************************************************************
       
    12 ** A tokenizer for SQL
       
    13 **
       
    14 ** This file contains C code that splits an SQL input string up into
       
    15 ** individual tokens and sends those tokens one-by-one over to the
       
    16 ** parser for analysis.
       
    17 **
       
    18 ** $Id: tokenize.c,v 1.124 2006/08/12 12:33:14 drh Exp $
       
    19 */
       
    20 #include "sqliteInt.h"
       
    21 #include "os.h"
       
    22 #include <ctype.h>
       
    23 #include <stdlib.h>
       
    24 
       
    /*
    ** The charMap() macro maps alphabetic characters into their
    ** lower-case ASCII equivalent.
    **
    ** On ASCII machines this is just an upper-to-lower case map, looked up
    ** in sqlite3UpperToLower[].  On EBCDIC machines the encoding has to be
    ** adjusted as well, which is what the ebcdicToAscii[] table below does:
    ** both upper- and lower-case EBCDIC letters map to the lower-case ASCII
    ** letter, the EBCDIC underscore (0x6D) maps to ASCII '_' (95), and every
    ** other code maps to 0.
    **
    ** The macro argument is parenthesized so that an expression argument
    ** such as charMap(a+b) is cast to unsigned char as a whole rather than
    ** having the cast bind to its first operand only.
    */
    #ifdef SQLITE_ASCII
    # define charMap(X) sqlite3UpperToLower[(unsigned char)(X)]
    #endif
    #ifdef SQLITE_EBCDIC
    # define charMap(X) ebcdicToAscii[(unsigned char)(X)]
    const unsigned char ebcdicToAscii[] = {
    /* 0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F */
       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 0x */
       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 1x */
       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 2x */
       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 3x */
       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 4x */
       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 5x */
       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 95,  0,  0,  /* 6x */
       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 7x */
       0, 97, 98, 99,100,101,102,103,104,105,  0,  0,  0,  0,  0,  0,  /* 8x */
       0,106,107,108,109,110,111,112,113,114,  0,  0,  0,  0,  0,  0,  /* 9x */
       0,  0,115,116,117,118,119,120,121,122,  0,  0,  0,  0,  0,  0,  /* Ax */
       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* Bx */
       0, 97, 98, 99,100,101,102,103,104,105,  0,  0,  0,  0,  0,  0,  /* Cx */
       0,106,107,108,109,110,111,112,113,114,  0,  0,  0,  0,  0,  0,  /* Dx */
       0,  0,115,116,117,118,119,120,121,122,  0,  0,  0,  0,  0,  0,  /* Ex */
       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* Fx */
    };
    #endif
       
    57 
       
    58 /*
       
    59 ** The sqlite3KeywordCode function looks up an identifier to determine if
       
    60 ** it is a keyword.  If it is a keyword, the token code of that keyword is 
       
    61 ** returned.  If the input is not a keyword, TK_ID is returned.
       
    62 **
       
    63 ** The implementation of this routine was generated by a program,
       
    64 ** mkkeywordhash.c, located in the tool subdirectory of the distribution.
       
    65 ** The output of the mkkeywordhash.c program is written into a file
       
    66 ** named keywordhash.h and then included into this source file by
       
    67 ** the #include below.
       
    68 */
       
    69 #include "keywordhash.h"
       
    70 
       
    71 
       
    /*
    ** If X is a character that can be used in an identifier then
    ** IdChar(X) will be true.  Otherwise it is false.
    **
    ** For ASCII, any character with the high-order bit set is
    ** allowed in an identifier.  For 7-bit characters at or above 0x20,
    ** sqlite3IsIdChar[X-0x20] must be 1; characters below 0x20 are never
    ** identifier characters.
    **
    ** For EBCDIC, the rules are more complex but have the same
    ** end result: the character must be at least 0x42 and
    ** sqlite3IsIdChar[X-0x40] must be 1.
    **
    ** Ticket #1066.  The SQL standard does not allow '$' in the
    ** middle of identifiers.  But many SQL implementations do.
    ** SQLite will allow '$' in identifiers for compatibility.
    ** But the feature is undocumented.
    **
    ** NOTE: IdChar() assigns its argument to a variable named "c", so an
    ** integer variable of that name must be in scope wherever the macro is
    ** used.  The argument is parenthesized so that it is always evaluated
    ** as a single expression before the assignment.
    */
    #ifdef SQLITE_ASCII
    const char sqlite3IsIdChar[] = {
    /* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */
        0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 2x */
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,  /* 3x */
        0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 4x */
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,  /* 5x */
        0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 6x */
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,  /* 7x */
    };
    #define IdChar(C)  (((c=(C))&0x80)!=0 || (c>0x1f && sqlite3IsIdChar[c-0x20]))
    #endif
    #ifdef SQLITE_EBCDIC
    const char sqlite3IsIdChar[] = {
    /* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */
        0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,  /* 4x */
        0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0,  /* 5x */
        0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0,  /* 6x */
        0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,  /* 7x */
        0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0,  /* 8x */
        0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0,  /* 9x */
        1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0,  /* Ax */
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* Bx */
        0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,  /* Cx */
        0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,  /* Dx */
        0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,  /* Ex */
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0,  /* Fx */
    };
    #define IdChar(C)  (((c=(C))>=0x42 && sqlite3IsIdChar[c-0x40]))
    #endif
       
   118 
       
   119 
       
   120 /*
       
   121 ** Return the length of the token that begins at z[0]. 
       
   122 ** Store the token type in *tokenType before returning.
       
   123 */
       
   124 static int getToken(const unsigned char *z, int *tokenType){
       
   125   int i, c;
       
   126   switch( *z ){
       
   127     case ' ': case '\t': case '\n': case '\f': case '\r': {
       
   128       for(i=1; isspace(z[i]); i++){}
       
   129       *tokenType = TK_SPACE;
       
   130       return i;
       
   131     }
       
   132     case '-': {
       
   133       if( z[1]=='-' ){
       
   134         for(i=2; (c=z[i])!=0 && c!='\n'; i++){}
       
   135         *tokenType = TK_COMMENT;
       
   136         return i;
       
   137       }
       
   138       *tokenType = TK_MINUS;
       
   139       return 1;
       
   140     }
       
   141     case '(': {
       
   142       *tokenType = TK_LP;
       
   143       return 1;
       
   144     }
       
   145     case ')': {
       
   146       *tokenType = TK_RP;
       
   147       return 1;
       
   148     }
       
   149     case ';': {
       
   150       *tokenType = TK_SEMI;
       
   151       return 1;
       
   152     }
       
   153     case '+': {
       
   154       *tokenType = TK_PLUS;
       
   155       return 1;
       
   156     }
       
   157     case '*': {
       
   158       *tokenType = TK_STAR;
       
   159       return 1;
       
   160     }
       
   161     case '/': {
       
   162       if( z[1]!='*' || z[2]==0 ){
       
   163         *tokenType = TK_SLASH;
       
   164         return 1;
       
   165       }
       
   166       for(i=3, c=z[2]; (c!='*' || z[i]!='/') && (c=z[i])!=0; i++){}
       
   167       if( c ) i++;
       
   168       *tokenType = TK_COMMENT;
       
   169       return i;
       
   170     }
       
   171     case '%': {
       
   172       *tokenType = TK_REM;
       
   173       return 1;
       
   174     }
       
   175     case '=': {
       
   176       *tokenType = TK_EQ;
       
   177       return 1 + (z[1]=='=');
       
   178     }
       
   179     case '<': {
       
   180       if( (c=z[1])=='=' ){
       
   181         *tokenType = TK_LE;
       
   182         return 2;
       
   183       }else if( c=='>' ){
       
   184         *tokenType = TK_NE;
       
   185         return 2;
       
   186       }else if( c=='<' ){
       
   187         *tokenType = TK_LSHIFT;
       
   188         return 2;
       
   189       }else{
       
   190         *tokenType = TK_LT;
       
   191         return 1;
       
   192       }
       
   193     }
       
   194     case '>': {
       
   195       if( (c=z[1])=='=' ){
       
   196         *tokenType = TK_GE;
       
   197         return 2;
       
   198       }else if( c=='>' ){
       
   199         *tokenType = TK_RSHIFT;
       
   200         return 2;
       
   201       }else{
       
   202         *tokenType = TK_GT;
       
   203         return 1;
       
   204       }
       
   205     }
       
   206     case '!': {
       
   207       if( z[1]!='=' ){
       
   208         *tokenType = TK_ILLEGAL;
       
   209         return 2;
       
   210       }else{
       
   211         *tokenType = TK_NE;
       
   212         return 2;
       
   213       }
       
   214     }
       
   215     case '|': {
       
   216       if( z[1]!='|' ){
       
   217         *tokenType = TK_BITOR;
       
   218         return 1;
       
   219       }else{
       
   220         *tokenType = TK_CONCAT;
       
   221         return 2;
       
   222       }
       
   223     }
       
   224     case ',': {
       
   225       *tokenType = TK_COMMA;
       
   226       return 1;
       
   227     }
       
   228     case '&': {
       
   229       *tokenType = TK_BITAND;
       
   230       return 1;
       
   231     }
       
   232     case '~': {
       
   233       *tokenType = TK_BITNOT;
       
   234       return 1;
       
   235     }
       
   236     case '`':
       
   237     case '\'':
       
   238     case '"': {
       
   239       int delim = z[0];
       
   240       for(i=1; (c=z[i])!=0; i++){
       
   241         if( c==delim ){
       
   242           if( z[i+1]==delim ){
       
   243             i++;
       
   244           }else{
       
   245             break;
       
   246           }
       
   247         }
       
   248       }
       
   249       if( c ){
       
   250         *tokenType = TK_STRING;
       
   251         return i+1;
       
   252       }else{
       
   253         *tokenType = TK_ILLEGAL;
       
   254         return i;
       
   255       }
       
   256     }
       
   257     case '.': {
       
   258 #ifndef SQLITE_OMIT_FLOATING_POINT
       
   259       if( !isdigit(z[1]) )
       
   260 #endif
       
   261       {
       
   262         *tokenType = TK_DOT;
       
   263         return 1;
       
   264       }
       
   265       /* If the next character is a digit, this is a floating point
       
   266       ** number that begins with ".".  Fall thru into the next case */
       
   267     }
       
   268     case '0': case '1': case '2': case '3': case '4':
       
   269     case '5': case '6': case '7': case '8': case '9': {
       
   270       *tokenType = TK_INTEGER;
       
   271       for(i=0; isdigit(z[i]); i++){}
       
   272 #ifndef SQLITE_OMIT_FLOATING_POINT
       
   273       if( z[i]=='.' ){
       
   274         i++;
       
   275         while( isdigit(z[i]) ){ i++; }
       
   276         *tokenType = TK_FLOAT;
       
   277       }
       
   278       if( (z[i]=='e' || z[i]=='E') &&
       
   279            ( isdigit(z[i+1]) 
       
   280             || ((z[i+1]=='+' || z[i+1]=='-') && isdigit(z[i+2]))
       
   281            )
       
   282       ){
       
   283         i += 2;
       
   284         while( isdigit(z[i]) ){ i++; }
       
   285         *tokenType = TK_FLOAT;
       
   286       }
       
   287 #endif
       
   288       while( IdChar(z[i]) ){
       
   289         *tokenType = TK_ILLEGAL;
       
   290         i++;
       
   291       }
       
   292       return i;
       
   293     }
       
   294     case '[': {
       
   295       for(i=1, c=z[0]; c!=']' && (c=z[i])!=0; i++){}
       
   296       *tokenType = TK_ID;
       
   297       return i;
       
   298     }
       
   299     case '?': {
       
   300       *tokenType = TK_VARIABLE;
       
   301       for(i=1; isdigit(z[i]); i++){}
       
   302       return i;
       
   303     }
       
   304     case '#': {
       
   305       for(i=1; isdigit(z[i]); i++){}
       
   306       if( i>1 ){
       
   307         /* Parameters of the form #NNN (where NNN is a number) are used
       
   308         ** internally by sqlite3NestedParse.  */
       
   309         *tokenType = TK_REGISTER;
       
   310         return i;
       
   311       }
       
   312       /* Fall through into the next case if the '#' is not followed by
       
   313       ** a digit. Try to match #AAAA where AAAA is a parameter name. */
       
   314     }
       
   315 #ifndef SQLITE_OMIT_TCL_VARIABLE
       
   316     case '$':
       
   317 #endif
       
   318     case '@':  /* For compatibility with MS SQL Server */
       
   319     case ':': {
       
   320       int n = 0;
       
   321       *tokenType = TK_VARIABLE;
       
   322       for(i=1; (c=z[i])!=0; i++){
       
   323         if( IdChar(c) ){
       
   324           n++;
       
   325 #ifndef SQLITE_OMIT_TCL_VARIABLE
       
   326         }else if( c=='(' && n>0 ){
       
   327           do{
       
   328             i++;
       
   329           }while( (c=z[i])!=0 && !isspace(c) && c!=')' );
       
   330           if( c==')' ){
       
   331             i++;
       
   332           }else{
       
   333             *tokenType = TK_ILLEGAL;
       
   334           }
       
   335           break;
       
   336         }else if( c==':' && z[i+1]==':' ){
       
   337           i++;
       
   338 #endif
       
   339         }else{
       
   340           break;
       
   341         }
       
   342       }
       
   343       if( n==0 ) *tokenType = TK_ILLEGAL;
       
   344       return i;
       
   345     }
       
   346 #ifndef SQLITE_OMIT_BLOB_LITERAL
       
   347     case 'x': case 'X': {
       
   348       if( (c=z[1])=='\'' || c=='"' ){
       
   349         int delim = c;
       
   350         *tokenType = TK_BLOB;
       
   351         for(i=2; (c=z[i])!=0; i++){
       
   352           if( c==delim ){
       
   353             if( i%2 ) *tokenType = TK_ILLEGAL;
       
   354             break;
       
   355           }
       
   356           if( !isxdigit(c) ){
       
   357             *tokenType = TK_ILLEGAL;
       
   358             return i;
       
   359           }
       
   360         }
       
   361         if( c ) i++;
       
   362         return i;
       
   363       }
       
   364       /* Otherwise fall through to the next case */
       
   365     }
       
   366 #endif
       
   367     default: {
       
   368       if( !IdChar(*z) ){
       
   369         break;
       
   370       }
       
   371       for(i=1; IdChar(z[i]); i++){}
       
   372       *tokenType = keywordCode((char*)z, i);
       
   373       return i;
       
   374     }
       
   375   }
       
   376   *tokenType = TK_ILLEGAL;
       
   377   return 1;
       
   378 }
       
   /*
   ** Externally visible wrapper around the file-local getToken() routine.
   ** The arguments are passed through unchanged: the token type is stored
   ** in *tokenType and the length of the token at z[0] is returned.
   */
   int sqlite3GetToken(const unsigned char *z, int *tokenType){
     const int nToken = getToken(z, tokenType);
     return nToken;
   }
       
   382 
       
   383 /*
       
   384 ** Run the parser on the given SQL string.  The parser structure is
       
   385 ** passed in.  An SQLITE_ status code is returned.  If an error occurs
       
   386 ** and pzErrMsg!=NULL then an error message might be written into 
       
   387 ** memory obtained from malloc() and *pzErrMsg made to point to that
       
   388 ** error message.  Or maybe not.
       
   389 */
       
   390 int sqlite3RunParser(Parse *pParse, const char *zSql, char **pzErrMsg){
       
   391   int nErr = 0;
       
   392   int i;
       
   393   void *pEngine;
       
   394   int tokenType;
       
   395   int lastTokenParsed = -1;
       
   396   sqlite3 *db = pParse->db;
       
   397   extern void *sqlite3ParserAlloc(void*(*)(int));
       
   398   extern void sqlite3ParserFree(void*, void(*)(void*));
       
   399   extern int sqlite3Parser(void*, int, Token, Parse*);
       
   400 
       
   401   if( db->activeVdbeCnt==0 ){
       
   402     db->u1.isInterrupted = 0;
       
   403   }
       
   404   pParse->rc = SQLITE_OK;
       
   405   i = 0;
       
   406   pEngine = sqlite3ParserAlloc((void*(*)(int))sqlite3MallocX);
       
   407   if( pEngine==0 ){
       
   408     return SQLITE_NOMEM;
       
   409   }
       
   410   assert( pParse->sLastToken.dyn==0 );
       
   411   assert( pParse->pNewTable==0 );
       
   412   assert( pParse->pNewTrigger==0 );
       
   413   assert( pParse->nVar==0 );
       
   414   assert( pParse->nVarExpr==0 );
       
   415   assert( pParse->nVarExprAlloc==0 );
       
   416   assert( pParse->apVarExpr==0 );
       
   417   pParse->zTail = pParse->zSql = zSql;
       
   418   while( !sqlite3MallocFailed() && zSql[i]!=0 ){
       
   419     assert( i>=0 );
       
   420     pParse->sLastToken.z = (u8*)&zSql[i];
       
   421     assert( pParse->sLastToken.dyn==0 );
       
   422     pParse->sLastToken.n = getToken((unsigned char*)&zSql[i],&tokenType);
       
   423     i += pParse->sLastToken.n;
       
   424     switch( tokenType ){
       
   425       case TK_SPACE:
       
   426       case TK_COMMENT: {
       
   427         if( db->u1.isInterrupted ){
       
   428           pParse->rc = SQLITE_INTERRUPT;
       
   429           sqlite3SetString(pzErrMsg, "interrupt", (char*)0);
       
   430           goto abort_parse;
       
   431         }
       
   432         break;
       
   433       }
       
   434       case TK_ILLEGAL: {
       
   435         if( pzErrMsg ){
       
   436           sqliteFree(*pzErrMsg);
       
   437           *pzErrMsg = sqlite3MPrintf("unrecognized token: \"%T\"",
       
   438                           &pParse->sLastToken);
       
   439         }
       
   440         nErr++;
       
   441         goto abort_parse;
       
   442       }
       
   443       case TK_SEMI: {
       
   444         pParse->zTail = &zSql[i];
       
   445         /* Fall thru into the default case */
       
   446       }
       
   447       default: {
       
   448         sqlite3Parser(pEngine, tokenType, pParse->sLastToken, pParse);
       
   449         lastTokenParsed = tokenType;
       
   450         if( pParse->rc!=SQLITE_OK ){
       
   451           goto abort_parse;
       
   452         }
       
   453         break;
       
   454       }
       
   455     }
       
   456   }
       
   457 abort_parse:
       
   458   if( zSql[i]==0 && nErr==0 && pParse->rc==SQLITE_OK ){
       
   459     if( lastTokenParsed!=TK_SEMI ){
       
   460       sqlite3Parser(pEngine, TK_SEMI, pParse->sLastToken, pParse);
       
   461       pParse->zTail = &zSql[i];
       
   462     }
       
   463     sqlite3Parser(pEngine, 0, pParse->sLastToken, pParse);
       
   464   }
       
   465   sqlite3ParserFree(pEngine, sqlite3FreeX);
       
   466   if( sqlite3MallocFailed() ){
       
   467     pParse->rc = SQLITE_NOMEM;
       
   468   }
       
   469   if( pParse->rc!=SQLITE_OK && pParse->rc!=SQLITE_DONE && pParse->zErrMsg==0 ){
       
   470     sqlite3SetString(&pParse->zErrMsg, sqlite3ErrStr(pParse->rc), (char*)0);
       
   471   }
       
   472   if( pParse->zErrMsg ){
       
   473     if( pzErrMsg && *pzErrMsg==0 ){
       
   474       *pzErrMsg = pParse->zErrMsg;
       
   475     }else{
       
   476       sqliteFree(pParse->zErrMsg);
       
   477     }
       
   478     pParse->zErrMsg = 0;
       
   479     if( !nErr ) nErr++;
       
   480   }
       
   481   if( pParse->pVdbe && pParse->nErr>0 && pParse->nested==0 ){
       
   482     sqlite3VdbeDelete(pParse->pVdbe);
       
   483     pParse->pVdbe = 0;
       
   484   }
       
   485 #ifndef SQLITE_OMIT_SHARED_CACHE
       
   486   if( pParse->nested==0 ){
       
   487     sqliteFree(pParse->aTableLock);
       
   488     pParse->aTableLock = 0;
       
   489     pParse->nTableLock = 0;
       
   490   }
       
   491 #endif
       
   492 
       
   493   if( !IN_DECLARE_VTAB ){
       
   494     /* If the pParse->declareVtab flag is set, do not delete any table 
       
   495     ** structure built up in pParse->pNewTable. The calling code (see vtab.c)
       
   496     ** will take responsibility for freeing the Table structure.
       
   497     */
       
   498     sqlite3DeleteTable(pParse->db, pParse->pNewTable);
       
   499   }
       
   500 
       
   501   sqlite3DeleteTrigger(pParse->pNewTrigger);
       
   502   sqliteFree(pParse->apVarExpr);
       
   503   if( nErr>0 && (pParse->rc==SQLITE_OK || pParse->rc==SQLITE_DONE) ){
       
   504     pParse->rc = SQLITE_ERROR;
       
   505   }
       
   506   return nErr;
       
   507 }