engine/sqlite/src/tokenize.cpp
changeset 2 29cda98b007e
equal deleted inserted replaced
1:5f8e5adbbed9 2:29cda98b007e
       
     1 /*
       
     2 ** 2001 September 15
       
     3 **
       
     4 ** The author disclaims copyright to this source code.  In place of
       
     5 ** a legal notice, here is a blessing:
       
     6 **
       
     7 **    May you do good and not evil.
       
     8 **    May you find forgiveness for yourself and forgive others.
       
     9 **    May you share freely, never taking more than you give.
       
    10 **
       
    11 *************************************************************************
       
    12 ** A tokenizer for SQL
       
    13 **
       
    14 ** This file contains C code that splits an SQL input string up into
       
    15 ** individual tokens and sends those tokens one-by-one over to the
       
    16 ** parser for analysis.
       
    17 **
       
    18 ** $Id: tokenize.cpp 1282 2008-11-13 09:31:33Z LarsPson $
       
    19 */
       
    20 #include "sqliteInt.h"
       
    21 #include <ctype.h>
       
    22 #include <stdlib.h>
       
    23 
       
    24 /*
       
    25 ** The charMap() macro maps alphabetic characters into their
       
    26 ** lower-case ASCII equivalent.  On ASCII machines, this is just
       
    27 ** an upper-to-lower case map.  On EBCDIC machines we also need
       
    28 ** to adjust the encoding.  Only alphabetic characters and underscores
       
    29 ** need to be translated.
       
    30 */
       
    31 #ifdef SQLITE_ASCII
       
    32 # define charMap(X) sqlite3UpperToLower[(unsigned char)X]
       
    33 #endif
       
    34 #ifdef SQLITE_EBCDIC
       
    35 # define charMap(X) ebcdicToAscii[(unsigned char)X]
       
    36 const unsigned char ebcdicToAscii[] = {
       
    37 /* 0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F */
       
    38    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 0x */
       
    39    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 1x */
       
    40    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 2x */
       
    41    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 3x */
       
    42    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 4x */
       
    43    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 5x */
       
    44    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 95,  0,  0,  /* 6x */
       
    45    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 7x */
       
    46    0, 97, 98, 99,100,101,102,103,104,105,  0,  0,  0,  0,  0,  0,  /* 8x */
       
    47    0,106,107,108,109,110,111,112,113,114,  0,  0,  0,  0,  0,  0,  /* 9x */
       
    48    0,  0,115,116,117,118,119,120,121,122,  0,  0,  0,  0,  0,  0,  /* Ax */
       
    49    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* Bx */
       
    50    0, 97, 98, 99,100,101,102,103,104,105,  0,  0,  0,  0,  0,  0,  /* Cx */
       
    51    0,106,107,108,109,110,111,112,113,114,  0,  0,  0,  0,  0,  0,  /* Dx */
       
    52    0,  0,115,116,117,118,119,120,121,122,  0,  0,  0,  0,  0,  0,  /* Ex */
       
    53    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* Fx */
       
    54 };
       
    55 #endif
       
    56 
       
    57 /*
       
    58 ** The sqlite3KeywordCode function looks up an identifier to determine if
       
    59 ** it is a keyword.  If it is a keyword, the token code of that keyword is 
       
    60 ** returned.  If the input is not a keyword, TK_ID is returned.
       
    61 **
       
    62 ** The implementation of this routine was generated by a program,
       
    63 ** mkkeywordhash.c, located in the tool subdirectory of the distribution.
       
    64 ** The output of the mkkeywordhash.c program is written into a file
       
    65 ** named keywordhash.h and then included into this source file by
       
    66 ** the #include below.
       
    67 */
       
    68 #include "keywordhash.h"
       
    69 
       
    70 
       
    71 
       
    72 
       
    73 /*
       
    74 ** Return the length of the token that begins at z[0]. 
       
    75 ** Store the token type in *tokenType before returning.
       
    76 */
       
    77 static int getToken(const unsigned char *z, int *tokenType){
       
    78   int i, c;
       
    79   switch( *z ){
       
    80     case ' ': case '\t': case '\n': case '\f': case '\r': {
       
    81       for(i=1; isspace(z[i]); i++){}
       
    82       *tokenType = TK_SPACE;
       
    83       return i;
       
    84     }
       
    85     case '-': {
       
    86       if( z[1]=='-' ){
       
    87         for(i=2; (c=z[i])!=0 && c!='\n'; i++){}
       
    88         *tokenType = TK_COMMENT;
       
    89         return i;
       
    90       }
       
    91       *tokenType = TK_MINUS;
       
    92       return 1;
       
    93     }
       
    94     case '(': {
       
    95       *tokenType = TK_LP;
       
    96       return 1;
       
    97     }
       
    98     case ')': {
       
    99       *tokenType = TK_RP;
       
   100       return 1;
       
   101     }
       
   102     case ';': {
       
   103       *tokenType = TK_SEMI;
       
   104       return 1;
       
   105     }
       
   106     case '+': {
       
   107       *tokenType = TK_PLUS;
       
   108       return 1;
       
   109     }
       
   110     case '*': {
       
   111       *tokenType = TK_STAR;
       
   112       return 1;
       
   113     }
       
   114     case '/': {
       
   115       if( z[1]!='*' || z[2]==0 ){
       
   116         *tokenType = TK_SLASH;
       
   117         return 1;
       
   118       }
       
   119       for(i=3, c=z[2]; (c!='*' || z[i]!='/') && (c=z[i])!=0; i++){}
       
   120       if( c ) i++;
       
   121       *tokenType = TK_COMMENT;
       
   122       return i;
       
   123     }
       
   124     case '%': {
       
   125       *tokenType = TK_REM;
       
   126       return 1;
       
   127     }
       
   128     case '=': {
       
   129       *tokenType = TK_EQ;
       
   130       return 1 + (z[1]=='=');
       
   131     }
       
   132     case '<': {
       
   133       if( (c=z[1])=='=' ){
       
   134         *tokenType = TK_LE;
       
   135         return 2;
       
   136       }else if( c=='>' ){
       
   137         *tokenType = TK_NE;
       
   138         return 2;
       
   139       }else if( c=='<' ){
       
   140         *tokenType = TK_LSHIFT;
       
   141         return 2;
       
   142       }else{
       
   143         *tokenType = TK_LT;
       
   144         return 1;
       
   145       }
       
   146     }
       
   147     case '>': {
       
   148       if( (c=z[1])=='=' ){
       
   149         *tokenType = TK_GE;
       
   150         return 2;
       
   151       }else if( c=='>' ){
       
   152         *tokenType = TK_RSHIFT;
       
   153         return 2;
       
   154       }else{
       
   155         *tokenType = TK_GT;
       
   156         return 1;
       
   157       }
       
   158     }
       
   159     case '!': {
       
   160       if( z[1]!='=' ){
       
   161         *tokenType = TK_ILLEGAL;
       
   162         return 2;
       
   163       }else{
       
   164         *tokenType = TK_NE;
       
   165         return 2;
       
   166       }
       
   167     }
       
   168     case '|': {
       
   169       if( z[1]!='|' ){
       
   170         *tokenType = TK_BITOR;
       
   171         return 1;
       
   172       }else{
       
   173         *tokenType = TK_CONCAT;
       
   174         return 2;
       
   175       }
       
   176     }
       
   177     case ',': {
       
   178       *tokenType = TK_COMMA;
       
   179       return 1;
       
   180     }
       
   181     case '&': {
       
   182       *tokenType = TK_BITAND;
       
   183       return 1;
       
   184     }
       
   185     case '~': {
       
   186       *tokenType = TK_BITNOT;
       
   187       return 1;
       
   188     }
       
   189     case '`':
       
   190     case '\'':
       
   191     case '"': {
       
   192       int delim = z[0];
       
   193       for(i=1; (c=z[i])!=0; i++){
       
   194         if( c==delim ){
       
   195           if( z[i+1]==delim ){
       
   196             i++;
       
   197           }else{
       
   198             break;
       
   199           }
       
   200         }
       
   201       }
       
   202       if( c ){
       
   203         *tokenType = TK_STRING;
       
   204         return i+1;
       
   205       }else{
       
   206         *tokenType = TK_ILLEGAL;
       
   207         return i;
       
   208       }
       
   209     }
       
   210     case '.': {
       
   211 #ifndef SQLITE_OMIT_FLOATING_POINT
       
   212       if( !isdigit(z[1]) )
       
   213 #endif
       
   214       {
       
   215         *tokenType = TK_DOT;
       
   216         return 1;
       
   217       }
       
   218       /* If the next character is a digit, this is a floating point
       
   219       ** number that begins with ".".  Fall thru into the next case */
       
   220     }
       
   221     case '0': case '1': case '2': case '3': case '4':
       
   222     case '5': case '6': case '7': case '8': case '9': {
       
   223       *tokenType = TK_INTEGER;
       
   224       for(i=0; isdigit(z[i]); i++){}
       
   225 #ifndef SQLITE_OMIT_FLOATING_POINT
       
   226       if( z[i]=='.' ){
       
   227         i++;
       
   228         while( isdigit(z[i]) ){ i++; }
       
   229         *tokenType = TK_FLOAT;
       
   230       }
       
   231       if( (z[i]=='e' || z[i]=='E') &&
       
   232            ( isdigit(z[i+1]) 
       
   233             || ((z[i+1]=='+' || z[i+1]=='-') && isdigit(z[i+2]))
       
   234            )
       
   235       ){
       
   236         i += 2;
       
   237         while( isdigit(z[i]) ){ i++; }
       
   238         *tokenType = TK_FLOAT;
       
   239       }
       
   240 #endif
       
   241       while( IdChar(z[i]) ){
       
   242         *tokenType = TK_ILLEGAL;
       
   243         i++;
       
   244       }
       
   245       return i;
       
   246     }
       
   247     case '[': {
       
   248       for(i=1, c=z[0]; c!=']' && (c=z[i])!=0; i++){}
       
   249       *tokenType = TK_ID;
       
   250       return i;
       
   251     }
       
   252     case '?': {
       
   253       *tokenType = TK_VARIABLE;
       
   254       for(i=1; isdigit(z[i]); i++){}
       
   255       return i;
       
   256     }
       
   257     case '#': {
       
   258       for(i=1; isdigit(z[i]); i++){}
       
   259       if( i>1 ){
       
   260         /* Parameters of the form #NNN (where NNN is a number) are used
       
   261         ** internally by sqlite3NestedParse.  */
       
   262         *tokenType = TK_REGISTER;
       
   263         return i;
       
   264       }
       
   265       /* Fall through into the next case if the '#' is not followed by
       
   266       ** a digit. Try to match #AAAA where AAAA is a parameter name. */
       
   267     }
       
   268 #ifndef SQLITE_OMIT_TCL_VARIABLE
       
   269     case '$':
       
   270 #endif
       
   271     case '@':  /* For compatibility with MS SQL Server */
       
   272     case ':': {
       
   273       int n = 0;
       
   274       *tokenType = TK_VARIABLE;
       
   275       for(i=1; (c=z[i])!=0; i++){
       
   276         if( IdChar(c) ){
       
   277           n++;
       
   278 #ifndef SQLITE_OMIT_TCL_VARIABLE
       
   279         }else if( c=='(' && n>0 ){
       
   280           do{
       
   281             i++;
       
   282           }while( (c=z[i])!=0 && !isspace(c) && c!=')' );
       
   283           if( c==')' ){
       
   284             i++;
       
   285           }else{
       
   286             *tokenType = TK_ILLEGAL;
       
   287           }
       
   288           break;
       
   289         }else if( c==':' && z[i+1]==':' ){
       
   290           i++;
       
   291 #endif
       
   292         }else{
       
   293           break;
       
   294         }
       
   295       }
       
   296       if( n==0 ) *tokenType = TK_ILLEGAL;
       
   297       return i;
       
   298     }
       
   299 #ifndef SQLITE_OMIT_BLOB_LITERAL
       
   300     case 'x': case 'X': {
       
   301       if( (c=z[1])=='\'' || c=='"' ){
       
   302         int delim = c;
       
   303         *tokenType = TK_BLOB;
       
   304         for(i=2; (c=z[i])!=0; i++){
       
   305           if( c==delim ){
       
   306             if( i%2 ) *tokenType = TK_ILLEGAL;
       
   307             break;
       
   308           }
       
   309           if( !isxdigit(c) ){
       
   310             *tokenType = TK_ILLEGAL;
       
   311             return i;
       
   312           }
       
   313         }
       
   314         if( c ) i++;
       
   315         return i;
       
   316       }
       
   317       /* Otherwise fall through to the next case */
       
   318     }
       
   319 #endif
       
   320     default: {
       
   321       if( !IdChar(*z) ){
       
   322         break;
       
   323       }
       
   324       for(i=1; IdChar(z[i]); i++){}
       
   325       *tokenType = keywordCode((char*)z, i);
       
   326       return i;
       
   327     }
       
   328   }
       
   329   *tokenType = TK_ILLEGAL;
       
   330   return 1;
       
   331 }
       
   332 int sqlite3GetToken(const unsigned char *z, int *tokenType){
       
   333   return getToken(z, tokenType);
       
   334 }
       
   335 
       
   336 /*
       
   337 ** Run the parser on the given SQL string.  The parser structure is
       
   338 ** passed in.  An SQLITE_ status code is returned.  If an error occurs
       
   339 ** and pzErrMsg!=NULL then an error message might be written into 
       
   340 ** memory obtained from sqlite3_malloc() and *pzErrMsg made to point to that
       
   341 ** error message.  Or maybe not.
       
   342 */
       
   343 int sqlite3RunParser(Parse *pParse, const char *zSql, char **pzErrMsg){
       
   344   int nErr = 0;
       
   345   int i;
       
   346   void *pEngine;
       
   347   int tokenType;
       
   348   int lastTokenParsed = -1;
       
   349   sqlite3 *db = pParse->db;
       
   350 
       
   351   if( db->activeVdbeCnt==0 ){
       
   352     db->u1.isInterrupted = 0;
       
   353   }
       
   354   pParse->rc = SQLITE_OK;
       
   355   i = 0;
       
   356   pEngine = sqlite3ParserAlloc((void*(*)(size_t))sqlite3_malloc);
       
   357   if( pEngine==0 ){
       
   358     db->mallocFailed = 1;
       
   359     return SQLITE_NOMEM;
       
   360   }
       
   361   assert( pParse->sLastToken.dyn==0 );
       
   362   assert( pParse->pNewTable==0 );
       
   363   assert( pParse->pNewTrigger==0 );
       
   364   assert( pParse->nVar==0 );
       
   365   assert( pParse->nVarExpr==0 );
       
   366   assert( pParse->nVarExprAlloc==0 );
       
   367   assert( pParse->apVarExpr==0 );
       
   368   pParse->zTail = pParse->zSql = zSql;
       
   369   while( !db->mallocFailed && zSql[i]!=0 ){
       
   370     assert( i>=0 );
       
   371     pParse->sLastToken.z = (u8*)&zSql[i];
       
   372     assert( pParse->sLastToken.dyn==0 );
       
   373     pParse->sLastToken.n = getToken((unsigned char*)&zSql[i],&tokenType);
       
   374     i += pParse->sLastToken.n;
       
   375     if( i>SQLITE_MAX_SQL_LENGTH ){
       
   376       pParse->rc = SQLITE_TOOBIG;
       
   377       break;
       
   378     }
       
   379     switch( tokenType ){
       
   380       case TK_SPACE:
       
   381       case TK_COMMENT: {
       
   382         if( db->u1.isInterrupted ){
       
   383           pParse->rc = SQLITE_INTERRUPT;
       
   384           sqlite3SetString(pzErrMsg, "interrupt", (char*)0);
       
   385           goto abort_parse;
       
   386         }
       
   387         break;
       
   388       }
       
   389       case TK_ILLEGAL: {
       
   390         if( pzErrMsg ){
       
   391           sqlite3_free(*pzErrMsg);
       
   392           *pzErrMsg = sqlite3MPrintf(db, "unrecognized token: \"%T\"",
       
   393                           &pParse->sLastToken);
       
   394         }
       
   395         nErr++;
       
   396         goto abort_parse;
       
   397       }
       
   398       case TK_SEMI: {
       
   399         pParse->zTail = &zSql[i];
       
   400         /* Fall thru into the default case */
       
   401       }
       
   402       default: {
       
   403         sqlite3Parser(pEngine, tokenType, pParse->sLastToken, pParse);
       
   404         lastTokenParsed = tokenType;
       
   405         if( pParse->rc!=SQLITE_OK ){
       
   406           goto abort_parse;
       
   407         }
       
   408         break;
       
   409       }
       
   410     }
       
   411   }
       
   412 abort_parse:
       
   413   if( zSql[i]==0 && nErr==0 && pParse->rc==SQLITE_OK ){
       
   414     if( lastTokenParsed!=TK_SEMI ){
       
   415       sqlite3Parser(pEngine, TK_SEMI, pParse->sLastToken, pParse);
       
   416       pParse->zTail = &zSql[i];
       
   417     }
       
   418     sqlite3Parser(pEngine, 0, pParse->sLastToken, pParse);
       
   419   }
       
   420   sqlite3ParserFree(pEngine, sqlite3_free);
       
   421   if( db->mallocFailed ){
       
   422     pParse->rc = SQLITE_NOMEM;
       
   423   }
       
   424   if( pParse->rc!=SQLITE_OK && pParse->rc!=SQLITE_DONE && pParse->zErrMsg==0 ){
       
   425     sqlite3SetString(&pParse->zErrMsg, sqlite3ErrStr(pParse->rc), (char*)0);
       
   426   }
       
   427   if( pParse->zErrMsg ){
       
   428     if( pzErrMsg && *pzErrMsg==0 ){
       
   429       *pzErrMsg = pParse->zErrMsg;
       
   430     }else{
       
   431       sqlite3_free(pParse->zErrMsg);
       
   432     }
       
   433     pParse->zErrMsg = 0;
       
   434     if( !nErr ) nErr++;
       
   435   }
       
   436   if( pParse->pVdbe && pParse->nErr>0 && pParse->nested==0 ){
       
   437     sqlite3VdbeDelete(pParse->pVdbe);
       
   438     pParse->pVdbe = 0;
       
   439   }
       
   440 #ifndef SQLITE_OMIT_SHARED_CACHE
       
   441   if( pParse->nested==0 ){
       
   442     sqlite3_free(pParse->aTableLock);
       
   443     pParse->aTableLock = 0;
       
   444     pParse->nTableLock = 0;
       
   445   }
       
   446 #endif
       
   447 
       
   448   if( !IN_DECLARE_VTAB ){
       
   449     /* If the pParse->declareVtab flag is set, do not delete any table 
       
   450     ** structure built up in pParse->pNewTable. The calling code (see vtab.c)
       
   451     ** will take responsibility for freeing the Table structure.
       
   452     */
       
   453     sqlite3DeleteTable(pParse->pNewTable);
       
   454   }
       
   455 
       
   456   sqlite3DeleteTrigger(pParse->pNewTrigger);
       
   457   sqlite3_free(pParse->apVarExpr);
       
   458   if( nErr>0 && (pParse->rc==SQLITE_OK || pParse->rc==SQLITE_DONE) ){
       
   459     pParse->rc = SQLITE_ERROR;
       
   460   }
       
   461   return nErr;
       
   462 }