persistentstorage/dbms/pcdbms/usql/UQ_LEXER.CPP
changeset 0 08ec8eefde2f
equal deleted inserted replaced
-1:000000000000 0:08ec8eefde2f
       
     1 // Copyright (c) 1998-2009 Nokia Corporation and/or its subsidiary(-ies).
       
     2 // All rights reserved.
       
     3 // This component and the accompanying materials are made available
       
     4 // under the terms of "Eclipse Public License v1.0"
       
     5 // which accompanies this distribution, and is available
       
     6 // at the URL "http://www.eclipse.org/legal/epl-v10.html".
       
     7 //
       
     8 // Initial Contributors:
       
     9 // Nokia Corporation - initial contribution.
       
    10 //
       
    11 // Contributors:
       
    12 //
       
    13 // Description:
       
    14 // SQL parser tokeniser
       
    15 // 
       
    16 //
       
    17 
       
    18 #include "UQ_STD.H"
       
    19 
       
    20 // optimised tables for ASCII character set
       
    21 
       
    22 const TUint8 KAlpha=0x1;
       
    23 const TUint8 KDigitOr_=0x2;
       
    24 
       
    25 const TUint8 KCharAttrib[]=
       
    26 	{
       
    27 	KDigitOr_,KDigitOr_,KDigitOr_,KDigitOr_,KDigitOr_,KDigitOr_,KDigitOr_,KDigitOr_,
       
    28 	KDigitOr_,KDigitOr_,0,0,0,0,0,0,
       
    29 	0,KAlpha,KAlpha,KAlpha,KAlpha,KAlpha,KAlpha,KAlpha,
       
    30 	KAlpha,KAlpha,KAlpha,KAlpha,KAlpha,KAlpha,KAlpha,KAlpha,
       
    31 	KAlpha,KAlpha,KAlpha,KAlpha,KAlpha,KAlpha,KAlpha,KAlpha,
       
    32 	KAlpha,KAlpha,KAlpha,0,0,0,0,KDigitOr_,
       
    33 	0,KAlpha,KAlpha,KAlpha,KAlpha,KAlpha,KAlpha,KAlpha,
       
    34 	KAlpha,KAlpha,KAlpha,KAlpha,KAlpha,KAlpha,KAlpha,KAlpha,
       
    35 	KAlpha,KAlpha,KAlpha,KAlpha,KAlpha,KAlpha,KAlpha,KAlpha,
       
    36 	KAlpha,KAlpha,KAlpha
       
    37 	};
       
    38 
       
    39 #define ISALPHA(aChar) (TUint(aChar-'0')<=TUint('z'-'0') && KCharAttrib[aChar-'0']&KAlpha)
       
    40 #define ISALPHA_DIGIT_OR_(aChar) (TUint(aChar-'0')<=TUint('z'-'0') && KCharAttrib[aChar-'0']&(KAlpha|KDigitOr_))
       
    41 #define LCASE(aChar) (aChar|0x20)
       
    42 
       
    43 // The keywords
       
    44 // These are always stored as ASCII because DBMS has its own 
       
    45 
       
    46 const TInt KMaxKeywordLength=15;
       
    47 
       
    48 static const TText8 KSqlKeywords[][KMaxKeywordLength+1]=
       
    49 	{
       
    50 #define KEYWORD(s) #s
       
    51 #include "UQ_KEYWD.H"
       
    52 #undef KEYWORD
       
    53 	};
       
    54 const TInt KSqlKeywordCount=sizeof(KSqlKeywords)/sizeof(*KSqlKeywords);
       
    55 
       
    56 #if defined(_ASSERTIONS)
       
    57 TInt CheckKeywords()
       
    58 //
       
    59 // ensure that the keyword table is in alphabetical order
       
    60 //
       
    61 	{
       
    62 	for (TInt ii=1;ii<KSqlKeywordCount;++ii)
       
    63 		__ASSERT(TPtrC8(KSqlKeywords[ii-1])<TPtrC8(KSqlKeywords[ii]));
       
    64 	return 1;
       
    65 	}
       
    66 #endif
       
    67 
       
    68 // class TSqlLexer
       
    69 
       
    70 TInt TSqlLexer::CompareKeyword(TInt aKeyword,const RSqlLiteral& aIdentifier)
       
    71 //
       
    72 // Check if the identifer in aIdentifier is a keyword
       
    73 // uses a case-insensitive match, not folding
       
    74 //
       
    75 	{
       
    76 	__ASSERT(TUint(aKeyword)<TUint(KSqlKeywordCount));
       
    77 //
       
    78 	const TText* ptr=aIdentifier.Ptr();
       
    79 	const TText* end=aIdentifier.End();
       
    80 	const TText8* pk=&KSqlKeywords[aKeyword][0];
       
    81 	for (;;)
       
    82 		{
       
    83 		TUint ck=*pk++;
       
    84 		if (ptr==end)
       
    85 			return ck;
       
    86 		if (!ck)
       
    87 			return -1;
       
    88 		TInt d=ck-LCASE(TUint(*ptr++));
       
    89 		if (d)
       
    90 			return d;
       
    91 		}
       
    92 	}
       
    93 
       
    94 TSqlKeyword TSqlLexer::Keyword(const TSqlToken& aToken)
       
    95 //
       
    96 // non-member function: Return the keyword value
       
    97 //
       
    98 	{
       
    99 	if (aToken==ESqlIdentifier)
       
   100 		{
       
   101 		TInt r=KSqlKeywordCount;
       
   102 		TInt l=0;
       
   103 		while (r>l)
       
   104 			{
       
   105 			TInt m=(l+r)>>1;
       
   106 			TInt k=CompareKeyword(m,aToken.Literal());
       
   107 			if (k>0)
       
   108 				r=m;
       
   109 			else if (k<0)
       
   110 				l=m+1;
       
   111 			else
       
   112 				return TSqlKeyword(m);		// keyword
       
   113 			}
       
   114 		}
       
   115 	// identifier
       
   116 	return ESqlNotKeyword;
       
   117 	}
       
   118 
       
   119 TSqlLexer::TSqlLexer(const TDesC& aSql)
       
   120 	: iNext(aSql.Ptr()),iEnd(iNext+aSql.Length())
       
   121 	{
       
   122 	__ASSERT(CheckKeywords());
       
   123 	}
       
   124 
       
   125 TSqlTokenType TSqlLexer::GetIdentifier(TSqlToken& aToken)
       
   126 //
       
   127 // Get a keyword or identifier. Do not resolve a keyword at this stage
       
   128 //
       
   129 	{
       
   130 	const TText* end=iEnd;
       
   131 	const TText* next=iNext-1;
       
   132 	while (++next<end)
       
   133 		{
       
   134 		TUint ch=*next;
       
   135 		if (ISALPHA_DIGIT_OR_(ch))
       
   136 			continue;
       
   137 		if (!TChar(ch).IsAlphaDigit())
       
   138 			break;
       
   139 		}
       
   140 	aToken.iLiteral.SetText(iNext-1,next);
       
   141 	iNext=next;
       
   142 	return ESqlIdentifier;
       
   143 	}
       
   144 
       
   145 TInt TSqlLexer::GetInteger(TInt64 &aVal)
       
   146 //
       
   147 // A rather more optimised version of TLex::Val(TInt64&)
       
   148 // initially accumulate the value in a TUint32, and only switch to 64-bit arithmetic
       
   149 // if the value overflows. Always return a 64-bit value
       
   150 //
       
   151 	{
       
   152 	const TUint KPreMultiplyLimit32=429496728;	//(KMaxTUint-9)/10
       
   153 	const TUint KPreMultiplyLimit64=214748364;	//(KMaxTInt+1)/10
       
   154 //
       
   155 	const TText* ptr=iNext;
       
   156 	const TText* const end=iEnd;
       
   157 	__ASSERT(ptr<end);
       
   158 	TUint sign=0;
       
   159 	TUint c=*ptr;
       
   160 	if (c=='-')
       
   161 		{
       
   162 		sign=1;
       
   163 		if (++ptr==end)
       
   164 			return KErrGeneral;
       
   165 		c=*ptr;
       
   166 		}
       
   167 	else if (c=='+')
       
   168 		{
       
   169 		if (++ptr==end)
       
   170 			return KErrGeneral;
       
   171 		c=*ptr;
       
   172 		}
       
   173 	c-='0';
       
   174 	if (c>=10u)
       
   175 		return KErrGeneral;		// no digits at all
       
   176 	TUint val32=c;
       
   177 	while (++ptr<end)
       
   178 		{
       
   179 		c=*ptr-'0';
       
   180 		if (c>=10u)
       
   181 			break;
       
   182 		if (val32>KPreMultiplyLimit32)
       
   183 			goto overflow64;	// will not fit into 32 bit arithmetic
       
   184 		val32*=10;
       
   185 		val32+=c;
       
   186 		}
       
   187 // we have result, just set the sign and finish
       
   188 	aVal=val32;
       
   189 	goto checksign;
       
   190 //
       
   191 // continue the accumulation with a 64-bit integer
       
   192 overflow64:
       
   193 	aVal=val32;
       
   194 	for (;;)
       
   195 		{
       
   196 		I64MUL10(aVal);
       
   197 		aVal+=c;
       
   198 		if (++ptr==end)
       
   199 			break;
       
   200 		c=*ptr-'0';
       
   201 		if (c>=10u)
       
   202 			break;
       
   203 		if (I64HIGH(aVal)>KPreMultiplyLimit64)
       
   204 			return KErrOverflow;	// the value is certain to overflow
       
   205 		}
       
   206 	if (I64HIGH(aVal)&0x80000000u)
       
   207 		{	// greater than the "half way mark"
       
   208 		if (!sign)
       
   209 			return KErrOverflow;
       
   210 		if (I64LOW(aVal)!=0)
       
   211 			return KErrOverflow;
       
   212 		}
       
   213 checksign:
       
   214 	iNext=ptr;
       
   215 	if (sign)
       
   216 		aVal=-aVal;
       
   217 	return KErrNone;
       
   218 	}
       
   219 
       
   220 TSqlTokenType TSqlLexer::GetNumber(TSqlToken& aToken)
       
   221 	{
       
   222 	const TText* mark=--iNext;			// rewind past initial character
       
   223 	// attempt to parse a integer
       
   224 	TInt r=GetInteger(aToken.iLiteral.SetInt());
       
   225 	if (r==KErrNone)
       
   226 		{
       
   227 		if (iNext<iEnd)
       
   228 			{
       
   229 			TUint c=*iNext;
       
   230 			if (c!='.' && c!='e' && c!='E')
       
   231 				return ESqlLiteralInt;		// it is an integer
       
   232 			}
       
   233 		else
       
   234 			return ESqlLiteralInt;		// it is an integer
       
   235 		}
       
   236 	TLex lex(TPtrC(mark,iEnd-mark));
       
   237 	r=lex.Val(aToken.iLiteral.SetReal(),TChar('.'));
       
   238 	if (r!=KErrNone)
       
   239 		return SqlError(r);
       
   240 	iNext=mark+lex.Offset();
       
   241 	return ESqlLiteralReal;
       
   242 	}
       
   243 
       
   244 TSqlTokenType TSqlLexer::GetString(TSqlToken& aToken)
       
   245 	{
       
   246 	const TText* next=iNext;
       
   247 	const TText* end=iEnd;
       
   248 	for (;;)
       
   249 		{
       
   250 		if (next==end)
       
   251 			return SqlError();
       
   252 		TUint c=*next++;
       
   253 		if (c=='\'')
       
   254 			{
       
   255 			if (next==end)
       
   256 				break;
       
   257 			if (*next!='\'')
       
   258 				break;
       
   259 			next++;
       
   260 			}
       
   261 		}
       
   262 	aToken.iLiteral.SetText(iNext,next-1);
       
   263 	iNext=next;
       
   264 	return ESqlLiteralText;
       
   265 	}
       
   266 
       
   267 TSqlTokenType TSqlLexer::GetDate(TSqlToken& aToken)
       
   268 	{
       
   269 	const TText* end=iEnd;
       
   270 	const TText* next=iNext;
       
   271 	do
       
   272 		{
       
   273 		if (next==end)
       
   274 			return SqlError();
       
   275 		} while (*next++!='#');
       
   276 	TInt r=aToken.iLiteral.SetTime().Parse(TPtrC(iNext,(next-1)-iNext));
       
   277 	if (r<0)
       
   278 		return SqlError(r);
       
   279 	iNext=next;
       
   280 	return ESqlLiteralTime;
       
   281 	}
       
   282 
       
   283 TSqlTokenType TSqlLexer::GetBlob(TSqlToken& aToken)
       
   284 	{
       
   285 	const TText* end=iEnd;
       
   286 	const TText* next=iNext;
       
   287 	// Blob literalisation format X'<hex-data>'
       
   288 	// first char must be single quote - '
       
   289 	if ( *next != '\'' ) {
       
   290 		return SqlError(KErrArgument);
       
   291 	}
       
   292 	const TText* start = ++next;
       
   293 	do
       
   294 		{
       
   295 		if (next==end)
       
   296 			return SqlError();
       
   297 		} while (*next++!='\'');
       
   298 	const TText* blobend = next-1;
       
   299 
       
   300 	aToken.iLiteral.SetBlob(start,blobend);
       
   301 	iNext=next;
       
   302 	return ESqlLiteralBlob;
       
   303 	}
       
   304 
       
   305 TSqlTokenType TSqlLexer::GetNextToken(TSqlToken& aToken)
       
   306 	{
       
   307 	const TText* ptr=iNext;
       
   308 	const TText* const end=iEnd;
       
   309 	for (;;)
       
   310 		{
       
   311 		if (ptr==end)
       
   312 			return ESqlEos;
       
   313 		TUint ch=*ptr++;
       
   314 		iNext=ptr;
       
   315 		switch (ch)
       
   316 			{
       
   317 		case ' ':			// a "normal" space
       
   318 			continue;
       
   319 		case '0': case '1': case '2': case '3': case '4':	// literal number
       
   320 		case '5': case '6': case '7': case '8': case '9':
       
   321 		case '+': case '-': case '.':
       
   322 			return GetNumber(aToken);
       
   323 		case '\'':
       
   324 			return GetString(aToken);
       
   325 		case '#':
       
   326 			return GetDate(aToken);
       
   327 		case 'X':
       
   328 			return GetBlob(aToken);
       
   329 		case '*':
       
   330 			return ESqlAsterisk;
       
   331 		case ',':
       
   332 			return ESqlComma;
       
   333 		case '(':
       
   334 			return ESqlLeftBracket;
       
   335 		case ')':
       
   336 			return ESqlRightBracket;
       
   337 		case '=':
       
   338 			return ESqlEqual;
       
   339 		case '<':
       
   340 			{
       
   341 			ch=*ptr++;				
       
   342 			if (ch=='=')
       
   343 				{
       
   344 				iNext=ptr;
       
   345 				return ESqlLessEqual;
       
   346 				}
       
   347 			if (ch=='>')
       
   348 				{
       
   349 				iNext=ptr;
       
   350 				return ESqlNotEqual;
       
   351 				}
       
   352 			return ESqlLess;
       
   353 			}
       
   354 		case '>':
       
   355 			{
       
   356 			ch=*ptr++;			
       
   357 			if (ch=='=')
       
   358 				{
       
   359 				iNext=ptr;
       
   360 				return ESqlGreaterEqual;
       
   361 				}
       
   362 			return ESqlGreater;
       
   363 			}
       
   364 		default:
       
   365 			break;
       
   366 			}
       
   367 		if (ISALPHA(ch))
       
   368 			return GetIdentifier(aToken);		// keyword or identifier
       
   369 		TChar cc(ch);
       
   370 		if (cc.IsAlpha())
       
   371 			return GetIdentifier(aToken);		// keyword or identifier
       
   372 		if (!cc.IsSpace())
       
   373 			return SqlError();
       
   374 		}
       
   375 	}
       
   376 	
       
   377 const TText* TSqlLexer::Next() const
       
   378 	{
       
   379 	return iNext;
       
   380 	}
       
   381 void TSqlLexer::Set(const TText* aNext) 
       
   382 	{
       
   383 	iNext = aNext ;
       
   384 	}