searchengine/util/cpixtools/src/cpixparsetools.cpp
changeset 0 671dee74050a
child 8 6547bf8ca13a
equal deleted inserted replaced
-1:000000000000 0:671dee74050a
       
     1 /*
       
     2 * Copyright (c) 2010 Nokia Corporation and/or its subsidiary(-ies).
       
     3 * All rights reserved.
       
     4 * This component and the accompanying materials are made available
       
     5 * under the terms of "Eclipse Public License v1.0"
       
     6 * which accompanies this distribution, and is available
       
     7 * at the URL "http://www.eclipse.org/legal/epl-v10.html".
       
     8 *
       
     9 * Initial Contributors:
       
    10 * Nokia Corporation - initial contribution.
       
    11 *
       
    12 * Contributors:
       
    13 *
       
    14 * Description: 
       
    15 *
       
    16 */
       
    17 /*
       
    18  * cpixparsetools.cpp
       
    19  *
       
    20  *  Created on: Apr 14, 2009
       
    21  *      Author: admin
       
    22  */
       
    23 
       
#include "cpixparsetools.h"

#include "cpixtools.h"

#include <iostream>
#include <sstream>
#include <stdlib.h>
#include <wctype.h>
       
    30 
       
    31 namespace Cpt {
       
    32 
       
    33 
       
    34     namespace Lex {
       
    35 	
       
        // Escape character recognized inside string literals (used by
        // StrLitTokenizer::consume and Lit::ParseString).
        const wchar_t ESCAPE_SYMBOL = '\\';

        // Out-of-line destructor anchoring the abstract Tokenizer interface.
        Tokenizer::~Tokenizer() {}
       
    39 	
       
    40         LexException::LexException(const wchar_t* wWhat, 
       
    41                                    const wchar_t* where) 
       
    42             : wWhat_(wWhat), 
       
    43               where_(where) {
       
    44             ;
       
    45         }
       
    46 
       
    47         LexException::~LexException() 
       
    48         {
       
    49             ;
       
    50         }
       
    51 
       
    52         const wchar_t* LexException::where() const {
       
    53             return where_;
       
    54         }
       
    55 
       
    56         const wchar_t* LexException::wWhat() const throw() {
       
    57             return wWhat_.c_str();
       
    58         }
       
    59 
       
    60         void LexException::setContext(const wchar_t * context)
       
    61         {
       
    62             // TODO legacy of implementation of obsoleted describe() -
       
    63             // it can be optimized by doind direct substring - concat
       
    64             // operations instead of looping through context
       
    65             std::wstring tmp;
       
    66             tmp += wWhat_; 
       
    67             tmp += L" at: \""; 
       
    68             for (; ; context++) {
       
    69                 if (context == where_) {
       
    70                     tmp += L"*here*";
       
    71                 }
       
    72                 if (!*context) {
       
    73                     break; 
       
    74                 }
       
    75                 tmp += *context;
       
    76             }
       
    77             tmp += L"\"";
       
    78 
       
    79             wWhat_ = tmp;
       
    80         }
       
    81 
       
    82 
       
    83         Token::Token(int type, const wchar_t* begin, const wchar_t* end) 
       
    84             : type_(type), begin_(begin), end_(end) {
       
    85         }
       
    86 
       
    87         Token::Token() 
       
    88             : type_(0), begin_(0), end_(0) {
       
    89         }
       
    90 		
       
    91         int Token::type() const { return type_; }; 
       
    92         const wchar_t* Token::begin() const { return begin_; };
       
    93         const wchar_t* Token::end() const { return end_; };
       
    94         int Token::length() const { return end_ - begin_; };
       
    95         std::wstring Token::text() const {
       
    96             std::wstring ret;
       
    97             for (const wchar_t* i = begin_; i != end_; i++) {
       
    98                 ret += *i; 
       
    99             }
       
   100             return ret; 
       
   101         }
       
   102 
       
   103         StrLitTokenizer::StrLitTokenizer(wchar_t citate) 
       
   104             : 	citate_(citate)
       
   105         {	
       
   106             reset(); 
       
   107         }
       
   108 						
       
   109         void StrLitTokenizer::reset() 
       
   110         { 
       
   111             escape_ = false, 
       
   112                 opened_ = false, 
       
   113                 begin_ = 0;
       
   114             end_ = 0; 
       
   115         }
       
   116         Token StrLitTokenizer::get() 
       
   117         { 
       
   118             return Token( TOKEN_STRLIT, begin_, end_ ); 
       
   119         }
       
   120         TokenizerState StrLitTokenizer::consume(const wchar_t* cursor) 
       
   121         {
       
   122             if (!*cursor) return TOKENIZER_FAILED; // fail always on EOF
       
   123             if (!opened_) 
       
   124                 {
       
   125                     if (*cursor == citate_) 
       
   126                         {
       
   127                             opened_ = true;
       
   128                             begin_ = cursor; 
       
   129                         } else {
       
   130                         return TOKENIZER_FAILED; 
       
   131                     }
       
   132                 } else if (escape_)  {
       
   133                 escape_ = false;
       
   134             } else {
       
   135                 if (*cursor == citate_) {
       
   136                     end_ = cursor+1; 
       
   137                     return TOKENIZER_FINISHED;
       
   138                 } else if (*cursor == '\\') {
       
   139                     escape_ = true;
       
   140                 }
       
   141             } 
       
   142             return TOKENIZER_HUNGRY; 
       
   143         }
       
   144 		
       
   145         IntLitTokenizer::IntLitTokenizer() {
       
   146             reset();
       
   147         }
       
   148 
       
   149         void IntLitTokenizer::reset() {
       
   150             begin_ = NULL;
       
   151             end_ = NULL;
       
   152             beginning_ = true;
       
   153         }
       
   154 
       
   155         Token IntLitTokenizer::get() {
       
   156             return Token(TOKEN_INTLIT, begin_, end_);
       
   157         }
       
   158 
       
   159         TokenizerState IntLitTokenizer::consume(const wchar_t * cursor) {
       
   160             TokenizerState
       
   161                 rv = TOKENIZER_HUNGRY;
       
   162 
       
   163             if (beginning_)
       
   164                 {
       
   165                     if (*cursor != L'+'
       
   166                         && *cursor != L'-'
       
   167                         && !isdigit(*cursor))
       
   168                         {
       
   169                             rv = TOKENIZER_FAILED;
       
   170                         }
       
   171                     beginning_ = false;
       
   172                     begin_ = cursor;
       
   173                 }
       
   174             else if (!isdigit(*cursor))
       
   175                 {
       
   176                     rv = TOKENIZER_FINISHED;
       
   177                     end_ = cursor;
       
   178                 }
       
   179 
       
   180             return rv;
       
   181         }
       
   182 
       
   183         RealLitTokenizer::RealLitTokenizer() {
       
   184             reset();
       
   185         }
       
   186 
       
   187         void RealLitTokenizer::reset() {
       
   188             begin_ = NULL;
       
   189             end_ = NULL;
       
   190             beginning_ = true;
       
   191             hadDotAlready_ = false;
       
   192         }
       
   193 
       
   194         Token RealLitTokenizer::get() {
       
   195             return Token(TOKEN_REALLIT, begin_, end_);
       
   196         }
       
   197 
       
   198         TokenizerState RealLitTokenizer::consume(const wchar_t * cursor) {
       
   199             TokenizerState
       
   200                 rv = TOKENIZER_HUNGRY;
       
   201 
       
   202             if (beginning_)
       
   203                 {
       
   204                     if (*cursor != L'+'
       
   205                         && *cursor != L'-'
       
   206                         && !isdigit(*cursor)
       
   207                         && *cursor != L'.')
       
   208                         {
       
   209                             rv = TOKENIZER_FAILED;
       
   210                         }
       
   211                     beginning_ = false;
       
   212                     begin_ = cursor;
       
   213                 }
       
   214             else if (*cursor == L'.')
       
   215                 {
       
   216                     if (hadDotAlready_)
       
   217                         {
       
   218                             rv = TOKENIZER_FINISHED;
       
   219                             end_ = cursor;
       
   220                         }
       
   221 
       
   222                     hadDotAlready_ = true;
       
   223                 }
       
   224             else if (!isdigit(*cursor))
       
   225                 {
       
   226                     rv = TOKENIZER_FINISHED;
       
   227                     end_ = cursor;
       
   228                 }
       
   229 
       
   230             return rv;
       
   231         }
       
   232 
       
   233         WhitespaceTokenizer::WhitespaceTokenizer() { 
       
   234             reset(); 
       
   235         }
       
   236 
       
   237         void WhitespaceTokenizer::reset() 
       
   238         { 
       
   239             empty_ = true; 
       
   240             begin_ = 0;
       
   241             end_ = 0; 
       
   242         }
       
   243 		
       
   244         Token WhitespaceTokenizer::get() 
       
   245         {
       
   246             return Token( TOKEN_WS, begin_, end_ );
       
   247         }
       
   248 		
       
   249         TokenizerState WhitespaceTokenizer::consume(const wchar_t* cursor) 
       
   250         {
       
   251             if (!begin_) begin_ = cursor; 
       
   252 			
       
   253             if (isspace(*cursor))  
       
   254                 {
       
   255                     empty_ = false;
       
   256                 } else {
       
   257                 end_ = cursor; 
       
   258                 return empty_ ? TOKENIZER_FAILED : TOKENIZER_FINISHED; 
       
   259             }
       
   260             return TOKENIZER_HUNGRY;  
       
   261         }
       
   262 		
       
   263         IdTokenizer::IdTokenizer() 
       
   264         { 
       
   265             reset();
       
   266         }
       
   267 		
       
   268         void IdTokenizer::reset() 
       
   269         {
       
   270             begin_ = 0; 
       
   271             end_ = 0;  
       
   272         }
       
   273 		
       
   274 		
       
   275         Token IdTokenizer::get() 
       
   276         {
       
   277             return Token( TOKEN_ID, begin_, end_ );
       
   278         }
       
   279 		
       
   280         TokenizerState IdTokenizer::consume(const wchar_t* cursor) 
       
   281         {
       
   282             if (!begin_) begin_ = cursor; 
       
   283             if (cursor == begin_ && !isalpha(*cursor)) {
       
   284                 return TOKENIZER_FAILED;
       
   285             } else if (cursor > begin_ && !isalnum(*cursor)) {  
       
   286                 end_ = cursor;
       
   287                 return TOKENIZER_FINISHED; 
       
   288             } 
       
   289             return TOKENIZER_HUNGRY; 
       
   290         }
       
   291 
       
   292         SymbolTokenizer::SymbolTokenizer(int tokenType, const wchar_t* symbol) 
       
   293             : tokenType_( tokenType ), 
       
   294               symbol_( symbol ) 
       
   295         {
       
   296         }
       
   297 		
       
   298         void SymbolTokenizer::reset() {
       
   299             begin_ = 0; 
       
   300         }
       
   301 		
       
   302         Token SymbolTokenizer::get() {
       
   303             return Token( tokenType_, begin_, end_ );
       
   304         }
       
   305 		
       
   306         TokenizerState SymbolTokenizer::consume(const wchar_t* cursor) {
       
   307             if (!begin_) begin_ = cursor; 
       
   308             if (symbol_[cursor-begin_] == *cursor) {
       
   309                 if (!symbol_[cursor-begin_+1]) {
       
   310                     // we reached end of symbol
       
   311                     end_ = cursor + 1; 
       
   312                     return TOKENIZER_FINISHED;
       
   313                 } 
       
   314                 return TOKENIZER_HUNGRY; 
       
   315             } else {
       
   316                 return TOKENIZER_FAILED; 
       
   317             }
       
   318         }
       
   319 		
       
   320         MultiTokenizer::MultiTokenizer(Tokenizer** tokenizers, bool ownTokenizers) 
       
   321             : ownTokenizers_(ownTokenizers)
       
   322         {
       
   323             int len = 0; while (tokenizers[len]) len++; 
       
   324             tokenizers_.assign(tokenizers,
       
   325                                tokenizers + len);
       
   326             states_ = new TokenizerState[len]; 
       
   327             reset(); 
       
   328         }
       
   329 
       
   330         MultiTokenizer::~MultiTokenizer()
       
   331         {
       
   332             if (ownTokenizers_) 
       
   333                 {
       
   334                     typedef std::vector<Tokenizer*>::iterator iterator; 
       
   335                     for (iterator i = tokenizers_.begin(); i != tokenizers_.end(); ) 
       
   336                         {
       
   337                             delete *(i++); 
       
   338                         }
       
   339                 }
       
   340             delete[] states_; 
       
   341         }
       
   342 		
       
   343 
       
   344         void MultiTokenizer::reset() 
       
   345         {
       
   346             TokenizerState* s = states_;
       
   347             running_ = 0; 
       
   348             std::vector<Tokenizer*>::iterator
       
   349                 i = tokenizers_.begin(),
       
   350                 end = tokenizers_.end();
       
   351 
       
   352             for (; i != end; ++i, ++s) {
       
   353                 (*i)->reset();
       
   354                 (*s) = TOKENIZER_HUNGRY;
       
   355                 running_++; 
       
   356             }
       
   357             found_ = false;
       
   358         }
       
   359 		 
       
   360         Token MultiTokenizer::get() 
       
   361         {
       
   362             Token token(TOKEN_UNKNOWN, 0, 0); 
       
   363             TokenizerState* s = states_;
       
   364             std::vector<Tokenizer*>::iterator
       
   365                 i = tokenizers_.begin(),
       
   366                 end = tokenizers_.end();
       
   367 
       
   368             for (; i != end; ++i, ++s ) {
       
   369                 if (*s == TOKENIZER_FINISHED) {
       
   370                     Token c = (*i)->get(); 
       
   371                     if (c.length() > token.length()) {
       
   372                         token = c; 
       
   373                     }
       
   374                 }
       
   375             }
       
   376             if (token.length() == 0) {
       
   377                 // NOTE: not really a lexical exception, but logical one
       
   378                 throw LexException(L"Trying to get token without a token ready.", 0); 
       
   379             }
       
   380             return token;
       
   381         }
       
   382 
       
   383         TokenizerState MultiTokenizer::consume(const wchar_t* cursor) {
       
   384             TokenizerState* s = states_;
       
   385             std::vector<Tokenizer*>::iterator
       
   386                 i = tokenizers_.begin(),
       
   387                 end = tokenizers_.end();
       
   388 
       
   389             for (; i != end; ++i, ++s) {
       
   390                 if (*s == TOKENIZER_HUNGRY) 
       
   391                     {
       
   392                         *s = (*i)->consume(cursor);
       
   393                         if (*s != TOKENIZER_HUNGRY) running_--; 
       
   394                         if (*s == TOKENIZER_FINISHED) {
       
   395                             found_ = true; 
       
   396                         }
       
   397                     }
       
   398             }
       
   399             if (running_ == 0) {
       
   400                 return found_ ? TOKENIZER_FINISHED : TOKENIZER_FAILED; 
       
   401             }
       
   402             return TOKENIZER_HUNGRY;
       
   403         }
       
   404 		
       
   405 
       
        // Composite tokenizer recognizing any literal: string, integer or
        // real. Delegates to a MultiTokenizer over the three sub-lexers.
        LitTokenizer::LitTokenizer(wchar_t citate)
            : multiTokenizer_(NULL)
        {
            using namespace std;

            // The sub-tokenizers are held in auto_ptrs so they get freed
            // if any of the allocations below throws.
            auto_ptr<StrLitTokenizer>
                s(new StrLitTokenizer(citate));
            auto_ptr<IntLitTokenizer>
                i(new IntLitTokenizer);
            auto_ptr<RealLitTokenizer>
                r(new RealLitTokenizer);

            // NULL-terminated array as required by MultiTokenizer.
            Tokenizer * tokenizers[] = {
                s.get(),
                i.get(),
                r.get(),
                NULL
            };

            // MultiTokenizer takes ownership (second argument true) ...
            multiTokenizer_ = new MultiTokenizer(tokenizers, true);

            // ... so only release the auto_ptrs once that ctor has
            // succeeded; releasing earlier would leak on a throw.
            s.release();
            i.release();
            r.release();

            reset();
        }
       
   433 
       
   434 
       
   435         LitTokenizer::~LitTokenizer()
       
   436         {
       
   437             delete multiTokenizer_;
       
   438         }
       
   439 
       
   440         void LitTokenizer::reset()
       
   441         {
       
   442             multiTokenizer_->reset();
       
   443         }
       
   444 
       
   445         Token LitTokenizer::get()
       
   446         {
       
   447             Token
       
   448                 subToken = multiTokenizer_->get();
       
   449 
       
   450             return Token(TOKEN_LIT,
       
   451                          subToken.begin(),
       
   452                          subToken.end());
       
   453         }
       
   454 
       
   455         TokenizerState LitTokenizer::consume(const wchar_t * cursor)
       
   456         {
       
   457             return multiTokenizer_->consume(cursor);
       
   458         }
       
   459             
       
        // Out-of-line destructor anchoring the TokenIterator interface.
        TokenIterator::~TokenIterator() {}

        // Iterator producing tokens by repeatedly running 'tokenizer' over
        // 'text'. Neither argument is owned; both must outlive this object.
        Tokens::Tokens(Tokenizer& tokenizer, const wchar_t* text)
            :	cursor_(text),
                tokenizer_(tokenizer), 
                hasNext_(false)
        {}
       
   467 		
       
   468         Tokens::operator bool() {
       
   469             prepareNext(); 
       
   470             return hasNext_;
       
   471         } 
       
   472 		
       
   473         Token Tokens::operator++(int) {
       
   474             prepareNext();
       
   475             if (!hasNext_) {
       
   476                 throw LexException(L"Out of tokens.", cursor_);
       
   477             }
       
   478             hasNext_ = false;
       
   479             // get the token
       
   480             Token ret = tokenizer_.get();
       
   481             cursor_ = ret.end();
       
   482             return ret;
       
   483         }
       
   484 				
       
        // Run the tokenizer from cursor_ until it settles, caching the
        // result in hasNext_. Throws on unrecognized input.
        // NOTE(review): termination relies on every tokenizer settling
        // (finished/failed) when fed the terminating NUL — a tokenizer
        // that stays hungry at EOF would loop forever here; the bundled
        // tokenizers all settle. Confirm for any new Tokenizer subclass.
        void Tokens::prepareNext() {
            if (!hasNext_ && *cursor_) {
                const wchar_t* begin = cursor_; 
                tokenizer_.reset(); 
                TokenizerState state = TOKENIZER_HUNGRY;
                while (state == TOKENIZER_HUNGRY) {
                    state = tokenizer_.consume(cursor_);
                    if (*cursor_) cursor_++; // don't go beyond eof. 
                }
                if (state == TOKENIZER_FAILED) {
                    // Report the stretch of text that could not be lexed.
                    std::wostringstream msg; 
                    msg<<L"Unrecognized syntax: '";
                    for (int i = 0; &begin[i] < cursor_; i++) msg<<begin[i];
                    msg<<L"'";
                    throw LexException(msg.str().c_str(), begin); 
                } else { 
                    // Means that: state == TOKENIZER_FINISHED
                    hasNext_ = true; 
                }
            }
        }
       
   506 
       
   507         WhiteSpaceFilter::WhiteSpaceFilter(TokenIterator& tokens) 
       
   508             :	tokens_(tokens), next_(), hasNext_(false) {}
       
   509 		
       
   510         WhiteSpaceFilter::operator bool()
       
   511         {
       
   512             prepareNext();
       
   513             return hasNext_; 
       
   514         }
       
   515 		
       
   516         Token WhiteSpaceFilter::operator++(int)
       
   517         {
       
   518             prepareNext();
       
   519             if (!hasNext_) {
       
   520                 throw LexException(L"Out of tokens", 0); 
       
   521             }
       
   522             hasNext_ = false;
       
   523             return next_;
       
   524         }
       
   525         void WhiteSpaceFilter::prepareNext()
       
   526         {
       
   527             while (!hasNext_ && tokens_) {
       
   528                 next_ = tokens_++;
       
   529                 if (next_.type() != TOKEN_WS) {
       
   530                     hasNext_ = true; 
       
   531                 }
       
   532             }
       
   533         }
       
   534 		
       
   535         TokenReader::TokenReader(TokenIterator& tokens) 
       
   536             :	tokens_(tokens), 
       
   537                 location_(0),
       
   538                 forward_(), 
       
   539                 backward_(), 
       
   540                 marks_()
       
   541         {}
       
   542 		
       
   543 		
       
   544         TokenReader::operator bool() {
       
   545             return !forward_.empty() || tokens_; 
       
   546         }
       
   547 		
       
   548         Token TokenReader::operator++(int) {
       
   549             Token token; 
       
   550             if (forward_.size() > 0) {
       
   551                 token = forward_.back();
       
   552                 forward_.pop_back(); 
       
   553             } else {
       
   554                 token = tokens_++; 
       
   555             }
       
   556             if (!marks_.empty()) {
       
   557                 backward_.push_back(token);  
       
   558             }
       
   559             location_++; 
       
   560             return token; 
       
   561         }
       
   562 
       
   563         Token TokenReader::peek() {
       
   564             if (forward_.empty()) {
       
   565                 Token token = (*this)++;
       
   566                 forward_.push_back(token); 
       
   567                 return token; 
       
   568             } else {
       
   569                 return forward_.back(); 
       
   570             }
       
   571         }
       
   572 
       
   573         void TokenReader::pushMark() {
       
   574             marks_.push_back(location_); 
       
   575         }
       
   576 		
       
   577         void TokenReader::popMark() {
       
   578             int mark = marks_.back(); marks_.pop_back();
       
   579             while (location_ > mark) {
       
   580                 forward_.push_back(backward_.back()); 
       
   581                 backward_.pop_back();
       
   582                 location_--;
       
   583             }
       
   584         }
       
   585 		
       
   586         void TokenReader::clearMark() {
       
   587             marks_.back(); marks_.pop_back();
       
   588             if (marks_.empty()) {
       
   589                 backward_.clear(); 
       
   590             }
       
   591         }
       
   592 		
       
   593     } // Lex 
       
   594 	
       
   595     namespace Parser {
       
   596 	
       
   597         ParseException::ParseException(const wchar_t* wWhat, 
       
   598                                        const Lex::Token& where) 
       
   599             : wWhat_(wWhat), 
       
   600               where_(where) {
       
   601             ;
       
   602         }
       
   603 		
       
   604 
       
   605         Lex::Token ParseException::where() const {
       
   606             return where_;
       
   607         }
       
   608 
       
   609 
       
   610         const wchar_t* ParseException::wWhat() const throw() {
       
   611             return wWhat_.c_str();
       
   612         }
       
   613 		
       
        // Rewrites the stored message to embed the full input text with
        // "*here*" markers around the offending token (one marker before
        // its first character, one after its last). An EOF token has no
        // position, so the marker simply goes at the end of the text.
        void ParseException::setContext(const wchar_t * context)
        {
            // TODO legacy of implementation of obsoleted describe() -
            // it can be optimized by doind direct substring - concat
            // operations instead of looping through context
            std::wstring tmp;
            tmp += wWhat_; 
            tmp += L" at: \""; 
            if (where_.type() == Lex::TOKEN_EOF) {
                tmp += context; 
                tmp += L"*here*";
            } else {
                for (; ; context++) {
                    // Marker before the token's first character.
                    if (context == where_.begin()) {
                        tmp += L"*here*";
                    }
                    // Marker after the token's last character.
                    if (context == where_.end()) {
                        tmp += L"*here*";
                    }
                    if (!*context) break; 
                    tmp += *context;
                }
            }
            tmp += L"\"";

            wWhat_ = tmp;
        }
       
   641 		
       
   642         namespace Lit {
       
   643 		
       
   644             std::wstring ParseString(const Lex::Token& token) {
       
   645                 if (token.type() != Lex::TOKEN_STRLIT) {
       
   646                     std::wostringstream msg; 
       
   647                     msg<<L"Expected literal instead of token '"<<token.text()<<"' of type "<<token.type(); 
       
   648                     throw ParseException(msg.str().c_str(), token);  
       
   649                 }
       
   650                 std::wstring ret; 
       
   651                 const wchar_t* text = token.begin(); 
       
   652                 // NOTE: We are assuming that the literal sitation marks are one character wide
       
   653                 for (int i = 1; &text[i] < token.end()-1; i++) {// skip first and last characters
       
   654                     if (text[i] == Lex::ESCAPE_SYMBOL) {
       
   655                         i++; 
       
   656                         switch (text[i]) {
       
   657                         case '0':
       
   658                             ret += L"\0";
       
   659                             break;
       
   660                         case 'n':
       
   661                             ret += L"\n";
       
   662                             break;
       
   663                         case 'r':
       
   664                             ret += L"\r";
       
   665                             break;
       
   666                         case 't':
       
   667                             ret += L"\t";
       
   668                             break;
       
   669                         default: 
       
   670                             ret += text[i]; 
       
   671                         }
       
   672                     } else {
       
   673                         ret += text[i];
       
   674                     }
       
   675                 }
       
   676                 return ret; 
       
   677             }
       
   678             long ParseInteger(const Lex::Token& token) {
       
   679                 if (token.type() != Lex::TOKEN_INTLIT) {
       
   680                     std::wostringstream msg; 
       
   681                     msg<<L"Expected literal instead of token '"<<token.text()<<"' of type "<<token.type(); 
       
   682                     throw ParseException(msg.str().c_str(), token);  
       
   683                 }
       
   684                 wchar_t* end = const_cast<wchar_t*>(token.end());
       
   685                 return wcstol(token.begin(), &end, 10);
       
   686             }
       
   687             double ParseReal(const Lex::Token& token) {
       
   688                 if (token.type() != Lex::TOKEN_REALLIT) {
       
   689                     std::wostringstream msg; 
       
   690                     msg<<L"Expected literal instead of token '"<<token.text()<<"' of type "<<token.type(); 
       
   691                     throw ParseException(msg.str().c_str(), token);  
       
   692                 }
       
   693                 wchar_t* end = const_cast<wchar_t*>(token.end());
       
   694                 return wcstod(token.begin(), &end);
       
   695             }
       
   696         }
       
   697 		
       
   698 
       
   699         Lexer::Lexer(Lex::TokenIterator& tokens) : Lex::TokenReader(tokens) {
       
   700         }
       
   701 	
       
   702         Lex::Token Lexer::operator++(int) {
       
   703             if (*this) {
       
   704                 return Lex::TokenReader::operator++(0); 
       
   705             }
       
   706             throw ParseException(L"Unexpected EOF", Lex::Token(Lex::TOKEN_EOF, 0, 0));  
       
   707         }
       
   708 
       
   709         Lex::Token Lexer::eat(int tokenType) {
       
   710             Lex::Token token = ((*this)++);
       
   711             if (token.type() != tokenType) {
       
   712                 std::wostringstream msg; 
       
   713                 msg<<"Expected token of type "<<tokenType<<" instead of token '"<<token.text()<<"' of type "<<token.type();  
       
   714                 throw ParseException(msg.str().c_str(), token);  
       
   715             }
       
   716             return token; 
       
   717         }
       
   718         std::wstring Lexer::eatId() {
       
   719             Lex::Token token = ((*this)++);
       
   720             if (token.type() != Lex::TOKEN_ID) {
       
   721                 std::wostringstream msg; 
       
   722                 msg<<L"Expected identifier instead of token '"<<token.text()<<"' of type "<<token.type(); 
       
   723                 throw ParseException(msg.str().c_str(), token);  
       
   724             }
       
   725             return token.text(); 
       
   726         }
       
   727 
       
   728         void Lexer::eatEof() {
       
   729             if (*this) {
       
   730                 Lex::Token token = ((*this)++);
       
   731                 std::wostringstream msg; 
       
   732                 msg<<L"Expected EOF instead of '"<<token.text()<<"' of type "<<token.type(); 
       
   733                 throw ParseException(msg.str().c_str(), token);  
       
   734             }
       
   735         }
       
   736 
       
   737         std::wstring Lexer::eatString() {
       
   738             return Lit::ParseString((*this)++); 
       
   739         }
       
   740 
       
   741         long Lexer::eatInteger() {
       
   742             return Lit::ParseInteger((*this)++); 
       
   743         }
       
   744 
       
   745         double Lexer::eatReal() {
       
   746             return Lit::ParseReal((*this)++); 
       
   747         }
       
   748 
       
        // Convenience lexer: tokenizes 'text', filters whitespace, and
        // exposes the result through the Lexer interface.
        // NOTE(review): 'ws_' is passed to the Lexer base before ws_ itself
        // is constructed. This is well-defined only because TokenReader's
        // constructor merely stores the reference without using it; members
        // then initialize in declaration order (tokens_, then ws_) —
        // fragile, so keep the member declaration order in the header.
        StdLexer::StdLexer(Lex::Tokenizer& tokenizer, const wchar_t* text) 
            : Lexer(ws_),
              tokens_(tokenizer, text), 
              ws_(tokens_)
              
        {}
       
   755 		
       
   756 		
       
   757     } // Parser
       
   758 } // Cpt
       
   759