commsfwtools/commstools/utracedecoder/src/messagedefparser/definitiontokenizer.cpp
changeset 0 dfb7c4ff071f
equal deleted inserted replaced
-1:000000000000 0:dfb7c4ff071f
       
     1 // Copyright (c) 2007-2009 Nokia Corporation and/or its subsidiary(-ies).
       
     2 // All rights reserved.
       
     3 // This component and the accompanying materials are made available
       
     4 // under the terms of "Eclipse Public License v1.0"
       
     5 // which accompanies this distribution, and is available
       
     6 // at the URL "http://www.eclipse.org/legal/epl-v10.html".
       
     7 //
       
     8 // Initial Contributors:
       
     9 // Nokia Corporation - initial contribution.
       
    10 //
       
    11 // Contributors:
       
    12 //
       
    13 // Description:
       
    14 //
       
    15 
       
    16 #include <ctype.h>
       
    17 #include <memory.h>
       
    18 #include <string.h>
       
    19 
       
    20 #include "messagedefparser\definitiontokenizer.h"
       
    21 
       
    22 namespace Tokens
       
    23 {
       
    24 
       
// Pairs a piece of text with a token type. Used as the element type of the
// keywords and tokenTypes lookup tables in this file.
struct TTokenTypeText 
    {
    const char* iText;  // text associated with iType (NULL marks the end of the keywords table)
    TTokenType iType;   // token type the text corresponds to
    };
       
    30 
       
    31 
       
// Reserved words of the definition language and the token type each one maps
// to. ExamineIdentifierForKeyword() compares a completed identifier against
// every entry (case-insensitively) and stops at the first match. The table is
// terminated by the entry whose iText is NULL.
static const TTokenTypeText keywords[] =
    {
        // block / declaration keywords
        { "enum", EEnum },
        { "message", EMessage },
        { "struct", EStruct },
        { "signature", ESignature }, 
        { "context", EContext },
        { "end", EEnd },
        { "const", EConst },
        { "include", EInclude },
        { "alias", EAlias },

        // display format keywords
        { "decimal", EDisplayDec },
        { "hex", EDisplayHex },

        // built-in type names; every integer width maps to the same EIntType
        { "uint8", EIntType },
        { "uint16", EIntType },
        { "uint32", EIntType },
        { "int8", EIntType },
        { "int16", EIntType },
        { "int32", EIntType },
        { "tmessageid", EMessageIdType },
        { "pad", EPadType },

        { "typeid", ETypeId },
        { "messageid", EMessageId },

        // end-of-table sentinel
        { NULL, EUnknown }
    };
       
    61 
       
    62 
       
    63 static const TTokenTypeText tokenTypes[] =
       
    64     {
       
    65         { "EColon", EColon },
       
    66         { "EEquals", EEquals },
       
    67         { "EInclude", EInclude },
       
    68         { "EAlias", EAlias },
       
    69         { "EConst", EConst },
       
    70         { "EEnd", EEnd },
       
    71         { "EEnum", EEnum },
       
    72         { "EStruct", EStruct },
       
    73         { "ESignature", ESignature },
       
    74         { "EContext", EContext },
       
    75         { "EMessage", EMessage },
       
    76         { "EIdentifier", EIdentifier },
       
    77         { "ENumberDec", ENumberDec },
       
    78         { "ENumberHex", ENumberHex },
       
    79         { "EIntType", EIntType },
       
    80         { "EPadType", EPadType },
       
    81         { "EString", EString },
       
    82         { "ETypeId", ETypeId },
       
    83         { "EMessageId", EMessageId },
       
    84         { "EMessageIdType", EMessageIdType },
       
    85         { "EUnknown", EUnknown }
       
    86     };
       
    87 
       
    88 
       
    89 const char* TokenTypeToString(Tokens::TTokenType aType)
       
    90     {
       
    91     int i = 0;
       
    92     while (tokenTypes[i].iType != EUnknown)
       
    93         {
       
    94         if (aType == tokenTypes[i].iType)
       
    95             {
       
    96             break;
       
    97             }
       
    98         ++i;
       
    99         }
       
   100     return tokenTypes[i].iText;
       
   101     }
       
   102 
       
   103 
       
   104 CDefinitionTokenizer::CDefinitionTokenizer()
       
   105     {
       
   106     this->iToken = new char[KMaxTokenSize];
       
   107     this->iInputBuffer = new char[KBufferSize];
       
   108     this->iTokenOffset = 0;
       
   109     this->iLastBufferOffset = 0;
       
   110     this->iBufferValid = false;
       
   111     this->iBufferOffset = 0;
       
   112     this->iLine = 1;
       
   113     }
       
   114 
       
   115 
       
   116 CDefinitionTokenizer::~CDefinitionTokenizer()
       
   117     {
       
   118     iInputFile.close();
       
   119     delete iToken;
       
   120     delete iInputBuffer;
       
   121     }
       
   122 
       
   123 
       
   124 Tokens::TResult CDefinitionTokenizer::LoadDefinitionFile(const std::string& aFilename)
       
   125     {
       
   126     iInputFile.open(aFilename.c_str(), std::ios::in);
       
   127     iLine = 1;
       
   128     if (iInputFile.is_open())
       
   129         {
       
   130         RefillBuffer();
       
   131         iError = ENoError;
       
   132         }
       
   133     else
       
   134         {
       
   135         iError = EFileNotFound;
       
   136         }
       
   137     
       
   138     return iError;
       
   139     }
       
   140 
       
   141 
       
   142 Tokens::TResult CDefinitionTokenizer::GetNextToken()
       
   143     {
       
   144     iTokenOffset = 0;
       
   145     iLastBufferOffset = iBufferOffset;
       
   146     iTokenType = EUnknown;
       
   147 //    TTokenState state = EStateStartToken;
       
   148     iState = EStateStartToken;
       
   149     iError = EUnexpectedToken;
       
   150 
       
   151     while (iBufferValid)
       
   152         {
       
   153         // start to collect the token
       
   154         while (iBufferOffset < iBufferSize)
       
   155             {
       
   156             if (iTokenOffset == KMaxTokenSize)
       
   157                 {
       
   158                 iError = ETokenTooBig;
       
   159                 return ETokenTooBig;
       
   160                 }
       
   161 
       
   162             iToken[iTokenOffset] = iInputBuffer[iBufferOffset];
       
   163             if (iToken[iTokenOffset] == '\r')
       
   164                 {
       
   165                 ++iBufferOffset;
       
   166                 continue;
       
   167                 }
       
   168             
       
   169             switch (iState)
       
   170                 {
       
   171                 case EStateStartToken:
       
   172                     iState = ProcessStateStartToken();
       
   173                     break;
       
   174                 
       
   175                 case EStateMaybeComment:
       
   176                     iState = ProcessStateMaybeComment();
       
   177                     break;
       
   178 
       
   179                 case EStateMultiLineComment:
       
   180                 case EStateMaybeEndMultiLineComment:
       
   181                     iState = ProcessStateMultiLineComment();
       
   182                     break;
       
   183 
       
   184                 case EStateComment:
       
   185                     iState = ProcessStateComment();
       
   186                     break;
       
   187 
       
   188                 case EStateNumber:
       
   189                     iState = ProcessStateNumber();
       
   190                     break;
       
   191 
       
   192                 case EStateDecimalNumber:
       
   193                     iState = ProcessStateDecimalNumber();
       
   194                     break;
       
   195 
       
   196                 case EStateMaybeHexNumber:
       
   197                     iState = ProcessStateMaybeHexNumber();
       
   198                     break;
       
   199 
       
   200                 case EStateHexNumber:
       
   201                     iState = ProcessStateHexNumber();
       
   202                     break;
       
   203 
       
   204                 case EStateIdentifier:
       
   205                     iState = ProcessStateIdentifier();
       
   206                     break;
       
   207 
       
   208                 case EStateMaybeString:
       
   209                     iState = ProcessStateMaybeString();
       
   210                     break;
       
   211 
       
   212                 case EStateMaybeNegativeNumber:
       
   213                     iState = ProcessStateMaybeNegativeNumber();
       
   214                     break;
       
   215 
       
   216                 default:
       
   217                     iState = EStateError;
       
   218                     iError = EUnknownState;
       
   219                     break;
       
   220                 }
       
   221 
       
   222             
       
   223             // New state processing
       
   224             switch (iState)
       
   225                 {
       
   226                 case EStateError:
       
   227                     iTokenType = EUnknown;
       
   228                     iToken[++iTokenOffset] = 0; // include the invalid character in the token
       
   229                     iBufferOffset++;
       
   230                     return iError;
       
   231                     break;
       
   232 
       
   233                 case EStateStartToken:
       
   234                     iTokenOffset = 0; 
       
   235                     iLastBufferOffset = iBufferOffset;
       
   236                     iBufferOffset++;
       
   237                     break;
       
   238                 
       
   239                 case EStateComplete:
       
   240                     // TODO: clear error
       
   241                     iLastBufferOffset = iBufferOffset;
       
   242                     iToken[iTokenOffset] = 0;
       
   243                     if (iTokenType == EIdentifier)
       
   244                         {
       
   245                         ExamineIdentifierForKeyword();
       
   246                         }
       
   247                     iError = ETokenFound;
       
   248                     return ETokenFound;
       
   249                     //break;
       
   250 
       
   251                 case EStateComment:
       
   252                 case EStateMultiLineComment:
       
   253                 case EStateMaybeEndMultiLineComment:
       
   254                     iTokenOffset = 0;
       
   255                     iLastBufferOffset = iBufferOffset;
       
   256                     iBufferOffset++;
       
   257                     break;
       
   258 
       
   259                 default:
       
   260                     ++iBufferOffset;
       
   261                     ++iTokenOffset;
       
   262                     break;
       
   263                 }
       
   264             }
       
   265         
       
   266         RefillBuffer();
       
   267         }
       
   268 
       
   269     iToken[iTokenOffset] = 0;
       
   270     if (iTokenOffset == 0)
       
   271         {
       
   272         iError = EEndOfFile;
       
   273         }
       
   274     else
       
   275         {
       
   276         if (iTokenType == EIdentifier)
       
   277             {
       
   278             ExamineIdentifierForKeyword();
       
   279             }
       
   280         iError = ETokenFound;
       
   281         }
       
   282 
       
   283     return iError;
       
   284     }
       
   285 
       
   286 
       
   287 void CDefinitionTokenizer::ExamineIdentifierForKeyword()
       
   288     {
       
   289     int i = 0;
       
   290     while (keywords[i].iText != NULL)
       
   291         {
       
   292         if (!_strcmpi(keywords[i].iText, iToken))
       
   293             {
       
   294             iTokenType = keywords[i].iType;
       
   295             break;
       
   296             }
       
   297         ++i;
       
   298         }
       
   299     }
       
   300 
       
   301 
       
   302 void CDefinitionTokenizer::RefillBuffer()
       
   303     {
       
   304     int bufferSpace = KBufferSize;
       
   305     int bufferInUse = 0;
       
   306 
       
   307     if (iLastBufferOffset)
       
   308         {
       
   309         bufferInUse = iBufferSize - iLastBufferOffset;
       
   310         bufferSpace = KBufferSize - bufferInUse;
       
   311         memcpy(iInputBuffer, &iInputBuffer[iLastBufferOffset], bufferInUse);
       
   312         }
       
   313 
       
   314     memset(&iInputBuffer[bufferInUse], 0, bufferSpace);
       
   315     iInputFile.read(&iInputBuffer[bufferInUse], bufferSpace);
       
   316     iBufferSize = iInputFile.gcount();
       
   317     iBufferValid = (iBufferSize > 0);
       
   318     iBufferSize += bufferInUse;
       
   319 
       
   320     iBufferOffset = bufferInUse;
       
   321     iLastBufferOffset = 0;
       
   322     }
       
   323 
       
   324 
       
   325 CDefinitionTokenizer::TTokenState CDefinitionTokenizer::ProcessStateStartToken()
       
   326     {
       
   327     TTokenState nextState = EStateError;
       
   328     iTokenType = EUnknown;
       
   329 
       
   330     switch (iToken[0])
       
   331         {
       
   332         // single char tokens
       
   333         case '=':
       
   334             iToken[++iTokenOffset] = 0;
       
   335             ++iBufferOffset;
       
   336             nextState = EStateComplete;
       
   337             iTokenType = EEquals;
       
   338             break;
       
   339 
       
   340         case ':':
       
   341             iToken[++iTokenOffset] = 0;
       
   342             ++iBufferOffset;
       
   343             nextState = EStateComplete;
       
   344             iTokenType = EColon;
       
   345             break;
       
   346 
       
   347         case ' ':
       
   348         case '\t':
       
   349             // consume leading whitespace
       
   350             nextState = EStateStartToken;
       
   351             break;
       
   352 
       
   353         case '\n':
       
   354             ++iLine;
       
   355             nextState = EStateStartToken;
       
   356             break;
       
   357 
       
   358         case '\"':
       
   359             nextState = EStateMaybeString;
       
   360             --iTokenOffset; // don't include the quotes in the token
       
   361             break;
       
   362 
       
   363         case '/':
       
   364             nextState = EStateMaybeComment; 
       
   365             break;
       
   366 
       
   367         case '-':
       
   368             nextState = EStateMaybeNegativeNumber;
       
   369             break;
       
   370 
       
   371         case '0':
       
   372             nextState = EStateNumber;
       
   373             iTokenType = ENumberDec;
       
   374             break;
       
   375 
       
   376         default:
       
   377             if (iToken[iTokenOffset] >= '1' && iToken[iTokenOffset] <= '9')
       
   378                 {
       
   379                 nextState = EStateDecimalNumber;
       
   380                 iTokenType = ENumberDec;
       
   381                 }
       
   382             else if (tolower(iToken[iTokenOffset]) >= 'a'&& tolower(iToken[iTokenOffset]) <= 'z')
       
   383                 {
       
   384                 nextState = EStateIdentifier;
       
   385                 iTokenType = EIdentifier;
       
   386                 }
       
   387             break;
       
   388         }
       
   389 
       
   390     return nextState;
       
   391     }
       
   392 
       
   393 
       
   394 CDefinitionTokenizer::TTokenState CDefinitionTokenizer::ProcessStateMaybeNegativeNumber()
       
   395     {
       
   396     TTokenState nextState = EStateError;
       
   397 
       
   398     switch (iToken[iTokenOffset])
       
   399         {
       
   400         case '0':
       
   401             nextState = EStateNumber;
       
   402             iTokenType = ENumberDec;
       
   403             break;
       
   404 
       
   405         default:
       
   406             if (iToken[iTokenOffset] >= '1' && iToken[iTokenOffset] <= '9')
       
   407                 {
       
   408                 nextState = EStateDecimalNumber;
       
   409                 iTokenType = ENumberDec;
       
   410                 }
       
   411             break;
       
   412         }
       
   413 
       
   414     return nextState;
       
   415     }
       
   416 
       
   417 
       
   418 CDefinitionTokenizer::TTokenState CDefinitionTokenizer::ProcessStateMaybeComment()
       
   419     {
       
   420     TTokenState nextState = EStateError;
       
   421 
       
   422     if (iToken[iTokenOffset] == '/')
       
   423         {
       
   424         nextState = EStateComment; 
       
   425         }
       
   426     else if (iToken[iTokenOffset] == '*')
       
   427         {
       
   428         nextState = EStateMultiLineComment;
       
   429         }
       
   430 
       
   431     return nextState;
       
   432     }
       
   433 
       
   434 
       
   435 CDefinitionTokenizer::TTokenState CDefinitionTokenizer::ProcessStateComment()
       
   436     {
       
   437     TTokenState nextState = EStateComment;
       
   438 
       
   439     if (iToken[iTokenOffset] == '\n')
       
   440         {
       
   441         --iBufferOffset; // because the behaviour of moving to EStateStartToken is
       
   442                          // to progress to the next byte - we want to process the '\n'
       
   443         nextState = EStateStartToken; 
       
   444         iTokenType = EUnknown;
       
   445         }
       
   446 
       
   447     return nextState;
       
   448     }
       
   449 
       
   450 
       
   451 CDefinitionTokenizer::TTokenState CDefinitionTokenizer::ProcessStateMultiLineComment()
       
   452     {
       
   453     TTokenState nextState = EStateMultiLineComment;
       
   454 
       
   455     if (iState == EStateMultiLineComment)
       
   456         {
       
   457         if (iToken[iTokenOffset] == '*')
       
   458             {
       
   459             // Started multi line comment
       
   460             nextState = EStateMaybeEndMultiLineComment;
       
   461             }
       
   462         }
       
   463     else if (iState == EStateMaybeEndMultiLineComment)
       
   464         {
       
   465         if (iToken[iTokenOffset] == '/')
       
   466             {
       
   467             nextState = EStateStartToken; 
       
   468             iTokenType = EUnknown;
       
   469             }
       
   470         }
       
   471 
       
   472     return nextState;
       
   473     }
       
   474 
       
   475 
       
   476 CDefinitionTokenizer::TTokenState CDefinitionTokenizer::ProcessStateNumber()
       
   477     {
       
   478     TTokenState nextState = EStateError;
       
   479 
       
   480     if (IsTerminalChar())
       
   481         {
       
   482         nextState = EStateComplete;
       
   483         // iTokenType = ENumberDec; 
       
   484         }
       
   485     else
       
   486         {
       
   487         if (tolower(iToken[iTokenOffset]) == 'x')
       
   488             {
       
   489             nextState = EStateMaybeHexNumber;
       
   490             iTokenType = EUnknown;
       
   491             }
       
   492         else if (iToken[iTokenOffset] >= '0' && iToken[iTokenOffset] <= '9' )
       
   493             {
       
   494             nextState = EStateDecimalNumber;
       
   495             // iTokenType = ENumberDec;
       
   496             }
       
   497         }
       
   498 
       
   499     return nextState;
       
   500     }
       
   501 
       
   502 
       
   503 CDefinitionTokenizer::TTokenState CDefinitionTokenizer::ProcessStateDecimalNumber()
       
   504     {
       
   505     TTokenState nextState = EStateError;
       
   506 
       
   507     if (IsTerminalChar())
       
   508         {
       
   509         nextState = EStateComplete;
       
   510         // iTokenType = ENumberDec;
       
   511         }
       
   512     else
       
   513         {
       
   514         if (iToken[iTokenOffset] >= '0' && iToken[iTokenOffset] <= '9')
       
   515             {
       
   516             nextState = EStateDecimalNumber;
       
   517             // iTokenType = ENumberDec;
       
   518             }
       
   519         }
       
   520     return nextState;
       
   521     }
       
   522 
       
   523 
       
   524 CDefinitionTokenizer::TTokenState CDefinitionTokenizer::ProcessStateMaybeHexNumber()
       
   525     {
       
   526     TTokenState nextState = EStateError;
       
   527     
       
   528     char c = tolower(iToken[iTokenOffset]);
       
   529     if ((c >= '0' && c <= '9') || (c >= 'a' && c <= 'f'))
       
   530         {
       
   531         nextState = EStateHexNumber;
       
   532         iTokenType = ENumberHex;
       
   533         }
       
   534 
       
   535     return nextState;
       
   536     }
       
   537 
       
   538 
       
   539 CDefinitionTokenizer::TTokenState CDefinitionTokenizer::ProcessStateHexNumber()
       
   540     {
       
   541     TTokenState nextState = EStateError;
       
   542 
       
   543     if (IsTerminalChar())
       
   544         {
       
   545         nextState = EStateComplete;
       
   546         // iTokenType = ENumberHex;
       
   547         }
       
   548     else
       
   549         {
       
   550         char c = tolower(iToken[iTokenOffset]);
       
   551         if ((c >= '0' && c <= '9') || (c >= 'a' && c <= 'f'))
       
   552             {
       
   553             nextState = EStateHexNumber;
       
   554             // iTokenType = ENumberHex;
       
   555             }
       
   556         }
       
   557     return nextState;
       
   558     }
       
   559 
       
   560 
       
   561 CDefinitionTokenizer::TTokenState CDefinitionTokenizer::ProcessStateIdentifier()
       
   562     {
       
   563     TTokenState nextState = EStateError;
       
   564 
       
   565     if (IsTerminalChar())
       
   566         {
       
   567         nextState = EStateComplete;
       
   568         // iTokenType = EIdentifier;
       
   569         }
       
   570     else
       
   571         {
       
   572         char c = tolower(iToken[iTokenOffset]);
       
   573         if ((c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') || (c == '_'))
       
   574             {
       
   575             nextState = EStateIdentifier;
       
   576             // iTokenType = EIdentifier;
       
   577             }
       
   578         }
       
   579     return nextState;
       
   580     }
       
   581 
       
   582 
       
   583 CDefinitionTokenizer::TTokenState CDefinitionTokenizer::ProcessStateMaybeString()
       
   584     {
       
   585     TTokenState nextState = EStateMaybeString;
       
   586 
       
   587     if (iToken[iTokenOffset] == '\"')
       
   588         {
       
   589         nextState = EStateComplete;
       
   590         ++iBufferOffset; // don't want to process the quote again
       
   591         iTokenType = EString;
       
   592         }
       
   593     else if (iToken[iTokenOffset] == '\n')
       
   594         {
       
   595         nextState = EStateError;
       
   596         --iTokenOffset; // don't include the \n in the bad token
       
   597         iError = EUnterminatedString;
       
   598         // iTokenType = EUnknown;
       
   599         }
       
   600 
       
   601     return nextState;
       
   602     }
       
   603 
       
   604 
       
   605 bool CDefinitionTokenizer::IsTerminalChar()
       
   606     {
       
   607     char c = iToken[iTokenOffset];
       
   608     if (c == ' ' || c == '\t' || c == '\n' || c == ':' || c == '=' || c == '/')
       
   609         {
       
   610         return true;
       
   611         }
       
   612     return false;
       
   613     }
       
   614 
       
   615 } // namespace Tokens
       
   616