commsfwtools/commstools/utracedecoder/src/messagedefparser/definitiontokenizer.cpp
changeset 0 dfb7c4ff071f
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/commsfwtools/commstools/utracedecoder/src/messagedefparser/definitiontokenizer.cpp	Thu Dec 17 09:22:25 2009 +0200
@@ -0,0 +1,616 @@
+// Copyright (c) 2007-2009 Nokia Corporation and/or its subsidiary(-ies).
+// All rights reserved.
+// This component and the accompanying materials are made available
+// under the terms of "Eclipse Public License v1.0"
+// which accompanies this distribution, and is available
+// at the URL "http://www.eclipse.org/legal/epl-v10.html".
+//
+// Initial Contributors:
+// Nokia Corporation - initial contribution.
+//
+// Contributors:
+//
+// Description:
+//
+
+#include <ctype.h>
+#include <memory.h>
+#include <string.h>
+
+#include "messagedefparser\definitiontokenizer.h"
+
+namespace Tokens
+{
+
+struct TTokenTypeText 
+    {
+    const char* iText;
+    TTokenType iType;
+    };
+
+
+static const TTokenTypeText keywords[] =
+    {
+        { "enum", EEnum },
+        { "message", EMessage },
+        { "struct", EStruct },
+        { "signature", ESignature }, 
+        { "context", EContext },
+        { "end", EEnd },
+        { "const", EConst },
+        { "include", EInclude },
+        { "alias", EAlias },
+
+        { "decimal", EDisplayDec },
+        { "hex", EDisplayHex },
+
+        { "uint8", EIntType },
+        { "uint16", EIntType },
+        { "uint32", EIntType },
+        { "int8", EIntType },
+        { "int16", EIntType },
+        { "int32", EIntType },
+        { "tmessageid", EMessageIdType },
+        { "pad", EPadType },
+
+        { "typeid", ETypeId },
+        { "messageid", EMessageId },
+
+        { NULL, EUnknown }
+    };
+
+
+static const TTokenTypeText tokenTypes[] =
+    {
+        { "EColon", EColon },
+        { "EEquals", EEquals },
+        { "EInclude", EInclude },
+        { "EAlias", EAlias },
+        { "EConst", EConst },
+        { "EEnd", EEnd },
+        { "EEnum", EEnum },
+        { "EStruct", EStruct },
+        { "ESignature", ESignature },
+        { "EContext", EContext },
+        { "EMessage", EMessage },
+        { "EIdentifier", EIdentifier },
+        { "ENumberDec", ENumberDec },
+        { "ENumberHex", ENumberHex },
+        { "EIntType", EIntType },
+        { "EPadType", EPadType },
+        { "EString", EString },
+        { "ETypeId", ETypeId },
+        { "EMessageId", EMessageId },
+        { "EMessageIdType", EMessageIdType },
+        { "EUnknown", EUnknown }
+    };
+
+
+const char* TokenTypeToString(Tokens::TTokenType aType)
+    {
+    int i = 0;
+    while (tokenTypes[i].iType != EUnknown)
+        {
+        if (aType == tokenTypes[i].iType)
+            {
+            break;
+            }
+        ++i;
+        }
+    return tokenTypes[i].iText;
+    }
+
+
+CDefinitionTokenizer::CDefinitionTokenizer()
+    {
+    this->iToken = new char[KMaxTokenSize];
+    this->iInputBuffer = new char[KBufferSize];
+    this->iTokenOffset = 0;
+    this->iLastBufferOffset = 0;
+    this->iBufferValid = false;
+    this->iBufferOffset = 0;
+    this->iLine = 1;
+    }
+
+
+CDefinitionTokenizer::~CDefinitionTokenizer()
+    {
+    iInputFile.close();
+    delete iToken;
+    delete iInputBuffer;
+    }
+
+
+Tokens::TResult CDefinitionTokenizer::LoadDefinitionFile(const std::string& aFilename)
+    {
+    iInputFile.open(aFilename.c_str(), std::ios::in);
+    iLine = 1;
+    if (iInputFile.is_open())
+        {
+        RefillBuffer();
+        iError = ENoError;
+        }
+    else
+        {
+        iError = EFileNotFound;
+        }
+    
+    return iError;
+    }
+
+
+Tokens::TResult CDefinitionTokenizer::GetNextToken()
+    {
+    iTokenOffset = 0;
+    iLastBufferOffset = iBufferOffset;
+    iTokenType = EUnknown;
+//    TTokenState state = EStateStartToken;
+    iState = EStateStartToken;
+    iError = EUnexpectedToken;
+
+    while (iBufferValid)
+        {
+        // start to collect the token
+        while (iBufferOffset < iBufferSize)
+            {
+            if (iTokenOffset == KMaxTokenSize)
+                {
+                iError = ETokenTooBig;
+                return ETokenTooBig;
+                }
+
+            iToken[iTokenOffset] = iInputBuffer[iBufferOffset];
+            if (iToken[iTokenOffset] == '\r')
+                {
+                ++iBufferOffset;
+                continue;
+                }
+            
+            switch (iState)
+                {
+                case EStateStartToken:
+                    iState = ProcessStateStartToken();
+                    break;
+                
+                case EStateMaybeComment:
+                    iState = ProcessStateMaybeComment();
+                    break;
+
+                case EStateMultiLineComment:
+                case EStateMaybeEndMultiLineComment:
+                    iState = ProcessStateMultiLineComment();
+                    break;
+
+                case EStateComment:
+                    iState = ProcessStateComment();
+                    break;
+
+                case EStateNumber:
+                    iState = ProcessStateNumber();
+                    break;
+
+                case EStateDecimalNumber:
+                    iState = ProcessStateDecimalNumber();
+                    break;
+
+                case EStateMaybeHexNumber:
+                    iState = ProcessStateMaybeHexNumber();
+                    break;
+
+                case EStateHexNumber:
+                    iState = ProcessStateHexNumber();
+                    break;
+
+                case EStateIdentifier:
+                    iState = ProcessStateIdentifier();
+                    break;
+
+                case EStateMaybeString:
+                    iState = ProcessStateMaybeString();
+                    break;
+
+                case EStateMaybeNegativeNumber:
+                    iState = ProcessStateMaybeNegativeNumber();
+                    break;
+
+                default:
+                    iState = EStateError;
+                    iError = EUnknownState;
+                    break;
+                }
+
+            
+            // New state processing
+            switch (iState)
+                {
+                case EStateError:
+                    iTokenType = EUnknown;
+                    iToken[++iTokenOffset] = 0; // include the invalid character in the token
+                    iBufferOffset++;
+                    return iError;
+                    break;
+
+                case EStateStartToken:
+                    iTokenOffset = 0; 
+                    iLastBufferOffset = iBufferOffset;
+                    iBufferOffset++;
+                    break;
+                
+                case EStateComplete:
+                    // TODO: clear error
+                    iLastBufferOffset = iBufferOffset;
+                    iToken[iTokenOffset] = 0;
+                    if (iTokenType == EIdentifier)
+                        {
+                        ExamineIdentifierForKeyword();
+                        }
+                    iError = ETokenFound;
+                    return ETokenFound;
+                    //break;
+
+                case EStateComment:
+                case EStateMultiLineComment:
+                case EStateMaybeEndMultiLineComment:
+                    iTokenOffset = 0;
+                    iLastBufferOffset = iBufferOffset;
+                    iBufferOffset++;
+                    break;
+
+                default:
+                    ++iBufferOffset;
+                    ++iTokenOffset;
+                    break;
+                }
+            }
+        
+        RefillBuffer();
+        }
+
+    iToken[iTokenOffset] = 0;
+    if (iTokenOffset == 0)
+        {
+        iError = EEndOfFile;
+        }
+    else
+        {
+        if (iTokenType == EIdentifier)
+            {
+            ExamineIdentifierForKeyword();
+            }
+        iError = ETokenFound;
+        }
+
+    return iError;
+    }
+
+
+void CDefinitionTokenizer::ExamineIdentifierForKeyword()
+    {
+    int i = 0;
+    while (keywords[i].iText != NULL)
+        {
+        if (!_strcmpi(keywords[i].iText, iToken))
+            {
+            iTokenType = keywords[i].iType;
+            break;
+            }
+        ++i;
+        }
+    }
+
+
+void CDefinitionTokenizer::RefillBuffer()
+    {
+    int bufferSpace = KBufferSize;
+    int bufferInUse = 0;
+
+    if (iLastBufferOffset)
+        {
+        bufferInUse = iBufferSize - iLastBufferOffset;
+        bufferSpace = KBufferSize - bufferInUse;
+        memcpy(iInputBuffer, &iInputBuffer[iLastBufferOffset], bufferInUse);
+        }
+
+    memset(&iInputBuffer[bufferInUse], 0, bufferSpace);
+    iInputFile.read(&iInputBuffer[bufferInUse], bufferSpace);
+    iBufferSize = iInputFile.gcount();
+    iBufferValid = (iBufferSize > 0);
+    iBufferSize += bufferInUse;
+
+    iBufferOffset = bufferInUse;
+    iLastBufferOffset = 0;
+    }
+
+
+CDefinitionTokenizer::TTokenState CDefinitionTokenizer::ProcessStateStartToken()
+    {
+    TTokenState nextState = EStateError;
+    iTokenType = EUnknown;
+
+    switch (iToken[0])
+        {
+        // single char tokens
+        case '=':
+            iToken[++iTokenOffset] = 0;
+            ++iBufferOffset;
+            nextState = EStateComplete;
+            iTokenType = EEquals;
+            break;
+
+        case ':':
+            iToken[++iTokenOffset] = 0;
+            ++iBufferOffset;
+            nextState = EStateComplete;
+            iTokenType = EColon;
+            break;
+
+        case ' ':
+        case '\t':
+            // consume leading whitespace
+            nextState = EStateStartToken;
+            break;
+
+        case '\n':
+            ++iLine;
+            nextState = EStateStartToken;
+            break;
+
+        case '\"':
+            nextState = EStateMaybeString;
+            --iTokenOffset; // don't include the quotes in the token
+            break;
+
+        case '/':
+            nextState = EStateMaybeComment; 
+            break;
+
+        case '-':
+            nextState = EStateMaybeNegativeNumber;
+            break;
+
+        case '0':
+            nextState = EStateNumber;
+            iTokenType = ENumberDec;
+            break;
+
+        default:
+            if (iToken[iTokenOffset] >= '1' && iToken[iTokenOffset] <= '9')
+                {
+                nextState = EStateDecimalNumber;
+                iTokenType = ENumberDec;
+                }
+            else if (tolower(iToken[iTokenOffset]) >= 'a'&& tolower(iToken[iTokenOffset]) <= 'z')
+                {
+                nextState = EStateIdentifier;
+                iTokenType = EIdentifier;
+                }
+            break;
+        }
+
+    return nextState;
+    }
+
+
+CDefinitionTokenizer::TTokenState CDefinitionTokenizer::ProcessStateMaybeNegativeNumber()
+    {
+    TTokenState nextState = EStateError;
+
+    switch (iToken[iTokenOffset])
+        {
+        case '0':
+            nextState = EStateNumber;
+            iTokenType = ENumberDec;
+            break;
+
+        default:
+            if (iToken[iTokenOffset] >= '1' && iToken[iTokenOffset] <= '9')
+                {
+                nextState = EStateDecimalNumber;
+                iTokenType = ENumberDec;
+                }
+            break;
+        }
+
+    return nextState;
+    }
+
+
+CDefinitionTokenizer::TTokenState CDefinitionTokenizer::ProcessStateMaybeComment()
+    {
+    TTokenState nextState = EStateError;
+
+    if (iToken[iTokenOffset] == '/')
+        {
+        nextState = EStateComment; 
+        }
+    else if (iToken[iTokenOffset] == '*')
+        {
+        nextState = EStateMultiLineComment;
+        }
+
+    return nextState;
+    }
+
+
+CDefinitionTokenizer::TTokenState CDefinitionTokenizer::ProcessStateComment()
+    {
+    TTokenState nextState = EStateComment;
+
+    if (iToken[iTokenOffset] == '\n')
+        {
+        --iBufferOffset; // because the behaviour of moving to EStateStartToken is
+                         // to progress to the next byte - we want to process the '\n'
+        nextState = EStateStartToken; 
+        iTokenType = EUnknown;
+        }
+
+    return nextState;
+    }
+
+
+CDefinitionTokenizer::TTokenState CDefinitionTokenizer::ProcessStateMultiLineComment()
+    {
+    TTokenState nextState = EStateMultiLineComment;
+
+    if (iState == EStateMultiLineComment)
+        {
+        if (iToken[iTokenOffset] == '*')
+            {
+            // Started multi line comment
+            nextState = EStateMaybeEndMultiLineComment;
+            }
+        }
+    else if (iState == EStateMaybeEndMultiLineComment)
+        {
+        if (iToken[iTokenOffset] == '/')
+            {
+            nextState = EStateStartToken; 
+            iTokenType = EUnknown;
+            }
+        }
+
+    return nextState;
+    }
+
+
+CDefinitionTokenizer::TTokenState CDefinitionTokenizer::ProcessStateNumber()
+    {
+    TTokenState nextState = EStateError;
+
+    if (IsTerminalChar())
+        {
+        nextState = EStateComplete;
+        // iTokenType = ENumberDec; 
+        }
+    else
+        {
+        if (tolower(iToken[iTokenOffset]) == 'x')
+            {
+            nextState = EStateMaybeHexNumber;
+            iTokenType = EUnknown;
+            }
+        else if (iToken[iTokenOffset] >= '0' && iToken[iTokenOffset] <= '9' )
+            {
+            nextState = EStateDecimalNumber;
+            // iTokenType = ENumberDec;
+            }
+        }
+
+    return nextState;
+    }
+
+
+CDefinitionTokenizer::TTokenState CDefinitionTokenizer::ProcessStateDecimalNumber()
+    {
+    TTokenState nextState = EStateError;
+
+    if (IsTerminalChar())
+        {
+        nextState = EStateComplete;
+        // iTokenType = ENumberDec;
+        }
+    else
+        {
+        if (iToken[iTokenOffset] >= '0' && iToken[iTokenOffset] <= '9')
+            {
+            nextState = EStateDecimalNumber;
+            // iTokenType = ENumberDec;
+            }
+        }
+    return nextState;
+    }
+
+
+CDefinitionTokenizer::TTokenState CDefinitionTokenizer::ProcessStateMaybeHexNumber()
+    {
+    TTokenState nextState = EStateError;
+    
+    char c = tolower(iToken[iTokenOffset]);
+    if ((c >= '0' && c <= '9') || (c >= 'a' && c <= 'f'))
+        {
+        nextState = EStateHexNumber;
+        iTokenType = ENumberHex;
+        }
+
+    return nextState;
+    }
+
+
+CDefinitionTokenizer::TTokenState CDefinitionTokenizer::ProcessStateHexNumber()
+    {
+    TTokenState nextState = EStateError;
+
+    if (IsTerminalChar())
+        {
+        nextState = EStateComplete;
+        // iTokenType = ENumberHex;
+        }
+    else
+        {
+        char c = tolower(iToken[iTokenOffset]);
+        if ((c >= '0' && c <= '9') || (c >= 'a' && c <= 'f'))
+            {
+            nextState = EStateHexNumber;
+            // iTokenType = ENumberHex;
+            }
+        }
+    return nextState;
+    }
+
+
+CDefinitionTokenizer::TTokenState CDefinitionTokenizer::ProcessStateIdentifier()
+    {
+    TTokenState nextState = EStateError;
+
+    if (IsTerminalChar())
+        {
+        nextState = EStateComplete;
+        // iTokenType = EIdentifier;
+        }
+    else
+        {
+        char c = tolower(iToken[iTokenOffset]);
+        if ((c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') || (c == '_'))
+            {
+            nextState = EStateIdentifier;
+            // iTokenType = EIdentifier;
+            }
+        }
+    return nextState;
+    }
+
+
+CDefinitionTokenizer::TTokenState CDefinitionTokenizer::ProcessStateMaybeString()
+    {
+    TTokenState nextState = EStateMaybeString;
+
+    if (iToken[iTokenOffset] == '\"')
+        {
+        nextState = EStateComplete;
+        ++iBufferOffset; // don't want to process the quote again
+        iTokenType = EString;
+        }
+    else if (iToken[iTokenOffset] == '\n')
+        {
+        nextState = EStateError;
+        --iTokenOffset; // don't include the \n in the bad token
+        iError = EUnterminatedString;
+        // iTokenType = EUnknown;
+        }
+
+    return nextState;
+    }
+
+
+bool CDefinitionTokenizer::IsTerminalChar()
+    {
+    char c = iToken[iTokenOffset];
+    if (c == ' ' || c == '\t' || c == '\n' || c == ':' || c == '=' || c == '/')
+        {
+        return true;
+        }
+    return false;
+    }
+
+} // namespace Tokens
+