diff -r 000000000000 -r dfb7c4ff071f commsfwtools/commstools/utracedecoder/src/messagedefparser/definitiontokenizer.cpp
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/commsfwtools/commstools/utracedecoder/src/messagedefparser/definitiontokenizer.cpp	Thu Dec 17 09:22:25 2009 +0200
@@ -0,0 +1,616 @@
// Copyright (c) 2007-2009 Nokia Corporation and/or its subsidiary(-ies).
// All rights reserved.
// This component and the accompanying materials are made available
// under the terms of "Eclipse Public License v1.0"
// which accompanies this distribution, and is available
// at the URL "http://www.eclipse.org/legal/epl-v10.html".
//
// Initial Contributors:
// Nokia Corporation - initial contribution.
//
// Contributors:
//
// Description:
//

#include <string>
#include <string.h>
#include <ctype.h>

#include "messagedefparser\definitiontokenizer.h"

namespace Tokens
{

struct TTokenTypeText
    {
    const char* iText;
    TTokenType iType;
    };


// Keyword spellings recognised in definition files, mapped to their token types.
static const TTokenTypeText keywords[] =
    {
    { "enum", EEnum },
    { "message", EMessage },
    { "struct", EStruct },
    { "signature", ESignature },
    { "context", EContext },
    { "end", EEnd },
    { "const", EConst },
    { "include", EInclude },
    { "alias", EAlias },

    { "decimal", EDisplayDec },
    { "hex", EDisplayHex },

    { "uint8", EIntType },
    { "uint16", EIntType },
    { "uint32", EIntType },
    { "int8", EIntType },
    { "int16", EIntType },
    { "int32", EIntType },
    { "tmessageid", EMessageIdType },
    { "pad", EPadType },

    { "typeid", ETypeId },
    { "messageid", EMessageId },

    { NULL, EUnknown }
    };


// Token type names used for diagnostics; the table is terminated by EUnknown.
static const TTokenTypeText tokenTypes[] =
    {
    { "EColon", EColon },
    { "EEquals", EEquals },
    { "EInclude", EInclude },
    { "EAlias", EAlias },
    { "EConst", EConst },
    { "EEnd", EEnd },
    { "EEnum", EEnum },
    { "EStruct", EStruct },
    { "ESignature", ESignature },
    { "EContext", EContext },
    { "EMessage", EMessage },
    { "EIdentifier", EIdentifier },
    { "ENumberDec", ENumberDec },
    { "ENumberHex", ENumberHex },
    { "EIntType", EIntType },
    { "EPadType", EPadType },
    { "EString", EString },
    { "ETypeId", ETypeId },
    { "EMessageId", EMessageId },
    { "EMessageIdType", EMessageIdType },
    { "EUnknown", EUnknown }
    };


const char* TokenTypeToString(Tokens::TTokenType aType)
    {
    int i = 0;
    while (tokenTypes[i].iType != EUnknown)
        {
        if (aType == tokenTypes[i].iType)
            {
            break;
            }
        ++i;
        }
    return tokenTypes[i].iText;
    }


CDefinitionTokenizer::CDefinitionTokenizer()
    {
    this->iToken = new char[KMaxTokenSize];
    this->iInputBuffer = new char[KBufferSize];
    this->iTokenOffset = 0;
    this->iLastBufferOffset = 0;
    this->iBufferValid = false;
    this->iBufferOffset = 0;
    this->iLine = 1;
    }


CDefinitionTokenizer::~CDefinitionTokenizer()
    {
    iInputFile.close();
    delete [] iToken;
    delete [] iInputBuffer;
    }


Tokens::TResult CDefinitionTokenizer::LoadDefinitionFile(const std::string& aFilename)
    {
    iInputFile.open(aFilename.c_str(), std::ios::in);
    iLine = 1;
    if (iInputFile.is_open())
        {
        RefillBuffer();
        iError = ENoError;
        }
    else
        {
        iError = EFileNotFound;
        }

    return iError;
    }


Tokens::TResult CDefinitionTokenizer::GetNextToken()
    {
    iTokenOffset = 0;
    iLastBufferOffset = iBufferOffset;
    iTokenType = EUnknown;
//    TTokenState state = EStateStartToken;
    iState = EStateStartToken;
    iError = EUnexpectedToken;

    while (iBufferValid)
        {
        // start to collect the token
        while (iBufferOffset < iBufferSize)
            {
            if (iTokenOffset == KMaxTokenSize)
                {
                iError = 
ETokenTooBig; + return ETokenTooBig; + } + + iToken[iTokenOffset] = iInputBuffer[iBufferOffset]; + if (iToken[iTokenOffset] == '\r') + { + ++iBufferOffset; + continue; + } + + switch (iState) + { + case EStateStartToken: + iState = ProcessStateStartToken(); + break; + + case EStateMaybeComment: + iState = ProcessStateMaybeComment(); + break; + + case EStateMultiLineComment: + case EStateMaybeEndMultiLineComment: + iState = ProcessStateMultiLineComment(); + break; + + case EStateComment: + iState = ProcessStateComment(); + break; + + case EStateNumber: + iState = ProcessStateNumber(); + break; + + case EStateDecimalNumber: + iState = ProcessStateDecimalNumber(); + break; + + case EStateMaybeHexNumber: + iState = ProcessStateMaybeHexNumber(); + break; + + case EStateHexNumber: + iState = ProcessStateHexNumber(); + break; + + case EStateIdentifier: + iState = ProcessStateIdentifier(); + break; + + case EStateMaybeString: + iState = ProcessStateMaybeString(); + break; + + case EStateMaybeNegativeNumber: + iState = ProcessStateMaybeNegativeNumber(); + break; + + default: + iState = EStateError; + iError = EUnknownState; + break; + } + + + // New state processing + switch (iState) + { + case EStateError: + iTokenType = EUnknown; + iToken[++iTokenOffset] = 0; // include the invalid character in the token + iBufferOffset++; + return iError; + break; + + case EStateStartToken: + iTokenOffset = 0; + iLastBufferOffset = iBufferOffset; + iBufferOffset++; + break; + + case EStateComplete: + // TODO: clear error + iLastBufferOffset = iBufferOffset; + iToken[iTokenOffset] = 0; + if (iTokenType == EIdentifier) + { + ExamineIdentifierForKeyword(); + } + iError = ETokenFound; + return ETokenFound; + //break; + + case EStateComment: + case EStateMultiLineComment: + case EStateMaybeEndMultiLineComment: + iTokenOffset = 0; + iLastBufferOffset = iBufferOffset; + iBufferOffset++; + break; + + default: + ++iBufferOffset; + ++iTokenOffset; + break; + } + } + + RefillBuffer(); + } + + iToken[iTokenOffset] = 0; + if (iTokenOffset == 0) + { + iError = EEndOfFile; + } + else + { + if (iTokenType == EIdentifier) + { + ExamineIdentifierForKeyword(); + } + iError = ETokenFound; + } + + return iError; + } + + +void CDefinitionTokenizer::ExamineIdentifierForKeyword() + { + int i = 0; + while (keywords[i].iText != NULL) + { + if (!_strcmpi(keywords[i].iText, iToken)) + { + iTokenType = keywords[i].iType; + break; + } + ++i; + } + } + + +void CDefinitionTokenizer::RefillBuffer() + { + int bufferSpace = KBufferSize; + int bufferInUse = 0; + + if (iLastBufferOffset) + { + bufferInUse = iBufferSize - iLastBufferOffset; + bufferSpace = KBufferSize - bufferInUse; + memcpy(iInputBuffer, &iInputBuffer[iLastBufferOffset], bufferInUse); + } + + memset(&iInputBuffer[bufferInUse], 0, bufferSpace); + iInputFile.read(&iInputBuffer[bufferInUse], bufferSpace); + iBufferSize = iInputFile.gcount(); + iBufferValid = (iBufferSize > 0); + iBufferSize += bufferInUse; + + iBufferOffset = bufferInUse; + iLastBufferOffset = 0; + } + + +CDefinitionTokenizer::TTokenState CDefinitionTokenizer::ProcessStateStartToken() + { + TTokenState nextState = EStateError; + iTokenType = EUnknown; + + switch (iToken[0]) + { + // single char tokens + case '=': + iToken[++iTokenOffset] = 0; + ++iBufferOffset; + nextState = EStateComplete; + iTokenType = EEquals; + break; + + case ':': + iToken[++iTokenOffset] = 0; + ++iBufferOffset; + nextState = EStateComplete; + iTokenType = EColon; + break; + + case ' ': + case '\t': + // consume leading whitespace + nextState = 
EStateStartToken; + break; + + case '\n': + ++iLine; + nextState = EStateStartToken; + break; + + case '\"': + nextState = EStateMaybeString; + --iTokenOffset; // don't include the quotes in the token + break; + + case '/': + nextState = EStateMaybeComment; + break; + + case '-': + nextState = EStateMaybeNegativeNumber; + break; + + case '0': + nextState = EStateNumber; + iTokenType = ENumberDec; + break; + + default: + if (iToken[iTokenOffset] >= '1' && iToken[iTokenOffset] <= '9') + { + nextState = EStateDecimalNumber; + iTokenType = ENumberDec; + } + else if (tolower(iToken[iTokenOffset]) >= 'a'&& tolower(iToken[iTokenOffset]) <= 'z') + { + nextState = EStateIdentifier; + iTokenType = EIdentifier; + } + break; + } + + return nextState; + } + + +CDefinitionTokenizer::TTokenState CDefinitionTokenizer::ProcessStateMaybeNegativeNumber() + { + TTokenState nextState = EStateError; + + switch (iToken[iTokenOffset]) + { + case '0': + nextState = EStateNumber; + iTokenType = ENumberDec; + break; + + default: + if (iToken[iTokenOffset] >= '1' && iToken[iTokenOffset] <= '9') + { + nextState = EStateDecimalNumber; + iTokenType = ENumberDec; + } + break; + } + + return nextState; + } + + +CDefinitionTokenizer::TTokenState CDefinitionTokenizer::ProcessStateMaybeComment() + { + TTokenState nextState = EStateError; + + if (iToken[iTokenOffset] == '/') + { + nextState = EStateComment; + } + else if (iToken[iTokenOffset] == '*') + { + nextState = EStateMultiLineComment; + } + + return nextState; + } + + +CDefinitionTokenizer::TTokenState CDefinitionTokenizer::ProcessStateComment() + { + TTokenState nextState = EStateComment; + + if (iToken[iTokenOffset] == '\n') + { + --iBufferOffset; // because the behaviour of moving to EStateStartToken is + // to progress to the next byte - we want to process the '\n' + nextState = EStateStartToken; + iTokenType = EUnknown; + } + + return nextState; + } + + +CDefinitionTokenizer::TTokenState CDefinitionTokenizer::ProcessStateMultiLineComment() + { + TTokenState nextState = EStateMultiLineComment; + + if (iState == EStateMultiLineComment) + { + if (iToken[iTokenOffset] == '*') + { + // Started multi line comment + nextState = EStateMaybeEndMultiLineComment; + } + } + else if (iState == EStateMaybeEndMultiLineComment) + { + if (iToken[iTokenOffset] == '/') + { + nextState = EStateStartToken; + iTokenType = EUnknown; + } + } + + return nextState; + } + + +CDefinitionTokenizer::TTokenState CDefinitionTokenizer::ProcessStateNumber() + { + TTokenState nextState = EStateError; + + if (IsTerminalChar()) + { + nextState = EStateComplete; + // iTokenType = ENumberDec; + } + else + { + if (tolower(iToken[iTokenOffset]) == 'x') + { + nextState = EStateMaybeHexNumber; + iTokenType = EUnknown; + } + else if (iToken[iTokenOffset] >= '0' && iToken[iTokenOffset] <= '9' ) + { + nextState = EStateDecimalNumber; + // iTokenType = ENumberDec; + } + } + + return nextState; + } + + +CDefinitionTokenizer::TTokenState CDefinitionTokenizer::ProcessStateDecimalNumber() + { + TTokenState nextState = EStateError; + + if (IsTerminalChar()) + { + nextState = EStateComplete; + // iTokenType = ENumberDec; + } + else + { + if (iToken[iTokenOffset] >= '0' && iToken[iTokenOffset] <= '9') + { + nextState = EStateDecimalNumber; + // iTokenType = ENumberDec; + } + } + return nextState; + } + + +CDefinitionTokenizer::TTokenState CDefinitionTokenizer::ProcessStateMaybeHexNumber() + { + TTokenState nextState = EStateError; + + char c = tolower(iToken[iTokenOffset]); + if ((c >= '0' && c <= '9') || (c >= 'a' 
&& c <= 'f')) + { + nextState = EStateHexNumber; + iTokenType = ENumberHex; + } + + return nextState; + } + + +CDefinitionTokenizer::TTokenState CDefinitionTokenizer::ProcessStateHexNumber() + { + TTokenState nextState = EStateError; + + if (IsTerminalChar()) + { + nextState = EStateComplete; + // iTokenType = ENumberHex; + } + else + { + char c = tolower(iToken[iTokenOffset]); + if ((c >= '0' && c <= '9') || (c >= 'a' && c <= 'f')) + { + nextState = EStateHexNumber; + // iTokenType = ENumberHex; + } + } + return nextState; + } + + +CDefinitionTokenizer::TTokenState CDefinitionTokenizer::ProcessStateIdentifier() + { + TTokenState nextState = EStateError; + + if (IsTerminalChar()) + { + nextState = EStateComplete; + // iTokenType = EIdentifier; + } + else + { + char c = tolower(iToken[iTokenOffset]); + if ((c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') || (c == '_')) + { + nextState = EStateIdentifier; + // iTokenType = EIdentifier; + } + } + return nextState; + } + + +CDefinitionTokenizer::TTokenState CDefinitionTokenizer::ProcessStateMaybeString() + { + TTokenState nextState = EStateMaybeString; + + if (iToken[iTokenOffset] == '\"') + { + nextState = EStateComplete; + ++iBufferOffset; // don't want to process the quote again + iTokenType = EString; + } + else if (iToken[iTokenOffset] == '\n') + { + nextState = EStateError; + --iTokenOffset; // don't include the \n in the bad token + iError = EUnterminatedString; + // iTokenType = EUnknown; + } + + return nextState; + } + + +bool CDefinitionTokenizer::IsTerminalChar() + { + char c = iToken[iTokenOffset]; + if (c == ' ' || c == '\t' || c == '\n' || c == ':' || c == '=' || c == '/') + { + return true; + } + return false; + } + +} // namespace Tokens +
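
For orientation, below is a minimal, hypothetical driver showing how the tokenizer added by this patch would typically be exercised. It is not part of the change: the definition file name is illustrative, and the TokenType() accessor for the most recently read token is assumed to exist in definitiontokenizer.h; only LoadDefinitionFile(), GetNextToken(), TokenTypeToString() and the Tokens::TResult values are taken from the code above.

// Hypothetical usage sketch - not part of the patch above.
#include <stdio.h>
#include "messagedefparser\definitiontokenizer.h"

int main()
    {
    Tokens::CDefinitionTokenizer tokenizer;

    // LoadDefinitionFile() reports EFileNotFound if the file cannot be opened.
    if (tokenizer.LoadDefinitionFile("messages.def") != Tokens::ENoError)
        {
        printf("cannot open definition file\n");
        return 1;
        }

    // GetNextToken() returns ETokenFound for each recognised token and
    // EEndOfFile once the input is exhausted; anything else is an error.
    Tokens::TResult result;
    while ((result = tokenizer.GetNextToken()) == Tokens::ETokenFound)
        {
        // TokenType() is an assumed accessor for the last token's type.
        printf("%s\n", Tokens::TokenTypeToString(tokenizer.TokenType()));
        }

    return (result == Tokens::EEndOfFile) ? 0 : 1;
    }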