tools/porting/src/rpp.cpp
author Eckhart Koeppen <eckhart.koppen@nokia.com>
Thu, 22 Apr 2010 16:15:11 +0300
branchRCL_3
changeset 14 8c4229025c0b
parent 4 3b1da2848fc7
permissions -rw-r--r--
930346f3335f271b808bd69409c708262673ba3a

/****************************************************************************
**
** Copyright (C) 2001-2004 Roberto Raggi
** Copyright (C) 2010 Nokia Corporation and/or its subsidiary(-ies).
** All rights reserved.
** Contact: Nokia Corporation (qt-info@nokia.com)
**
** This file is part of the qt3to4 porting application of the Qt Toolkit.
**
** $QT_BEGIN_LICENSE:LGPL$
** No Commercial Usage
** This file contains pre-release code and may not be distributed.
** You may use this file in accordance with the terms and conditions
** contained in the Technology Preview License Agreement accompanying
** this package.
**
** GNU Lesser General Public License Usage
** Alternatively, this file may be used under the terms of the GNU Lesser
** General Public License version 2.1 as published by the Free Software
** Foundation and appearing in the file LICENSE.LGPL included in the
** packaging of this file.  Please review the following information to
** ensure the GNU Lesser General Public License version 2.1 requirements
** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
**
** In addition, as a special exception, Nokia gives you certain additional
** rights.  These rights are described in the Nokia Qt LGPL Exception
** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
**
** If you have questions regarding the use of this file, please contact
** Nokia at qt-info@nokia.com.
**
**
**
**
**
**
**
**
** $QT_END_LICENSE$
**
****************************************************************************/

#include "rpp.h"
#include "rppexpressionbuilder.h"

QT_BEGIN_NAMESPACE

using namespace TokenEngine;

namespace Rpp
{

Preprocessor::Preprocessor()
    : lexerTokenIndex(0), numTokens(0)
{

}

Source *Preprocessor::parse(const TokenEngine::TokenContainer &tokenContainer,
                            const QVector<Type> &tokenTypeList, TypedPool<Item> *memoryPool)
{
    m_memoryPool = memoryPool;
    Source *m_source = createNode<Source>(m_memoryPool); //node whith no parent
    m_tokenContainer = tokenContainer;
    m_tokenTypeList = tokenTypeList;
    lexerTokenIndex = 0;
    numTokens = m_tokenContainer.count();

    if(m_tokenContainer.count() != tokenTypeList.count()) {
        emit error(QLatin1String("Error"), QLatin1String("Internal error in preprocessor: Number of tokens is not equal to number of types in type list"));
        return m_source;
    }

    if(tokenTypeList.isEmpty()) {
    //    emit error("Warning:", "Trying to parse empty source file");
        return m_source;
    }
    Q_ASSERT(m_source->toItemComposite());
    parseGroup(m_source);

    return m_source;
}

// group-part
// group group-part
bool Preprocessor::parseGroup(Item *group)
{
    Q_ASSERT(group->toItemComposite());
    bool gotGroup = false;
    while(lexerTokenIndex < numTokens) {
        if (!parseGroupPart(group))
            break;
        gotGroup = true;
    }
    return gotGroup;
}

//if-section        (# if / # ifdef / #ifndef )
//control-line      ( #include / etc )
//# non-directive   ( # text newline
//text-line         (text newline )
bool Preprocessor::parseGroupPart(Item *group)
{
    //cout << "parse group part" << endl;
    Q_ASSERT(group->toItemComposite());

    //look up first significant token
    Type token = lookAhead();
    if(token == Token_eof)
        return false;

    //look for '#'
    if(token != Token_preproc)
        return parseTextLine(group);

    //look up first significant token after the '#'
    token = lookAheadSkipHash();
    if(token == Token_eof)
        return false;

    // Check if we are at the end of a group. This is not an neccesarely an
    // error, it happens when we reach an #endif for example.
    if (token == Token_directive_elif || token == Token_directive_else ||
        token == Token_directive_endif)
        return false;

    // if-section?
    if(token == Token_directive_if || token == Token_directive_ifdef ||
        token == Token_directive_ifndef)
        return parseIfSection(group);

    // control-line?
    if (token == Token_directive_define)
        return parseDefineDirective(group);
    if (token ==  Token_directive_undef)
        return parseUndefDirective(group);
    if (token ==  Token_directive_include)
        return parseIncludeDirective(group);
    if (token == Token_directive_error)
        return parseErrorDirective(group);
    if (token ==  Token_directive_pragma)
        return parsePragmaDirective(group);

    return parseNonDirective(group);
}

// if-section -> if-group elif-groups[opt] else-group[opt] endif-line
bool Preprocessor::parseIfSection(Item *group)
{
   // cout << "parse if section" << endl ;
    Q_ASSERT(group->toItemComposite());
    IfSection *ifSection = createNode<IfSection>(m_memoryPool, group);
    group->toItemComposite()->add(ifSection);

    if (!parseIfGroup(ifSection))
        return false;

    Type type = lookAheadSkipHash();
    if(type == Token_directive_elif)
        if(!parseElifGroups(ifSection))
            return false;

    type = lookAheadSkipHash();
    if(type == Token_directive_else)
        if(!parseElseGroup(ifSection))
            return false;

    return parseEndifLine(ifSection);
}

bool Preprocessor::parseNonDirective(Item *group)
{
 //  cout << "parsenondirective" << endl;
    Q_ASSERT(group->toItemComposite());
    TokenSection tokenSection = readLine();
    if(tokenSection.count() == 0)
        return false;

    NonDirective *nonDirective = createNode<NonDirective>(m_memoryPool, group);
    group->toItemComposite()->add(nonDirective);
    nonDirective->setTokenSection(tokenSection);
    return true;
}


bool Preprocessor::parseTextLine(Item *group)
{
    //cout << "parsetextline" << endl;
    Q_ASSERT(group->toItemComposite());
    const TokenSection tokenSection = readLine();
   // cout << tokenSection.fullText().constData() << endl;

    if(tokenSection.count() == 0)
        return false;

    Text *text = createNode<Text>(m_memoryPool, group);
    group->toItemComposite()->add(text);
    text->setTokenSection(tokenSection);

    // Create Token-derived nodes and atach to text
    QVector<Token *> tokens;
    tokens.reserve(tokenSection.count());
    for (int t = 0; t < tokenSection.count(); ++t) {
        Token *node = 0;
        const int containerIndex = tokenSection.containerIndex(t);
        switch(m_tokenTypeList.at(containerIndex)) {
            case Token_identifier:
            case Token_defined:
            case Token_directive_if:
            case Token_directive_elif:
            case Token_directive_else:
            case Token_directive_undef:
            case Token_directive_endif:
            case Token_directive_ifdef:
            case Token_directive_ifndef:
            case Token_directive_define:
            case Token_directive_include:
                node = createNode<IdToken>(m_memoryPool, text);
            break;
            case Token_line_comment:
                node = createNode<LineComment>(m_memoryPool, text);
            break;
            case Token_multiline_comment:
                node = createNode<MultiLineComment>(m_memoryPool, text);
            break;
            case Token_whitespaces:
            case Token_char_literal:
            case Token_string_literal:
            default:
                node = createNode<NonIdToken>(m_memoryPool, text);
            break;
        }
        Q_ASSERT(node);
        node->setToken(containerIndex);
        tokens.append(node);
    }

    text->setTokens(tokens);

    return true;
}

// if-group -> ifDirective
// if-group -> ifdefDirevtive
// if-group -> ifndefDirevtive
bool Preprocessor::parseIfGroup(IfSection *ifSection)
{
    //  cout << "parse if group" << endl;
    Q_ASSERT(ifSection->toItemComposite());
    bool result;
    const Type type = lookAheadSkipHash();
    if (type == Token_directive_ifdef) {
        IfdefDirective *d = createNode<IfdefDirective>(m_memoryPool, ifSection);
        result = parseIfdefLikeDirective(d);
        ifSection->setIfGroup(d);
    } else if (type == Token_directive_ifndef) {
        IfndefDirective *d = createNode<IfndefDirective>(m_memoryPool, ifSection);
        result = parseIfdefLikeDirective(d);
        ifSection->setIfGroup(d);
    } else  if (type == Token_directive_if) {
        IfDirective *d = createNode<IfDirective>(m_memoryPool, ifSection);
        result = parseIfLikeDirective(d);
        ifSection->setIfGroup(d);
    } else {
        result = false;
    }
    return result;
}

bool Preprocessor::parseElifGroups(IfSection *ifSection)
{
    //cout << "parse ElifGroups" << endl;
    bool gotElif = false;
    while(lookAheadSkipHash() == Token_directive_elif ) {
        if (!parseElifGroup(ifSection))
            break;
        gotElif = true;
    }
    return gotElif;
}

bool Preprocessor::parseElifGroup(IfSection *ifSection)
{
    //cout << "parse ElifGroup" << endl;
    ElifDirective *elifDirective = createNode<ElifDirective>(m_memoryPool, ifSection);
    ifSection->addElifGroup(elifDirective);
    return parseIfLikeDirective(elifDirective);
}

bool Preprocessor::parseElseGroup(IfSection *ifSection)
{
    //cout << "parse else group" << endl;
    TokenSection tokenSection = readLine();
    if(tokenSection.count() == 0)
        return false;

    ElseDirective *elseDirective = createNode<ElseDirective>(m_memoryPool, ifSection);
    ifSection->setElseGroup(elseDirective);
    elseDirective->setTokenSection(tokenSection);
    parseGroup(elseDirective);
    return true;
}

//# endif newline
bool Preprocessor::parseEndifLine(IfSection *ifSection)
{
    //cout << "parse endifline" << endl;
    TokenSection tokenSection = readLine();
    if(tokenSection.count() == 0)
        return false;

    EndifDirective *endifDirective = createNode<EndifDirective>(m_memoryPool, ifSection);
    ifSection->setEndifLine(endifDirective);
    endifDirective->setTokenSection(tokenSection);

    return true;
}

//parses an "ifdef-like" directive, like #ifdef and #ifndef :)
//# ifdef identifier newline group[opt]
bool Preprocessor::parseIfdefLikeDirective(IfdefLikeDirective *node)
{
    Q_ASSERT(node->toItemComposite());
    const TokenSection tokenSection = readLine();
    const QVector<int> cleanedLine = cleanTokenRange(tokenSection);

    if(cleanedLine.count() < 3)
        return false;

    node->setTokenSection(tokenSection);
    node->setIdentifier(TokenList(m_tokenContainer, QVector<int>() << cleanedLine.at(2)));
    parseGroup(node);

    return true;
}

//# if constant-expression newline group[opt]
bool Preprocessor::parseIfLikeDirective(IfLikeDirective *node)
{
    //cout << "parse if-like directive" << endl;
    Q_ASSERT(node->toItemComposite());
    TokenSection tokenSection = readLine();
    QVector<int> cleanedSection = cleanTokenRange(tokenSection);
    if(cleanedSection.count() < 3)
        return false;

    cleanedSection.erase(cleanedSection.begin(), cleanedSection.begin() + 2); //remove # and if
    cleanedSection.pop_back(); //remove endl;

    const TokenList sectionList(m_tokenContainer, cleanedSection);
    ExpressionBuilder expressionBuilder(sectionList, m_tokenTypeList, m_memoryPool);
    Expression *expr = expressionBuilder.parse();
    node->setTokenSection(tokenSection);
    node->setExpression(expr);

    parseGroup(node);
    return true;
}

/*
   # define identifier                               replacement-list new-line
   # define identifier lparen identifier-list[opt] ) replacement-list new-line
   # define identifier lparen ... )                  replacement-list new-line
   # define identifier lparen identifier-list, ... ) replacement-list new-line
*/
bool Preprocessor::parseDefineDirective(Item *group)
{
    Q_ASSERT(group->toItemComposite());
    const TokenSection line = readLine();
    const QVector<int> cleanedLine = cleanTokenRange(line);
    if(cleanedLine.count() < 3)
        return false;

    // get identifier
    const int identifier = cleanedLine.at(2); //skip "#" and "define"
    DefineDirective *defineDirective = 0;
    int replacementListStart;

    // check if this is a macro function
    if (cleanedLine.count() >= 4
        && m_tokenContainer.text(cleanedLine.at(3)) == "("
        && !isWhiteSpace(cleanedLine.at(3) - 1)) {
        MacroFunctionDefinition *macro;
        macro = createNode<MacroFunctionDefinition>(m_memoryPool, group);

        int tokenIndex = 4; //point to first argument or ')'
        QVector<int> macroParameterList;
        while(tokenIndex < cleanedLine.count()) {
            QByteArray currentText = m_tokenContainer.text(cleanedLine.at(tokenIndex));
            ++tokenIndex;
            if(currentText == ")")
                break;
            if(currentText == ",")
                continue;
            macroParameterList.append(cleanedLine.at(tokenIndex - 1));
        }
        macro->setParameters(TokenList(m_tokenContainer, macroParameterList));
        defineDirective = macro;
        replacementListStart = tokenIndex;
    } else {
        MacroDefinition *macro;
        macro = createNode<MacroDefinition>(m_memoryPool, group);
        defineDirective = macro;
        replacementListStart = 3;
    }
    Q_ASSERT(defineDirective);

    // This is a bit hackish.. we want the replacement list with whitepspace
    // tokens, but cleanedLine() has already removed those. And we can't use
    // the original line, because that may contain escaped newline tokens.
    // So we remove the esacped newlines and search for the token number
    // given by cleanedLine.at(replacementListStart)
    QVector<int> replacementList;
    const QVector<int> noEscNewline = cleanEscapedNewLines(line);
    if (replacementListStart < cleanedLine.count()) {
        const int cleanedLineReplacementListStart = cleanedLine.at(replacementListStart);
        const int rListStart = noEscNewline.indexOf(cleanedLineReplacementListStart);
        if (rListStart != -1) {
            const int skipNewLineToken = 1;
            for (int i = rListStart; i < noEscNewline.count() - skipNewLineToken; ++i) {
                const int tokenContainerIndex = noEscNewline.at(i);
                const Type type = m_tokenTypeList.at(tokenContainerIndex);
                // Don't append comment tokens.
                if (type != Token_line_comment && type != Token_multiline_comment) {
                   replacementList.append(tokenContainerIndex);

                }
            }
        }
    }

    defineDirective->setTokenSection(line);
    defineDirective->setIdentifier(TokenList(m_tokenContainer, QVector<int>() << identifier));
    defineDirective->setReplacementList(TokenList(m_tokenContainer, replacementList));
    group->toItemComposite()->add(defineDirective);
    return true;
}


// # undef identifier newline
bool Preprocessor::parseUndefDirective(Item *group)
{
    Q_ASSERT(group->toItemComposite());
    const TokenSection tokenSection = readLine();
    const QVector<int> cleanedLine = cleanTokenRange(tokenSection);

    if(cleanedLine.count() < 3)
        return false;

    UndefDirective *undefDirective = createNode<UndefDirective>(m_memoryPool, group);
    group->toItemComposite()->add(undefDirective);
    undefDirective->setIdentifier(TokenList(m_tokenContainer, QVector<int>() << cleanedLine.at(2)));
    undefDirective->setTokenSection(tokenSection);
    return true;
}

//include pp-tokens new-line
bool Preprocessor::parseIncludeDirective(Item *group)
{
  //  cout << "parseIncludeDirective" << endl;
    Q_ASSERT(group->toItemComposite());
    TokenSection tokenSection = readLine();
    if(tokenSection.count() == 0)
        return false;

    const TokenEngine::TokenContainer tokenContainer = tokenSection.tokenContainer(0);
    IncludeDirective *includeDirective = createNode<IncludeDirective>(m_memoryPool, group);
    group->toItemComposite()->add(includeDirective);
    includeDirective->setTokenSection(tokenSection);

    //remove whitepspace and comment tokens
    TokenList tokenList(m_tokenContainer, cleanTokenRange(tokenSection));

    //iterate through the tokens, look for a string literal or a '<'.
    int tokenIndex = 0;
    const int endIndex = tokenList.count();
    while (tokenIndex < endIndex) {
        const int containerTokenIndex = tokenList.containerIndex(tokenIndex);
        if(m_tokenTypeList.at(containerTokenIndex) == Token_string_literal) {
            QByteArray tokenText = tokenList.text(tokenIndex);
            includeDirective->setFilename(tokenText.mid(1, tokenText.size() -2)); //remove quotes
            includeDirective->setFilenameTokens(TokenEngine::TokenList(tokenContainer, QVector<int>() << containerTokenIndex));
            includeDirective->setIncludeType(IncludeDirective::QuoteInclude);
            break;
        } else if(tokenList.text(tokenIndex) == "<") {
            // We found a <, all following tokens until we read a
            // > is a part of the file anme
            QByteArray filename;
            ++tokenIndex; //skip '<'
            QVector<int> filenameTokens;
            while(tokenIndex < endIndex) {
                const QByteArray tokenText = tokenList.text(tokenIndex);
                if(tokenText == ">")
                    break;
                filenameTokens.append(tokenList.containerIndex(tokenIndex));
                filename += tokenText;
                ++tokenIndex;
            }
            if(tokenIndex < endIndex) {
                includeDirective->setFilename(filename);
                includeDirective->setFilenameTokens(TokenEngine::TokenList(tokenContainer, filenameTokens));
                includeDirective->setIncludeType(IncludeDirective::AngleBracketInclude);
            }
            break;
        }
        ++tokenIndex;
    }

    return true;
}

//# error pp-tokens[opt] new-line
bool Preprocessor::parseErrorDirective(Item *group)
{
    Q_ASSERT(group->toItemComposite());
    TokenSection tokenSection = readLine();
    if(tokenSection.count() == 0)
        return false;

    ErrorDirective *errorDirective = createNode<ErrorDirective>(m_memoryPool, group);
    group->toItemComposite()->add(errorDirective);
    errorDirective->setTokenSection(tokenSection);
    return true;
}

//# pragma pp-tokens[opt] new-line
bool Preprocessor::parsePragmaDirective(Item *group)
{
    Q_ASSERT(group->toItemComposite());
    TokenSection tokenSection = readLine();
    if(tokenSection.count() == 0)
        return false;

    PragmaDirective *pragmaDirective = createNode<PragmaDirective>(m_memoryPool, group);
    group->toItemComposite()->add(pragmaDirective);
    pragmaDirective->setTokenSection(tokenSection);
    return true;
}
/*
    Reads a preprocessor line from the source by advancing lexerTokenIndex and
    returing a TokenSection containg the read line. Text lines separated by
    an escaped newline are joined.
*/
TokenSection Preprocessor::readLine()
{
    const int startIndex = lexerTokenIndex;
    bool gotNewline = false;

    while(isValidIndex(lexerTokenIndex) && !gotNewline) {
        if(m_tokenTypeList.at(lexerTokenIndex) == Token_newline) {
            if (lexerTokenIndex == 0 || m_tokenTypeList.at(lexerTokenIndex-1) != '\\') {
                gotNewline = true;
                break;
            }
        }
        ++lexerTokenIndex;
    }

    if(gotNewline)
        ++lexerTokenIndex; //include newline
    else
        emit error(QLatin1String("Error"), QLatin1String("Unexpected end of source"));

    return TokenSection(m_tokenContainer, startIndex, lexerTokenIndex - startIndex);
}

/*
    Returns false if index is past the end of m_tokenContainer.
*/
inline bool Preprocessor::isValidIndex(const int index) const
{
    return  (index < m_tokenContainer.count());
}

/*
    Returns true if the token at index is a whitepsace token.
*/
inline bool Preprocessor::isWhiteSpace(const int index) const
{
    return (m_tokenTypeList.at(index) == Token_whitespaces);
}

/*
    Looks ahead from lexerTokenIndex, returns the token type found at the first
    token that is not a comment or whitespace token.
*/
Type Preprocessor::lookAhead() const
{
    const int index = skipWhiteSpaceAndComments();
    if (index == -1)
        return Token_eof;
    return m_tokenTypeList.at(index);
}
/*
    Looks ahead from lexerTokenIndex, returns the token type found at the first
    token that is not a comment, whitespace or '#' token.
*/
Type Preprocessor::lookAheadSkipHash() const
{
     const int index = skipWhiteSpaceCommentsHash();
     if (index == -1)
        return Token_eof;
    return m_tokenTypeList.at(index);
}

/*
    Returns the index for the first token after lexerTokenIndex that is not a
    whitespace or comment token.
*/
inline int Preprocessor::skipWhiteSpaceAndComments() const
{
    int index = lexerTokenIndex;
    if(!isValidIndex(index))
           return -1;
    while(m_tokenTypeList.at(index) == Token_whitespaces
             || m_tokenTypeList.at(index) == Token_comment
             || m_tokenTypeList.at(index) == Token_line_comment
             || m_tokenTypeList.at(index) == Token_multiline_comment ) {
       ++index;
       if(!isValidIndex(index))
           return -1;
    }
    return index;
}

/*
    Returns the index for the first token after lexerTokenIndex that is not a
    whitespace, comment or '#' token.
*/
inline int Preprocessor::skipWhiteSpaceCommentsHash() const
{
    int index = lexerTokenIndex;
    if(!isValidIndex(index))
           return -1;
    while(m_tokenTypeList.at(index) == Token_whitespaces
             || m_tokenTypeList.at(index) == Token_comment
             || m_tokenTypeList.at(index) == Token_line_comment
             || m_tokenTypeList.at(index) == Token_multiline_comment
             || m_tokenTypeList.at(index) == Token_preproc ) {
       ++index;
       if(!isValidIndex(index))
           return -1;
    }
    return index;
}

/*
    Removes escaped newlines from tokenSection. Both the escape token ('\')
    and the newline token ('\n') are removed.
*/
QVector<int> Preprocessor::cleanEscapedNewLines(const TokenSection &tokenSection) const
{
    QVector<int> indexList;

    int t = 0;
    const int numTokens = tokenSection.count();
    while (t < numTokens) {
        const int containerIndex = tokenSection.containerIndex(t);
        const int currentToken = t;
        ++t;

        //handle escaped newlines
		if (tokenSection.text(currentToken) == "\\"
           && currentToken + 1 < numTokens
           && m_tokenTypeList.at(containerIndex + 1) == Token_newline)
             continue;

        indexList.append(containerIndex);
    }
    return indexList;
}

/*
    Removes escaped newlines, whitespace and comment tokens from tokenSection
*/
QVector<int> Preprocessor::cleanTokenRange(const TokenSection &tokenSection) const
{
    QVector<int> indexList;

    int t = 0;
    const int numTokens = tokenSection.count();
    while (t < numTokens) {
        const int containerIndex = tokenSection.containerIndex(t);
        const Type tokenType = m_tokenTypeList.at(containerIndex);
        const int currentToken = t;
        ++t;

        if(tokenType == Token_whitespaces ||
           tokenType == Token_line_comment ||
           tokenType == Token_multiline_comment )
            continue;

        //handle escaped newlines
        if(tokenSection.text(currentToken) == "\\" &&
           currentToken + 1 < numTokens &&
           m_tokenTypeList.at(containerIndex + 1) == Token_newline)
            continue;

        indexList.append(containerIndex);
    }
    return indexList;
}
/*
    Returns the text for an Item node and all its children.
*/
QByteArray visitGetText(Item *item)
{
    QByteArray text;

    text += item->text().fullText();

    if(item->toItemComposite()) {
        ItemComposite *composite = item->toItemComposite();
        for (int i=0; i <composite->count(); ++i)
            text += visitGetText(composite->item(i));
    }

    return text;
}

void Source::setFileName(const QString &fileName)
{
    m_fileName = fileName;
}

} // namespace Rpp

QT_END_NAMESPACE