util/lexgen/generator.cpp
changeset 0 1918ee327afb
child 4 3b1da2848fc7
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/util/lexgen/generator.cpp	Mon Jan 11 14:00:40 2010 +0000
@@ -0,0 +1,532 @@
+/****************************************************************************
+**
+** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies).
+** All rights reserved.
+** Contact: Nokia Corporation (qt-info@nokia.com)
+**
+** This file is part of the utils of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** No Commercial Usage
+** This file contains pre-release code and may not be distributed.
+** You may use this file in accordance with the terms and conditions
+** contained in the Technology Preview License Agreement accompanying
+** this package.
+**
+** GNU Lesser General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU Lesser
+** General Public License version 2.1 as published by the Free Software
+** Foundation and appearing in the file LICENSE.LGPL included in the
+** packaging of this file.  Please review the following information to
+** ensure the GNU Lesser General Public License version 2.1 requirements
+** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
+**
+** In addition, as a special exception, Nokia gives you certain additional
+** rights.  These rights are described in the Nokia Qt LGPL Exception
+** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
+**
+** If you have questions regarding the use of this file, please contact
+** Nokia at qt-info@nokia.com.
+**
+**
+**
+**
+**
+**
+**
+**
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+
+#include "generator.h"
+
+#include <QFile>
+
+void Function::printDeclaration(CodeBlock &block, const QString &funcNamePrefix) const
+{
+    block << (iline ? "inline " : "") << signature(funcNamePrefix) << (iline ? QLatin1String(" {") : QLatin1String(";"));
+    if (!iline)
+        return;
+    
+    block.indent();
+    QString tmp = body;
+    if (tmp.endsWith(QLatin1Char('\n')))
+        tmp.chop(1);
+    foreach (QString line, tmp.split(QLatin1Char('\n')))
+        block << line;
+    block.outdent();
+    block << "}";
+}
+
+QString Function::signature(const QString &funcNamePrefix) const
+{
+   QString sig;
+   if (!rtype.isEmpty()) {
+       sig += rtype;
+       sig += QLatin1Char(' ');
+   }
+   sig += funcNamePrefix;
+   sig += fname;
+   if (cnst)
+       sig += " const";
+   return sig;
+}
+
+QString Function::definition() const
+{
+    if (iline)
+        return QString();
+
+    QString result;
+    result += signature();
+    result += QLatin1String("\n{\n");
+    
+    QString tmp = body;
+
+    if (tmp.endsWith(QLatin1Char('\n')))
+        tmp.chop(1);
+    if (!tmp.startsWith(QLatin1Char('\n')))
+        tmp.prepend("    ");
+
+    tmp.replace(QLatin1Char('\n'), QLatin1String("\n    "));
+
+    result += tmp;
+    
+    result += QLatin1String("\n}\n");
+
+    return result;
+}
+
+void Class::Section::printDeclaration(const Class *klass, CodeBlock &block) const
+{
+    foreach (Function ctor, constructors)
+        ctor.printDeclaration(block, klass->name());
+    
+    if (!constructors.isEmpty())
+        block.addNewLine();
+    
+    foreach (Function func, functions)
+        func.printDeclaration(block);
+    
+    if (!functions.isEmpty())
+        block.addNewLine();
+    
+    foreach (QString var, variables)
+        block << var << ';';
+}
+
+void Class::addConstructor(Access access, const QString &body, const QString &_args)
+{
+    Function ctor;
+    QString args = _args;
+    if (!args.startsWith(QLatin1Char('('))
+        && !args.endsWith(QLatin1Char(')'))) {
+        args.prepend('(');
+        args.append(')');
+    }
+    ctor.setName(args);
+    ctor.addBody(body);
+    sections[access].constructors.append(ctor);
+}
+
+QString Class::Section::definition(const Class *klass) const
+{
+    QString result;
+    
+    foreach (Function ctor, constructors) {
+        ctor.setName(klass->name() + "::" + klass->name() + ctor.name());
+        result += ctor.definition();
+        result += QLatin1Char('\n');
+    }
+    
+    foreach (Function func, functions) {
+        if (!func.hasBody()) continue;
+        func.setName(klass->name() + "::" + func.name());
+        result += func.definition();
+        result += QLatin1Char('\n');
+    }
+
+    return result;
+}
+
+QString Class::declaration() const
+{
+    CodeBlock block;
+    
+    block << QLatin1String("class ") << cname;
+    block << "{";
+    
+    if (!sections[PublicMember].isEmpty()) {
+        block << "public:";
+        block.indent();
+        sections[PublicMember].printDeclaration(this, block);
+        block.outdent();
+    }
+
+    if (!sections[ProtectedMember].isEmpty()) {
+        block << "protected:";
+        block.indent();
+        sections[ProtectedMember].printDeclaration(this, block);
+        block.outdent();
+    }
+    
+    if (!sections[PrivateMember].isEmpty()) {
+        block << "private:";
+        block.indent();
+        sections[PrivateMember].printDeclaration(this, block);
+        block.outdent();
+    }
+
+    block << "};";
+    block.addNewLine();
+    
+    return block.toString();
+}
+
+QString Class::definition() const
+{
+    return sections[PrivateMember].definition(this)
+           + sections[ProtectedMember].definition(this)
+           + sections[PublicMember].definition(this);
+}
+
+Generator::Generator(const DFA &_dfa, const Config &config)
+     : dfa(_dfa), cfg(config)
+{
+    QList<InputType> lst = cfg.maxInputSet.toList();
+    qSort(lst);
+    minInput = lst.first();
+    maxInput = lst.last();
+
+    ConfigFile::Section section = config.configSections.value("Code Generator Options");
+
+    foreach (ConfigFile::Entry entry, section) {
+        if (!entry.key.startsWith(QLatin1String("MapToCode["))
+            || !entry.key.endsWith(QLatin1Char(']')))
+            continue;
+        QString range = entry.key;
+        range.remove(0, qstrlen("MapToCode["));
+        range.chop(1);
+        if (range.length() != 3
+            || range.at(1) != QLatin1Char('-')) {
+            qWarning("Invalid range for char mapping function: %s", qPrintable(range));
+            continue;
+        }
+        TransitionSequence seq;
+        seq.first = range.at(0).unicode();
+        seq.last = range.at(2).unicode();
+        seq.testFunction = entry.value;
+        charFunctionRanges.append(seq);
+    }
+
+    QString tokenPrefix = section.value("TokenPrefix");
+    if (!tokenPrefix.isEmpty()) {
+        for (int i = 0; i < dfa.count(); ++i)
+            if (!dfa.at(i).symbol.isEmpty()
+                && !dfa.at(i).symbol.endsWith(QLatin1String("()")))
+                dfa[i].symbol.prepend(tokenPrefix);
+    }
+
+    headerFileName = section.value("FileHeader");
+}
+
+static inline bool adjacentKeys(int left, int right) { return left + 1 == right; }
+//static inline bool adjacentKeys(const InputType &left, const InputType &right)
+//{ return left.val + 1 == right.val; }
+
+static QVector<Generator::TransitionSequence> convertToSequences(const TransitionMap &transitions)
+{
+    QVector<Generator::TransitionSequence> sequences;
+    if (transitions.isEmpty())
+        return sequences;
+
+    QList<InputType> keys = transitions.keys();
+    qSort(keys);
+    int i = 0;
+    Generator::TransitionSequence sequence;
+    sequence.first = keys.at(0);
+    ++i;
+    for (; i < keys.count(); ++i) {
+        if (adjacentKeys(keys.at(i - 1), keys.at(i))
+            && transitions.value(keys.at(i)) == transitions.value(keys.at(i - 1))) {
+            continue;
+        }
+        sequence.last = keys.at(i - 1);
+        sequence.transition = transitions.value(sequence.last);
+        sequences.append(sequence);
+
+        sequence.first = keys.at(i);
+    }
+    sequence.last = keys.at(i - 1);
+    sequence.transition = transitions.value(sequence.last);
+    sequences.append(sequence);
+
+    return sequences;
+}
+
+QDebug &operator<<(QDebug &debug, const Generator::TransitionSequence &seq)
+{
+    return debug << "[first:" << seq.first << "; last:" << seq.last << "; transition:" << seq.transition
+                 << (seq.testFunction.isEmpty() ? QString() : QString(QString("; testfunction:" + seq.testFunction)))
+                 << "]";
+}
+
+bool Generator::isSingleReferencedFinalState(int i) const
+{
+    return backReferenceMap.value(i) == 1
+           && dfa.at(i).transitions.isEmpty()
+           && !dfa.at(i).symbol.isEmpty();
+}
+
+void Generator::generateTransitions(CodeBlock &body, const TransitionMap &transitions)
+{
+    if (transitions.isEmpty())
+        return;
+
+    QVector<TransitionSequence> sequences = convertToSequences(transitions);
+
+    bool needsCharFunction = false;
+    if (!charFunctionRanges.isEmpty()) {
+        int i = 0;
+        while (i < sequences.count()) {
+            const TransitionSequence &seq = sequences.at(i);
+            if (!seq.testFunction.isEmpty()) {
+                ++i;
+                continue;
+            }
+
+            foreach (TransitionSequence range, charFunctionRanges)
+                if (range.first >= seq.first && range.last <= seq.last) {
+                    needsCharFunction = true;
+
+                    TransitionSequence left, middle, right;
+
+                    left.first = seq.first;
+                    left.last = range.first - 1;
+                    left.transition = seq.transition;
+
+                    middle = range;
+                    middle.transition = seq.transition;
+
+                    right.first = range.last + 1;
+                    right.last = seq.last;
+                    right.transition = seq.transition;
+
+                    sequences.remove(i);
+                    if (left.last >= left.first) {
+                        sequences.insert(i, left);
+                        ++i;
+                    }
+                    sequences.insert(i, middle);
+                    ++i;
+                    if (right.last >= right.first) {
+                        sequences.insert(i, right);
+                        ++i;
+                    }
+
+                    i = -1;
+                    break;
+                }
+
+            ++i;
+        }
+    }
+
+    //qDebug() << "sequence count" << sequences.count();
+    //qDebug() << sequences;
+
+    if (sequences.count() < 10
+        || sequences.last().last == maxInput
+        || needsCharFunction) {
+        foreach (TransitionSequence seq, sequences) {
+            const bool embedFinalState = isSingleReferencedFinalState(seq.transition);
+
+            QString brace;
+            if (embedFinalState)
+                brace = " {";
+
+            if (!seq.testFunction.isEmpty()) {
+                body << "if (" << seq.testFunction << ")" << brace;
+            } else if (seq.first == seq.last) {
+                body << "if (ch.unicode() == " << seq.first << ")" << brace;
+            } else {
+                if (seq.last < maxInput)
+                    body << "if (ch.unicode() >= " << seq.first
+                         << " && ch.unicode() <= " << seq.last << ")" << brace;
+                else
+                    body << "if (ch.unicode() >= " << seq.first << ")" << brace;
+            }
+            body.indent();
+            if (embedFinalState) {
+                body << "token = " << dfa.at(seq.transition).symbol << ";";
+                body << "goto found;";
+
+                body.outdent();
+                body << "}";
+            } else {
+                body << "goto state_" << seq.transition << ";";
+                body.outdent();
+            }
+        }
+    } else {
+        QList<InputType> keys = transitions.keys();
+        qSort(keys);
+
+        body << "switch (ch.unicode()) {";
+        body.indent();
+        for (int k = 0; k < keys.count(); ++k) {
+            const InputType key = keys.at(k);
+            const int trans = transitions.value(key);
+
+            QString keyStr;
+            if (key == '\\')
+                keyStr = QString("\'\\\\\'");
+            else if (key >= 48 && key < 127)
+                keyStr = QString('\'') + QChar::fromLatin1(char(key)) + QChar('\'');
+            else
+                keyStr = QString::number(key);
+
+            if (k < keys.count() - 1
+                && transitions.value(keys.at(k + 1)) == trans) {
+                body << "case " << keyStr << ":";
+            } else {
+                if (isSingleReferencedFinalState(trans)) {
+                    body << "case " << keyStr << ": token = " << dfa.at(trans).symbol << "; goto found;";
+                } else {
+                    body << "case " << keyStr << ": goto state_" << trans << ";";
+                }
+            }
+        }
+        body.outdent();
+        body << "}";
+    }
+}
+
+QString Generator::generate()
+{
+    Class klass(cfg.className);
+    
+    klass.addMember(Class::PublicMember, "QString input");
+    klass.addMember(Class::PublicMember, "int pos");
+    klass.addMember(Class::PublicMember, "int lexemStart");
+    klass.addMember(Class::PublicMember, "int lexemLength");
+    
+    {
+        CodeBlock body;
+        body << "input = inp;";
+        body << "pos = 0;";
+        body << "lexemStart = 0;";
+        body << "lexemLength = 0;";
+        klass.addConstructor(Class::PublicMember, body, "const QString &inp");
+    }
+    
+    {
+        Function next("QChar", "next()");
+        next.setInline(true);
+        if (cfg.caseSensitivity == Qt::CaseSensitive)
+            next.addBody("return (pos < input.length()) ? input.at(pos++) : QChar();");
+        else
+            next.addBody("return (pos < input.length()) ? input.at(pos++).toLower() : QChar();");
+        klass.addMember(Class::PublicMember, next);
+    }
+    
+    /*
+    {
+        Function lexem("QString", "lexem()");
+        lexem.setConst(true);
+        lexem.setInline(true);
+        lexem.addBody("return input.mid(lexemStart, lexemLength);");
+        klass.addMember(Class::PublicMember, lexem);
+    }
+    */
+
+    for (int i = 0; i < dfa.count(); ++i)
+        if (dfa.at(i).symbol.endsWith(QLatin1String("()"))) {
+            Function handlerFunc("int", dfa.at(i).symbol);
+            klass.addMember(Class::PublicMember, handlerFunc);
+        }
+
+    Function lexFunc;
+    lexFunc.setReturnType("int");
+    lexFunc.setName("lex()");
+    
+    CodeBlock body;
+    body << "lexemStart = pos;";
+    body << "lexemLength = 0;";
+    body << "int lastAcceptingPos = -1;";
+    body << "int token = -1;";
+    body << "QChar ch;";
+    body.addNewLine();
+
+    backReferenceMap.clear();
+    foreach (State s, dfa)
+        foreach (int state, s.transitions)
+            backReferenceMap[state]++;
+
+    bool haveSingleReferencedFinalState = false;
+
+    for (int i = 0; i < dfa.count(); ++i) {
+        if (isSingleReferencedFinalState(i)) {
+            haveSingleReferencedFinalState = true;
+            continue;
+        }
+
+        if (i > 0)
+            body << "state_" << i << ":";
+        else
+            body << "// initial state";
+
+        body.indent();
+
+        if (!dfa.at(i).symbol.isEmpty()) {
+            body << "lastAcceptingPos = pos;";
+            body << "token = " << dfa.at(i).symbol << ";";
+        }
+
+        body.outdent();
+
+        body.indent();
+        
+        if (!dfa.at(i).transitions.isEmpty()) {
+            body << "ch = next();";
+            generateTransitions(body, dfa.at(i).transitions);
+        }
+        
+        body << "goto out;";
+        
+        body.outdent();
+    }
+
+    if (haveSingleReferencedFinalState) {
+        body << "found:";
+        body << "lastAcceptingPos = pos;";
+        body.addNewLine();
+    }
+
+    body << "out:";
+    body << "if (lastAcceptingPos != -1) {";
+    body.indent();
+    body << "lexemLength = lastAcceptingPos - lexemStart;";
+    body << "pos = lastAcceptingPos;";
+    body.outdent();
+    body << "}";
+    body << "return token;";
+    
+    lexFunc.addBody(body);
+    
+    klass.addMember(Class::PublicMember, lexFunc);
+
+    QString header;
+    QFile headerFile(headerFileName);
+    if (!headerFileName.isEmpty()
+        && headerFile.exists()
+        && headerFile.open(QIODevice::ReadOnly)) {
+        header = QString::fromUtf8(headerFile.readAll());
+    }
+
+    header += QLatin1String("// auto generated. DO NOT EDIT.\n");
+    
+    return header + klass.declaration() + klass.definition();
+}
+