util/lexgen/generator.cpp
changeset 0 1918ee327afb
child 4 3b1da2848fc7
equal deleted inserted replaced
-1:000000000000 0:1918ee327afb
       
     1 /****************************************************************************
       
     2 **
       
     3 ** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies).
       
     4 ** All rights reserved.
       
     5 ** Contact: Nokia Corporation (qt-info@nokia.com)
       
     6 **
       
     7 ** This file is part of the utils of the Qt Toolkit.
       
     8 **
       
     9 ** $QT_BEGIN_LICENSE:LGPL$
       
    10 ** No Commercial Usage
       
    11 ** This file contains pre-release code and may not be distributed.
       
    12 ** You may use this file in accordance with the terms and conditions
       
    13 ** contained in the Technology Preview License Agreement accompanying
       
    14 ** this package.
       
    15 **
       
    16 ** GNU Lesser General Public License Usage
       
    17 ** Alternatively, this file may be used under the terms of the GNU Lesser
       
    18 ** General Public License version 2.1 as published by the Free Software
       
    19 ** Foundation and appearing in the file LICENSE.LGPL included in the
       
    20 ** packaging of this file.  Please review the following information to
       
    21 ** ensure the GNU Lesser General Public License version 2.1 requirements
       
    22 ** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
       
    23 **
       
    24 ** In addition, as a special exception, Nokia gives you certain additional
       
    25 ** rights.  These rights are described in the Nokia Qt LGPL Exception
       
    26 ** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
       
    27 **
       
    28 ** If you have questions regarding the use of this file, please contact
       
    29 ** Nokia at qt-info@nokia.com.
       
    30 **
       
    31 **
       
    32 **
       
    33 **
       
    34 **
       
    35 **
       
    36 **
       
    37 **
       
    38 ** $QT_END_LICENSE$
       
    39 **
       
    40 ****************************************************************************/
       
    41 
       
    42 #include "generator.h"
       
    43 
       
    44 #include <QFile>
       
    45 
       
    46 void Function::printDeclaration(CodeBlock &block, const QString &funcNamePrefix) const
       
    47 {
       
    48     block << (iline ? "inline " : "") << signature(funcNamePrefix) << (iline ? QLatin1String(" {") : QLatin1String(";"));
       
    49     if (!iline)
       
    50         return;
       
    51     
       
    52     block.indent();
       
    53     QString tmp = body;
       
    54     if (tmp.endsWith(QLatin1Char('\n')))
       
    55         tmp.chop(1);
       
    56     foreach (QString line, tmp.split(QLatin1Char('\n')))
       
    57         block << line;
       
    58     block.outdent();
       
    59     block << "}";
       
    60 }
       
    61 
       
    62 QString Function::signature(const QString &funcNamePrefix) const
       
    63 {
       
    64    QString sig;
       
    65    if (!rtype.isEmpty()) {
       
    66        sig += rtype;
       
    67        sig += QLatin1Char(' ');
       
    68    }
       
    69    sig += funcNamePrefix;
       
    70    sig += fname;
       
    71    if (cnst)
       
    72        sig += " const";
       
    73    return sig;
       
    74 }
       
    75 
       
    76 QString Function::definition() const
       
    77 {
       
    78     if (iline)
       
    79         return QString();
       
    80 
       
    81     QString result;
       
    82     result += signature();
       
    83     result += QLatin1String("\n{\n");
       
    84     
       
    85     QString tmp = body;
       
    86 
       
    87     if (tmp.endsWith(QLatin1Char('\n')))
       
    88         tmp.chop(1);
       
    89     if (!tmp.startsWith(QLatin1Char('\n')))
       
    90         tmp.prepend("    ");
       
    91 
       
    92     tmp.replace(QLatin1Char('\n'), QLatin1String("\n    "));
       
    93 
       
    94     result += tmp;
       
    95     
       
    96     result += QLatin1String("\n}\n");
       
    97 
       
    98     return result;
       
    99 }
       
   100 
       
   101 void Class::Section::printDeclaration(const Class *klass, CodeBlock &block) const
       
   102 {
       
   103     foreach (Function ctor, constructors)
       
   104         ctor.printDeclaration(block, klass->name());
       
   105     
       
   106     if (!constructors.isEmpty())
       
   107         block.addNewLine();
       
   108     
       
   109     foreach (Function func, functions)
       
   110         func.printDeclaration(block);
       
   111     
       
   112     if (!functions.isEmpty())
       
   113         block.addNewLine();
       
   114     
       
   115     foreach (QString var, variables)
       
   116         block << var << ';';
       
   117 }
       
   118 
       
   119 void Class::addConstructor(Access access, const QString &body, const QString &_args)
       
   120 {
       
   121     Function ctor;
       
   122     QString args = _args;
       
   123     if (!args.startsWith(QLatin1Char('('))
       
   124         && !args.endsWith(QLatin1Char(')'))) {
       
   125         args.prepend('(');
       
   126         args.append(')');
       
   127     }
       
   128     ctor.setName(args);
       
   129     ctor.addBody(body);
       
   130     sections[access].constructors.append(ctor);
       
   131 }
       
   132 
       
   133 QString Class::Section::definition(const Class *klass) const
       
   134 {
       
   135     QString result;
       
   136     
       
   137     foreach (Function ctor, constructors) {
       
   138         ctor.setName(klass->name() + "::" + klass->name() + ctor.name());
       
   139         result += ctor.definition();
       
   140         result += QLatin1Char('\n');
       
   141     }
       
   142     
       
   143     foreach (Function func, functions) {
       
   144         if (!func.hasBody()) continue;
       
   145         func.setName(klass->name() + "::" + func.name());
       
   146         result += func.definition();
       
   147         result += QLatin1Char('\n');
       
   148     }
       
   149 
       
   150     return result;
       
   151 }
       
   152 
       
   153 QString Class::declaration() const
       
   154 {
       
   155     CodeBlock block;
       
   156     
       
   157     block << QLatin1String("class ") << cname;
       
   158     block << "{";
       
   159     
       
   160     if (!sections[PublicMember].isEmpty()) {
       
   161         block << "public:";
       
   162         block.indent();
       
   163         sections[PublicMember].printDeclaration(this, block);
       
   164         block.outdent();
       
   165     }
       
   166 
       
   167     if (!sections[ProtectedMember].isEmpty()) {
       
   168         block << "protected:";
       
   169         block.indent();
       
   170         sections[ProtectedMember].printDeclaration(this, block);
       
   171         block.outdent();
       
   172     }
       
   173     
       
   174     if (!sections[PrivateMember].isEmpty()) {
       
   175         block << "private:";
       
   176         block.indent();
       
   177         sections[PrivateMember].printDeclaration(this, block);
       
   178         block.outdent();
       
   179     }
       
   180 
       
   181     block << "};";
       
   182     block.addNewLine();
       
   183     
       
   184     return block.toString();
       
   185 }
       
   186 
       
   187 QString Class::definition() const
       
   188 {
       
   189     return sections[PrivateMember].definition(this)
       
   190            + sections[ProtectedMember].definition(this)
       
   191            + sections[PublicMember].definition(this);
       
   192 }
       
   193 
       
   194 Generator::Generator(const DFA &_dfa, const Config &config)
       
   195      : dfa(_dfa), cfg(config)
       
   196 {
       
   197     QList<InputType> lst = cfg.maxInputSet.toList();
       
   198     qSort(lst);
       
   199     minInput = lst.first();
       
   200     maxInput = lst.last();
       
   201 
       
   202     ConfigFile::Section section = config.configSections.value("Code Generator Options");
       
   203 
       
   204     foreach (ConfigFile::Entry entry, section) {
       
   205         if (!entry.key.startsWith(QLatin1String("MapToCode["))
       
   206             || !entry.key.endsWith(QLatin1Char(']')))
       
   207             continue;
       
   208         QString range = entry.key;
       
   209         range.remove(0, qstrlen("MapToCode["));
       
   210         range.chop(1);
       
   211         if (range.length() != 3
       
   212             || range.at(1) != QLatin1Char('-')) {
       
   213             qWarning("Invalid range for char mapping function: %s", qPrintable(range));
       
   214             continue;
       
   215         }
       
   216         TransitionSequence seq;
       
   217         seq.first = range.at(0).unicode();
       
   218         seq.last = range.at(2).unicode();
       
   219         seq.testFunction = entry.value;
       
   220         charFunctionRanges.append(seq);
       
   221     }
       
   222 
       
   223     QString tokenPrefix = section.value("TokenPrefix");
       
   224     if (!tokenPrefix.isEmpty()) {
       
   225         for (int i = 0; i < dfa.count(); ++i)
       
   226             if (!dfa.at(i).symbol.isEmpty()
       
   227                 && !dfa.at(i).symbol.endsWith(QLatin1String("()")))
       
   228                 dfa[i].symbol.prepend(tokenPrefix);
       
   229     }
       
   230 
       
   231     headerFileName = section.value("FileHeader");
       
   232 }
       
   233 
       
   234 static inline bool adjacentKeys(int left, int right) { return left + 1 == right; }
       
   235 //static inline bool adjacentKeys(const InputType &left, const InputType &right)
       
   236 //{ return left.val + 1 == right.val; }
       
   237 
       
   238 static QVector<Generator::TransitionSequence> convertToSequences(const TransitionMap &transitions)
       
   239 {
       
   240     QVector<Generator::TransitionSequence> sequences;
       
   241     if (transitions.isEmpty())
       
   242         return sequences;
       
   243 
       
   244     QList<InputType> keys = transitions.keys();
       
   245     qSort(keys);
       
   246     int i = 0;
       
   247     Generator::TransitionSequence sequence;
       
   248     sequence.first = keys.at(0);
       
   249     ++i;
       
   250     for (; i < keys.count(); ++i) {
       
   251         if (adjacentKeys(keys.at(i - 1), keys.at(i))
       
   252             && transitions.value(keys.at(i)) == transitions.value(keys.at(i - 1))) {
       
   253             continue;
       
   254         }
       
   255         sequence.last = keys.at(i - 1);
       
   256         sequence.transition = transitions.value(sequence.last);
       
   257         sequences.append(sequence);
       
   258 
       
   259         sequence.first = keys.at(i);
       
   260     }
       
   261     sequence.last = keys.at(i - 1);
       
   262     sequence.transition = transitions.value(sequence.last);
       
   263     sequences.append(sequence);
       
   264 
       
   265     return sequences;
       
   266 }
       
   267 
       
   268 QDebug &operator<<(QDebug &debug, const Generator::TransitionSequence &seq)
       
   269 {
       
   270     return debug << "[first:" << seq.first << "; last:" << seq.last << "; transition:" << seq.transition
       
   271                  << (seq.testFunction.isEmpty() ? QString() : QString(QString("; testfunction:" + seq.testFunction)))
       
   272                  << "]";
       
   273 }
       
   274 
       
   275 bool Generator::isSingleReferencedFinalState(int i) const
       
   276 {
       
   277     return backReferenceMap.value(i) == 1
       
   278            && dfa.at(i).transitions.isEmpty()
       
   279            && !dfa.at(i).symbol.isEmpty();
       
   280 }
       
   281 
       
   282 void Generator::generateTransitions(CodeBlock &body, const TransitionMap &transitions)
       
   283 {
       
   284     if (transitions.isEmpty())
       
   285         return;
       
   286 
       
   287     QVector<TransitionSequence> sequences = convertToSequences(transitions);
       
   288 
       
   289     bool needsCharFunction = false;
       
   290     if (!charFunctionRanges.isEmpty()) {
       
   291         int i = 0;
       
   292         while (i < sequences.count()) {
       
   293             const TransitionSequence &seq = sequences.at(i);
       
   294             if (!seq.testFunction.isEmpty()) {
       
   295                 ++i;
       
   296                 continue;
       
   297             }
       
   298 
       
   299             foreach (TransitionSequence range, charFunctionRanges)
       
   300                 if (range.first >= seq.first && range.last <= seq.last) {
       
   301                     needsCharFunction = true;
       
   302 
       
   303                     TransitionSequence left, middle, right;
       
   304 
       
   305                     left.first = seq.first;
       
   306                     left.last = range.first - 1;
       
   307                     left.transition = seq.transition;
       
   308 
       
   309                     middle = range;
       
   310                     middle.transition = seq.transition;
       
   311 
       
   312                     right.first = range.last + 1;
       
   313                     right.last = seq.last;
       
   314                     right.transition = seq.transition;
       
   315 
       
   316                     sequences.remove(i);
       
   317                     if (left.last >= left.first) {
       
   318                         sequences.insert(i, left);
       
   319                         ++i;
       
   320                     }
       
   321                     sequences.insert(i, middle);
       
   322                     ++i;
       
   323                     if (right.last >= right.first) {
       
   324                         sequences.insert(i, right);
       
   325                         ++i;
       
   326                     }
       
   327 
       
   328                     i = -1;
       
   329                     break;
       
   330                 }
       
   331 
       
   332             ++i;
       
   333         }
       
   334     }
       
   335 
       
   336     //qDebug() << "sequence count" << sequences.count();
       
   337     //qDebug() << sequences;
       
   338 
       
   339     if (sequences.count() < 10
       
   340         || sequences.last().last == maxInput
       
   341         || needsCharFunction) {
       
   342         foreach (TransitionSequence seq, sequences) {
       
   343             const bool embedFinalState = isSingleReferencedFinalState(seq.transition);
       
   344 
       
   345             QString brace;
       
   346             if (embedFinalState)
       
   347                 brace = " {";
       
   348 
       
   349             if (!seq.testFunction.isEmpty()) {
       
   350                 body << "if (" << seq.testFunction << ")" << brace;
       
   351             } else if (seq.first == seq.last) {
       
   352                 body << "if (ch.unicode() == " << seq.first << ")" << brace;
       
   353             } else {
       
   354                 if (seq.last < maxInput)
       
   355                     body << "if (ch.unicode() >= " << seq.first
       
   356                          << " && ch.unicode() <= " << seq.last << ")" << brace;
       
   357                 else
       
   358                     body << "if (ch.unicode() >= " << seq.first << ")" << brace;
       
   359             }
       
   360             body.indent();
       
   361             if (embedFinalState) {
       
   362                 body << "token = " << dfa.at(seq.transition).symbol << ";";
       
   363                 body << "goto found;";
       
   364 
       
   365                 body.outdent();
       
   366                 body << "}";
       
   367             } else {
       
   368                 body << "goto state_" << seq.transition << ";";
       
   369                 body.outdent();
       
   370             }
       
   371         }
       
   372     } else {
       
   373         QList<InputType> keys = transitions.keys();
       
   374         qSort(keys);
       
   375 
       
   376         body << "switch (ch.unicode()) {";
       
   377         body.indent();
       
   378         for (int k = 0; k < keys.count(); ++k) {
       
   379             const InputType key = keys.at(k);
       
   380             const int trans = transitions.value(key);
       
   381 
       
   382             QString keyStr;
       
   383             if (key == '\\')
       
   384                 keyStr = QString("\'\\\\\'");
       
   385             else if (key >= 48 && key < 127)
       
   386                 keyStr = QString('\'') + QChar::fromLatin1(char(key)) + QChar('\'');
       
   387             else
       
   388                 keyStr = QString::number(key);
       
   389 
       
   390             if (k < keys.count() - 1
       
   391                 && transitions.value(keys.at(k + 1)) == trans) {
       
   392                 body << "case " << keyStr << ":";
       
   393             } else {
       
   394                 if (isSingleReferencedFinalState(trans)) {
       
   395                     body << "case " << keyStr << ": token = " << dfa.at(trans).symbol << "; goto found;";
       
   396                 } else {
       
   397                     body << "case " << keyStr << ": goto state_" << trans << ";";
       
   398                 }
       
   399             }
       
   400         }
       
   401         body.outdent();
       
   402         body << "}";
       
   403     }
       
   404 }
       
   405 
       
   406 QString Generator::generate()
       
   407 {
       
   408     Class klass(cfg.className);
       
   409     
       
   410     klass.addMember(Class::PublicMember, "QString input");
       
   411     klass.addMember(Class::PublicMember, "int pos");
       
   412     klass.addMember(Class::PublicMember, "int lexemStart");
       
   413     klass.addMember(Class::PublicMember, "int lexemLength");
       
   414     
       
   415     {
       
   416         CodeBlock body;
       
   417         body << "input = inp;";
       
   418         body << "pos = 0;";
       
   419         body << "lexemStart = 0;";
       
   420         body << "lexemLength = 0;";
       
   421         klass.addConstructor(Class::PublicMember, body, "const QString &inp");
       
   422     }
       
   423     
       
   424     {
       
   425         Function next("QChar", "next()");
       
   426         next.setInline(true);
       
   427         if (cfg.caseSensitivity == Qt::CaseSensitive)
       
   428             next.addBody("return (pos < input.length()) ? input.at(pos++) : QChar();");
       
   429         else
       
   430             next.addBody("return (pos < input.length()) ? input.at(pos++).toLower() : QChar();");
       
   431         klass.addMember(Class::PublicMember, next);
       
   432     }
       
   433     
       
   434     /*
       
   435     {
       
   436         Function lexem("QString", "lexem()");
       
   437         lexem.setConst(true);
       
   438         lexem.setInline(true);
       
   439         lexem.addBody("return input.mid(lexemStart, lexemLength);");
       
   440         klass.addMember(Class::PublicMember, lexem);
       
   441     }
       
   442     */
       
   443 
       
   444     for (int i = 0; i < dfa.count(); ++i)
       
   445         if (dfa.at(i).symbol.endsWith(QLatin1String("()"))) {
       
   446             Function handlerFunc("int", dfa.at(i).symbol);
       
   447             klass.addMember(Class::PublicMember, handlerFunc);
       
   448         }
       
   449 
       
   450     Function lexFunc;
       
   451     lexFunc.setReturnType("int");
       
   452     lexFunc.setName("lex()");
       
   453     
       
   454     CodeBlock body;
       
   455     body << "lexemStart = pos;";
       
   456     body << "lexemLength = 0;";
       
   457     body << "int lastAcceptingPos = -1;";
       
   458     body << "int token = -1;";
       
   459     body << "QChar ch;";
       
   460     body.addNewLine();
       
   461 
       
   462     backReferenceMap.clear();
       
   463     foreach (State s, dfa)
       
   464         foreach (int state, s.transitions)
       
   465             backReferenceMap[state]++;
       
   466 
       
   467     bool haveSingleReferencedFinalState = false;
       
   468 
       
   469     for (int i = 0; i < dfa.count(); ++i) {
       
   470         if (isSingleReferencedFinalState(i)) {
       
   471             haveSingleReferencedFinalState = true;
       
   472             continue;
       
   473         }
       
   474 
       
   475         if (i > 0)
       
   476             body << "state_" << i << ":";
       
   477         else
       
   478             body << "// initial state";
       
   479 
       
   480         body.indent();
       
   481 
       
   482         if (!dfa.at(i).symbol.isEmpty()) {
       
   483             body << "lastAcceptingPos = pos;";
       
   484             body << "token = " << dfa.at(i).symbol << ";";
       
   485         }
       
   486 
       
   487         body.outdent();
       
   488 
       
   489         body.indent();
       
   490         
       
   491         if (!dfa.at(i).transitions.isEmpty()) {
       
   492             body << "ch = next();";
       
   493             generateTransitions(body, dfa.at(i).transitions);
       
   494         }
       
   495         
       
   496         body << "goto out;";
       
   497         
       
   498         body.outdent();
       
   499     }
       
   500 
       
   501     if (haveSingleReferencedFinalState) {
       
   502         body << "found:";
       
   503         body << "lastAcceptingPos = pos;";
       
   504         body.addNewLine();
       
   505     }
       
   506 
       
   507     body << "out:";
       
   508     body << "if (lastAcceptingPos != -1) {";
       
   509     body.indent();
       
   510     body << "lexemLength = lastAcceptingPos - lexemStart;";
       
   511     body << "pos = lastAcceptingPos;";
       
   512     body.outdent();
       
   513     body << "}";
       
   514     body << "return token;";
       
   515     
       
   516     lexFunc.addBody(body);
       
   517     
       
   518     klass.addMember(Class::PublicMember, lexFunc);
       
   519 
       
   520     QString header;
       
   521     QFile headerFile(headerFileName);
       
   522     if (!headerFileName.isEmpty()
       
   523         && headerFile.exists()
       
   524         && headerFile.open(QIODevice::ReadOnly)) {
       
   525         header = QString::fromUtf8(headerFile.readAll());
       
   526     }
       
   527 
       
   528     header += QLatin1String("// auto generated. DO NOT EDIT.\n");
       
   529     
       
   530     return header + klass.declaration() + klass.definition();
       
   531 }
       
   532