util/lexgen/nfa.cpp
changeset 0 1918ee327afb
child 4 3b1da2848fc7
equal deleted inserted replaced
-1:000000000000 0:1918ee327afb
       
     1 /****************************************************************************
       
     2 **
       
     3 ** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies).
       
     4 ** All rights reserved.
       
     5 ** Contact: Nokia Corporation (qt-info@nokia.com)
       
     6 **
       
     7 ** This file is part of the utils of the Qt Toolkit.
       
     8 **
       
     9 ** $QT_BEGIN_LICENSE:LGPL$
       
    10 ** No Commercial Usage
       
    11 ** This file contains pre-release code and may not be distributed.
       
    12 ** You may use this file in accordance with the terms and conditions
       
    13 ** contained in the Technology Preview License Agreement accompanying
       
    14 ** this package.
       
    15 **
       
    16 ** GNU Lesser General Public License Usage
       
    17 ** Alternatively, this file may be used under the terms of the GNU Lesser
       
    18 ** General Public License version 2.1 as published by the Free Software
       
    19 ** Foundation and appearing in the file LICENSE.LGPL included in the
       
    20 ** packaging of this file.  Please review the following information to
       
    21 ** ensure the GNU Lesser General Public License version 2.1 requirements
       
    22 ** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
       
    23 **
       
    24 ** In addition, as a special exception, Nokia gives you certain additional
       
    25 ** rights.  These rights are described in the Nokia Qt LGPL Exception
       
    26 ** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
       
    27 **
       
    28 ** If you have questions regarding the use of this file, please contact
       
    29 ** Nokia at qt-info@nokia.com.
       
    30 **
       
    31 **
       
    32 **
       
    33 **
       
    34 **
       
    35 **
       
    36 **
       
    37 **
       
    38 ** $QT_END_LICENSE$
       
    39 **
       
    40 ****************************************************************************/
       
    41 #include "nfa.h"
       
    42 #include <QSet>
       
    43 #include <limits.h>
       
    44 
       
    45 NFA NFA::createSingleInputNFA(InputType input)
       
    46 {
       
    47     NFA result;
       
    48     result.initialize(2);
       
    49     result.addTransition(result.initialState, input, result.finalState);
       
    50     return result;
       
    51 }
       
    52 
       
    53 NFA NFA::createSymbolNFA(const QString &symbol)
       
    54 {
       
    55     NFA result = NFA::createSingleInputNFA(Epsilon);
       
    56     result.states[result.finalState].symbol = symbol;
       
    57     return result;
       
    58 }
       
    59 
       
    60 void NFA::initialize(int size)
       
    61 {
       
    62     states.resize(size);
       
    63     states.fill(State());
       
    64     initialState = 0;
       
    65     finalState = size - 1;
       
    66 }
       
    67 
       
    68 void NFA::addTransition(int from, InputType input, int to)
       
    69 {
       
    70     assertValidState(from);
       
    71     assertValidState(to);
       
    72 
       
    73     states[from].transitions.insertMulti(input, to);
       
    74 }
       
    75 
       
    76 void NFA::copyFrom(const NFA &other, int baseState)
       
    77 {
       
    78     assertValidState(baseState);
       
    79     assertValidState(baseState + other.states.count() - 1);
       
    80 
       
    81     for (int i = 0; i < other.states.count(); ++i) {
       
    82         State s = other.states.at(i);
       
    83 
       
    84         for (TransitionMap::Iterator it = s.transitions.begin(),
       
    85              end = s.transitions.end(); it != end; ++it)
       
    86             *it += baseState;
       
    87 
       
    88         states[baseState + i] = s;
       
    89     }
       
    90 }
       
    91 
       
    92 void NFA::initializeFromPair(const NFA &a, const NFA &b,
       
    93                              int *initialA, int *finalA,
       
    94                              int *initialB, int *finalB)
       
    95 {
       
    96     initialize(a.states.count() + b.states.count() + 2);
       
    97 
       
    98     int baseIdxA = 1;
       
    99     int baseIdxB = 1 + a.states.count();
       
   100 
       
   101     *initialA = a.initialState + baseIdxA;
       
   102     *finalA = a.finalState + baseIdxA;
       
   103 
       
   104     *initialB = b.initialState + baseIdxB;
       
   105     *finalB = b.finalState + baseIdxB;
       
   106 
       
   107     copyFrom(a, baseIdxA);
       
   108     copyFrom(b, baseIdxB);
       
   109 }
       
   110 
       
   111 NFA NFA::createAlternatingNFA(const NFA &a, const NFA &b)
       
   112 {
       
   113     NFA result;
       
   114 
       
   115     int newInitialA, newFinalA,
       
   116         newInitialB, newFinalB;
       
   117 
       
   118     result.initializeFromPair(a, b, &newInitialA, &newFinalA,
       
   119                               &newInitialB, &newFinalB);
       
   120 
       
   121     result.addTransition(result.initialState, Epsilon, newInitialA);
       
   122     result.addTransition(result.initialState, Epsilon, newInitialB);
       
   123 
       
   124     result.addTransition(newFinalA, Epsilon, result.finalState);
       
   125     result.addTransition(newFinalB, Epsilon, result.finalState);
       
   126 
       
   127     return result;
       
   128 }
       
   129 
       
   130 NFA NFA::createConcatenatingNFA(const NFA &a, const NFA &b)
       
   131 {
       
   132     NFA result;
       
   133 
       
   134     int initialA, finalA,
       
   135         initialB, finalB;
       
   136 
       
   137     result.initializeFromPair(a, b, &initialA, &finalA, &initialB, &finalB);
       
   138 
       
   139     result.addTransition(result.initialState, Epsilon, initialA);
       
   140     result.addTransition(finalA, Epsilon, initialB);
       
   141     result.addTransition(finalB, Epsilon, result.finalState);
       
   142     return result;
       
   143 }
       
   144 
       
   145 NFA NFA::createOptionalNFA(const NFA &a)
       
   146 {
       
   147     NFA result;
       
   148 
       
   149     result.initialize(a.states.count() + 2);
       
   150 
       
   151     int baseIdxA = 1;
       
   152     int initialA = a.initialState + baseIdxA;
       
   153     int finalA = a.finalState + baseIdxA;
       
   154 
       
   155     result.copyFrom(a, baseIdxA);
       
   156 
       
   157     result.addTransition(result.initialState, Epsilon, initialA);
       
   158     result.addTransition(result.initialState, Epsilon, result.finalState);
       
   159 
       
   160     result.addTransition(finalA, Epsilon, initialA);
       
   161     result.addTransition(finalA, Epsilon, result.finalState);
       
   162 
       
   163     return result;
       
   164 }
       
   165 
       
   166 NFA NFA::createStringNFA(const QByteArray &str)
       
   167 {
       
   168     NFA result;
       
   169     foreach (char c, str) {
       
   170         NFA ch = NFA::createSingleInputNFA(c);
       
   171         if (result.isEmpty())
       
   172             result = ch;
       
   173         else
       
   174             result = NFA::createConcatenatingNFA(result, ch);
       
   175     }
       
   176     return result;
       
   177 }
       
   178 
       
   179 NFA NFA::createSetNFA(const QSet<InputType> &set)
       
   180 {
       
   181     NFA result;
       
   182     result.initialize(set.count() + 2);
       
   183 
       
   184     int state = 1;
       
   185     for (QSet<InputType>::ConstIterator it = set.constBegin(), end = set.constEnd();
       
   186          it != end; ++it, ++state) {
       
   187         result.addTransition(result.initialState, Epsilon, state);
       
   188         result.addTransition(state, *it, result.finalState);
       
   189     }
       
   190 
       
   191     /*
       
   192     foreach (InputType input, set) {
       
   193         NFA ch = NFA::createSingleInputNFA(input);
       
   194         if (result.isEmpty())
       
   195             result = ch;
       
   196         else
       
   197             result = NFA::createAlternatingNFA(result, ch);
       
   198     }
       
   199     */
       
   200     return result;
       
   201 }
       
   202 
       
   203 NFA NFA::createZeroOrOneNFA(const NFA &a)
       
   204 {
       
   205     NFA epsilonNFA = createSingleInputNFA(Epsilon);
       
   206     return NFA::createAlternatingNFA(a, epsilonNFA);
       
   207 }
       
   208 
       
   209 NFA NFA::applyQuantity(const NFA &a, int minOccurrences, int maxOccurrences)
       
   210 {
       
   211     NFA result = a;
       
   212     NFA epsilonNFA = createSingleInputNFA(Epsilon);
       
   213 
       
   214     if (minOccurrences == 0) {
       
   215         result = NFA::createAlternatingNFA(result, epsilonNFA);
       
   216     } else {
       
   217         minOccurrences--;
       
   218     }
       
   219     maxOccurrences--;
       
   220 
       
   221     for (int i = 0; i < minOccurrences; ++i)
       
   222         result = NFA::createConcatenatingNFA(result, a);
       
   223 
       
   224     for (int i = minOccurrences; i < maxOccurrences; ++i)
       
   225         result = NFA::createConcatenatingNFA(result, NFA::createAlternatingNFA(a, epsilonNFA));
       
   226 
       
   227     return result;
       
   228 }
       
   229 
       
   230 void NFA::debug()
       
   231 {
       
   232     qDebug() << "NFA has" << states.count() << "states";
       
   233     qDebug() << "initial state is" << initialState;
       
   234     qDebug() << "final state is" << finalState;
       
   235 
       
   236     for (int i = 0; i < states.count(); ++i) {
       
   237         const State &s = states.at(i);
       
   238         for (TransitionMap::ConstIterator it = s.transitions.constBegin(),
       
   239              end = s.transitions.constEnd(); it != end; ++it)
       
   240             qDebug() << "transition from state" << i << "to" << it.value() << "through"
       
   241                      << (it.key() == Epsilon ? QString("Epsilon") : QString(char(it.key())));
       
   242         if (!s.symbol.isEmpty())
       
   243             qDebug() << "State" << i << "leads to symbol" << s.symbol;
       
   244     }
       
   245 }
       
   246 
       
   247 // helper
       
   248 typedef QSet<int> DFAState;
       
   249 
       
   250 // that's a bad hash, but it's good enough for us
       
   251 // and it allows us to use the nice QHash API :)
       
   252 inline uint qHash(const DFAState &state)
       
   253 {
       
   254     uint val = 0;
       
   255     foreach (int s, state)
       
   256         val |= qHash(s);
       
   257     return val;
       
   258 }
       
   259 
       
   260 DFA NFA::toDFA() const
       
   261 {
       
   262     DFA result;
       
   263     result.reserve(states.count());
       
   264 
       
   265     QHash<QString, int> symbolReferenceCounts;
       
   266     {
       
   267         QSet<int> symbolStates;
       
   268         for (int i = 0; i < states.count(); ++i)
       
   269             if (!states.at(i).symbol.isEmpty())
       
   270                 symbolStates.insert(i);
       
   271 
       
   272         QHash<int, QString> epsilonStates;
       
   273         for (int i = 0; i < states.count(); ++i) {
       
   274             const State &s = states.at(i);
       
   275             for (TransitionMap::ConstIterator transition = s.transitions.constBegin(), end = s.transitions.constEnd();
       
   276                  transition != end; ++transition)
       
   277                 if (transition.key() == Epsilon && symbolStates.contains(transition.value()))
       
   278                     epsilonStates.insert(i, states.at(transition.value()).symbol);
       
   279         }
       
   280 
       
   281         int lastCount;
       
   282         do {
       
   283             lastCount = epsilonStates.count();
       
   284             for (int i = 0; i < states.count(); ++i) {
       
   285                 const State &s = states.at(i);
       
   286                 for (TransitionMap::ConstIterator transition = s.transitions.constBegin(), end = s.transitions.constEnd();
       
   287                      transition != end; ++transition)
       
   288                     if (transition.key() == Epsilon && epsilonStates.contains(transition.value()))
       
   289                         epsilonStates.insert(i, epsilonStates.value(transition.value()));
       
   290             }
       
   291 
       
   292         } while (lastCount != epsilonStates.count());
       
   293 
       
   294         for (int i = 0; i < states.count(); ++i) {
       
   295             const State &s = states.at(i);
       
   296             for (TransitionMap::ConstIterator transition = s.transitions.constBegin(), end = s.transitions.constEnd();
       
   297                  transition != end; ++transition) {
       
   298                 if (transition.key() == Epsilon)
       
   299                     continue;
       
   300                 if (symbolStates.contains(transition.value())) {
       
   301                     const QString symbol = states.at(transition.value()).symbol;
       
   302                     symbolReferenceCounts[symbol]++;
       
   303                 } else if (epsilonStates.contains(transition.value())) {
       
   304                     const QString symbol = epsilonStates.value(transition.value());
       
   305                     symbolReferenceCounts[symbol]++;
       
   306                 }
       
   307             }
       
   308         }
       
   309         /*
       
   310         for (QHash<QString, int>::ConstIterator symIt = symbolReferenceCounts.constBegin(), symEnd = symbolReferenceCounts.constEnd();
       
   311              symIt != symEnd; ++symIt)
       
   312             qDebug() << "symbol" << symIt.key() << "is reached" << symIt.value() << "times";
       
   313             */
       
   314     }
       
   315 
       
   316     
       
   317     QSet<InputType> validInput;
       
   318     foreach (const State &s, states)
       
   319         for (TransitionMap::ConstIterator it = s.transitions.constBegin(),
       
   320              end = s.transitions.constEnd(); it != end; ++it)
       
   321             if (it.key() != Epsilon)
       
   322                 validInput.insert(it.key());
       
   323 
       
   324     // A DFA state can consist of multiple NFA states.
       
   325     // the dfaStateMap maps from these to the actual
       
   326     // state index within the resulting DFA vector
       
   327     QHash<DFAState, int> dfaStateMap;
       
   328     QStack<DFAState> pendingDFAStates;
       
   329 
       
   330     DFAState startState = epsilonClosure(QSet<int>() << initialState);
       
   331 
       
   332     result.resize(1);
       
   333     dfaStateMap.insert(startState, 0);
       
   334 
       
   335     pendingDFAStates.push(startState);
       
   336 
       
   337     while (!pendingDFAStates.isEmpty()) {
       
   338         DFAState state = pendingDFAStates.pop();
       
   339 //        qDebug() << "processing" << state << "from the stack of pending states";
       
   340 
       
   341         foreach (InputType input, validInput) {
       
   342 
       
   343             QSet<int> reachableStates;
       
   344 
       
   345             foreach (int nfaState, state) {
       
   346                 const TransitionMap &transitions = states.at(nfaState).transitions;
       
   347                 TransitionMap::ConstIterator it = transitions.find(input);
       
   348                 while (it != transitions.constEnd() && it.key() == input) {
       
   349                     reachableStates.insert(it.value());
       
   350                     ++it;
       
   351                 }
       
   352             }
       
   353 
       
   354             if (reachableStates.isEmpty())
       
   355                 continue;
       
   356 
       
   357 //            qDebug() << "can reach" << reachableStates << "from input" << char(input);
       
   358 
       
   359             QSet<int> closure = epsilonClosure(reachableStates);
       
   360 
       
   361 //            qDebug() << "closure is" << closure;
       
   362 
       
   363             if (!dfaStateMap.contains(closure)) {
       
   364                 int dfaState = result.count();
       
   365                 result.append(State());
       
   366 
       
   367                 QString symbol;
       
   368                 int refCount = INT_MAX;
       
   369                 foreach (int nfaState, closure)
       
   370                     if (!states.at(nfaState).symbol.isEmpty()) {
       
   371 //                        qDebug() << "closure also contains symbol" << states.at(nfaState).symbol;
       
   372                         QString candidate = states.at(nfaState).symbol;
       
   373                         int candidateRefCount =symbolReferenceCounts.value(candidate, INT_MAX);
       
   374                         if (candidateRefCount < refCount) {
       
   375                             refCount = candidateRefCount;
       
   376                             symbol = candidate;
       
   377                         }
       
   378                     }
       
   379                 if (!symbol.isEmpty())
       
   380                     result.last().symbol = symbol;
       
   381 
       
   382                 dfaStateMap.insert(closure, dfaState);
       
   383 
       
   384                 Q_ASSERT(!pendingDFAStates.contains(closure));
       
   385                 pendingDFAStates.prepend(closure);
       
   386             }
       
   387 
       
   388             result[dfaStateMap.value(state)].transitions.insert(input, dfaStateMap.value(closure));
       
   389         }
       
   390     }
       
   391 
       
   392     return result;
       
   393 }
       
   394 
       
   395 QSet<int> NFA::epsilonClosure(const QSet<int> &initialClosure) const
       
   396 {
       
   397     QSet<int> closure = initialClosure;
       
   398     closure.reserve(closure.count() * 4);
       
   399 
       
   400     QStack<int> stateStack;
       
   401     stateStack.resize(closure.count());
       
   402     qCopy(closure.constBegin(), closure.constEnd(), stateStack.begin());
       
   403 
       
   404     while (!stateStack.isEmpty()) {
       
   405         int t = stateStack.pop();
       
   406         const TransitionMap &transitions = states.at(t).transitions;
       
   407         TransitionMap::ConstIterator it = transitions.find(Epsilon);
       
   408         while (it != transitions.constEnd() && it.key() == Epsilon) {
       
   409             const int u = it.value();
       
   410             if (!closure.contains(u)) {
       
   411                 closure.insert(u);
       
   412                 stateStack.push(u);
       
   413             }
       
   414             ++it;
       
   415         }
       
   416     }
       
   417 
       
   418     return closure;
       
   419 }
       
   420 
       
   421 void NFA::setTerminationSymbol(const QString &symbol)
       
   422 {
       
   423     states[finalState].symbol = symbol;
       
   424 }
       
   425 
       
   426 void DFA::debug() const
       
   427 {
       
   428     qDebug() << "DFA has" << count() << "states";
       
   429 
       
   430     for (int i = 0; i < count(); ++i) {
       
   431         const State &s = at(i);
       
   432         if (s.transitions.isEmpty()) {
       
   433             qDebug() << "State" << i << "has no transitions";
       
   434         } else {
       
   435             for (TransitionMap::ConstIterator it = s.transitions.constBegin(),
       
   436                  end = s.transitions.constEnd(); it != end; ++it)
       
   437                 qDebug() << "transition from state" << i << "to" << it.value() << "through"
       
   438                          << (it.key() == Epsilon ? QString("Epsilon") : QString(char(it.key())));
       
   439         }
       
   440         if (!s.symbol.isEmpty())
       
   441             qDebug() << "State" << i << "leads to symbol" << s.symbol;
       
   442     }
       
   443 
       
   444 }
       
   445 
       
   446 DFA DFA::minimize() const
       
   447 {
       
   448     QVector<bool> inequivalentStates(count() * count());
       
   449     inequivalentStates.fill(false);
       
   450 
       
   451     for (int i = 0; i < count(); ++i)
       
   452         for (int j = 0; j < i; ++j) {
       
   453             if (i != j && at(i).symbol != at(j).symbol)
       
   454                 inequivalentStates[i * count() + j] = true;
       
   455         }
       
   456 
       
   457     bool done;
       
   458     do {
       
   459         done = true;
       
   460         for (int i = 0; i < count(); ++i)
       
   461             for (int j = 0; j < count(); ++j) {
       
   462                 if (i == j)
       
   463                     continue;
       
   464 
       
   465                 if (inequivalentStates[i * count() + j])
       
   466                     continue;
       
   467 
       
   468                 if (at(i).transitions.keys() != at(j).transitions.keys()) {
       
   469                     inequivalentStates[i * count() + j] = true;
       
   470                     done = false;
       
   471                     continue;
       
   472                 }
       
   473 
       
   474                 foreach (InputType a, at(i).transitions.keys()) {
       
   475                     int r = at(i).transitions.value(a, -1);
       
   476                     if (r == -1)
       
   477                         continue;
       
   478                     int s = at(j).transitions.value(a, -1);
       
   479                     if (s == -1)
       
   480                         continue;
       
   481 
       
   482                     if (inequivalentStates[r * count() + s]
       
   483                         || r == s) {
       
   484                         inequivalentStates[i * count() + j] = true;
       
   485                         done = false;
       
   486                         break;
       
   487                     }
       
   488                 }
       
   489             }
       
   490     } while (!done);
       
   491 
       
   492     QHash<int, int> statesToEliminate;
       
   493     for (int i = 0; i < count(); ++i)
       
   494         for (int j = 0; j < i; ++j)
       
   495             if (!inequivalentStates[i * count() + j]) {
       
   496                 statesToEliminate.insertMulti(i, j);
       
   497             }
       
   498 
       
   499     /*
       
   500     qDebug() << "states to eliminiate:" << statesToEliminate.count();;
       
   501     qDebug() << "merging" << statesToEliminate;
       
   502     debug();
       
   503     */
       
   504 
       
   505     return *this;
       
   506 }
       
   507 
       
   508