symbian-qemu-0.9.1-12/python-2.6.1/Parser/spark.py
changeset 1 2fb8b9db1c86
equal deleted inserted replaced
0:ffa851df0825 1:2fb8b9db1c86
       
     1 #  Copyright (c) 1998-2002 John Aycock
       
     2 #
       
     3 #  Permission is hereby granted, free of charge, to any person obtaining
       
     4 #  a copy of this software and associated documentation files (the
       
     5 #  "Software"), to deal in the Software without restriction, including
       
     6 #  without limitation the rights to use, copy, modify, merge, publish,
       
     7 #  distribute, sublicense, and/or sell copies of the Software, and to
       
     8 #  permit persons to whom the Software is furnished to do so, subject to
       
     9 #  the following conditions:
       
    10 #
       
    11 #  The above copyright notice and this permission notice shall be
       
    12 #  included in all copies or substantial portions of the Software.
       
    13 #
       
    14 #  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
       
    15 #  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
       
    16 #  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
       
    17 #  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
       
    18 #  CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
       
    19 #  TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
       
    20 #  SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
       
    21 
       
    22 __version__ = 'SPARK-0.7 (pre-alpha-5)'
       
    23 
       
    24 import re
       
    25 import string
       
    26 
       
    27 def _namelist(instance):
       
    28     namelist, namedict, classlist = [], {}, [instance.__class__]
       
    29     for c in classlist:
       
    30         for b in c.__bases__:
       
    31             classlist.append(b)
       
    32         for name in c.__dict__.keys():
       
    33             if not namedict.has_key(name):
       
    34                 namelist.append(name)
       
    35                 namedict[name] = 1
       
    36     return namelist
       
    37 
       
    38 class GenericScanner:
       
    39     def __init__(self, flags=0):
       
    40         pattern = self.reflect()
       
    41         self.re = re.compile(pattern, re.VERBOSE|flags)
       
    42 
       
    43         self.index2func = {}
       
    44         for name, number in self.re.groupindex.items():
       
    45             self.index2func[number-1] = getattr(self, 't_' + name)
       
    46 
       
    47     def makeRE(self, name):
       
    48         doc = getattr(self, name).__doc__
       
    49         rv = '(?P<%s>%s)' % (name[2:], doc)
       
    50         return rv
       
    51 
       
    52     def reflect(self):
       
    53         rv = []
       
    54         for name in _namelist(self):
       
    55             if name[:2] == 't_' and name != 't_default':
       
    56                 rv.append(self.makeRE(name))
       
    57 
       
    58         rv.append(self.makeRE('t_default'))
       
    59         return string.join(rv, '|')
       
    60 
       
    61     def error(self, s, pos):
       
    62         print "Lexical error at position %s" % pos
       
    63         raise SystemExit
       
    64 
       
    65     def tokenize(self, s):
       
    66         pos = 0
       
    67         n = len(s)
       
    68         while pos < n:
       
    69             m = self.re.match(s, pos)
       
    70             if m is None:
       
    71                 self.error(s, pos)
       
    72 
       
    73             groups = m.groups()
       
    74             for i in range(len(groups)):
       
    75                 if groups[i] and self.index2func.has_key(i):
       
    76                     self.index2func[i](groups[i])
       
    77             pos = m.end()
       
    78 
       
    79     def t_default(self, s):
       
    80         r'( . | \n )+'
       
    81         print "Specification error: unmatched input"
       
    82         raise SystemExit
       
    83 
       
    84 #
       
    85 #  Extracted from GenericParser and made global so that [un]picking works.
       
    86 #
       
    87 class _State:
       
    88     def __init__(self, stateno, items):
       
    89         self.T, self.complete, self.items = [], [], items
       
    90         self.stateno = stateno
       
    91 
       
    92 class GenericParser:
       
    93     #
       
    94     #  An Earley parser, as per J. Earley, "An Efficient Context-Free
       
    95     #  Parsing Algorithm", CACM 13(2), pp. 94-102.  Also J. C. Earley,
       
    96     #  "An Efficient Context-Free Parsing Algorithm", Ph.D. thesis,
       
    97     #  Carnegie-Mellon University, August 1968.  New formulation of
       
    98     #  the parser according to J. Aycock, "Practical Earley Parsing
       
    99     #  and the SPARK Toolkit", Ph.D. thesis, University of Victoria,
       
   100     #  2001, and J. Aycock and R. N. Horspool, "Practical Earley
       
   101     #  Parsing", unpublished paper, 2001.
       
   102     #
       
   103 
       
   104     def __init__(self, start):
       
   105         self.rules = {}
       
   106         self.rule2func = {}
       
   107         self.rule2name = {}
       
   108         self.collectRules()
       
   109         self.augment(start)
       
   110         self.ruleschanged = 1
       
   111 
       
   112     _NULLABLE = '\e_'
       
   113     _START = 'START'
       
   114     _BOF = '|-'
       
   115 
       
   116     #
       
   117     #  When pickling, take the time to generate the full state machine;
       
   118     #  some information is then extraneous, too.  Unfortunately we
       
   119     #  can't save the rule2func map.
       
   120     #
       
   121     def __getstate__(self):
       
   122         if self.ruleschanged:
       
   123             #
       
   124             #  XXX - duplicated from parse()
       
   125             #
       
   126             self.computeNull()
       
   127             self.newrules = {}
       
   128             self.new2old = {}
       
   129             self.makeNewRules()
       
   130             self.ruleschanged = 0
       
   131             self.edges, self.cores = {}, {}
       
   132             self.states = { 0: self.makeState0() }
       
   133             self.makeState(0, self._BOF)
       
   134         #
       
   135         #  XXX - should find a better way to do this..
       
   136         #
       
   137         changes = 1
       
   138         while changes:
       
   139             changes = 0
       
   140             for k, v in self.edges.items():
       
   141                 if v is None:
       
   142                     state, sym = k
       
   143                     if self.states.has_key(state):
       
   144                         self.goto(state, sym)
       
   145                         changes = 1
       
   146         rv = self.__dict__.copy()
       
   147         for s in self.states.values():
       
   148             del s.items
       
   149         del rv['rule2func']
       
   150         del rv['nullable']
       
   151         del rv['cores']
       
   152         return rv
       
   153 
       
   154     def __setstate__(self, D):
       
   155         self.rules = {}
       
   156         self.rule2func = {}
       
   157         self.rule2name = {}
       
   158         self.collectRules()
       
   159         start = D['rules'][self._START][0][1][1]        # Blech.
       
   160         self.augment(start)
       
   161         D['rule2func'] = self.rule2func
       
   162         D['makeSet'] = self.makeSet_fast
       
   163         self.__dict__ = D
       
   164 
       
   165     #
       
   166     #  A hook for GenericASTBuilder and GenericASTMatcher.  Mess
       
   167     #  thee not with this; nor shall thee toucheth the _preprocess
       
   168     #  argument to addRule.
       
   169     #
       
   170     def preprocess(self, rule, func):       return rule, func
       
   171 
       
   172     def addRule(self, doc, func, _preprocess=1):
       
   173         fn = func
       
   174         rules = string.split(doc)
       
   175 
       
   176         index = []
       
   177         for i in range(len(rules)):
       
   178             if rules[i] == '::=':
       
   179                 index.append(i-1)
       
   180         index.append(len(rules))
       
   181 
       
   182         for i in range(len(index)-1):
       
   183             lhs = rules[index[i]]
       
   184             rhs = rules[index[i]+2:index[i+1]]
       
   185             rule = (lhs, tuple(rhs))
       
   186 
       
   187             if _preprocess:
       
   188                 rule, fn = self.preprocess(rule, func)
       
   189 
       
   190             if self.rules.has_key(lhs):
       
   191                 self.rules[lhs].append(rule)
       
   192             else:
       
   193                 self.rules[lhs] = [ rule ]
       
   194             self.rule2func[rule] = fn
       
   195             self.rule2name[rule] = func.__name__[2:]
       
   196         self.ruleschanged = 1
       
   197 
       
   198     def collectRules(self):
       
   199         for name in _namelist(self):
       
   200             if name[:2] == 'p_':
       
   201                 func = getattr(self, name)
       
   202                 doc = func.__doc__
       
   203                 self.addRule(doc, func)
       
   204 
       
   205     def augment(self, start):
       
   206         rule = '%s ::= %s %s' % (self._START, self._BOF, start)
       
   207         self.addRule(rule, lambda args: args[1], 0)
       
   208 
       
   209     def computeNull(self):
       
   210         self.nullable = {}
       
   211         tbd = []
       
   212 
       
   213         for rulelist in self.rules.values():
       
   214             lhs = rulelist[0][0]
       
   215             self.nullable[lhs] = 0
       
   216             for rule in rulelist:
       
   217                 rhs = rule[1]
       
   218                 if len(rhs) == 0:
       
   219                     self.nullable[lhs] = 1
       
   220                     continue
       
   221                 #
       
   222                 #  We only need to consider rules which
       
   223                 #  consist entirely of nonterminal symbols.
       
   224                 #  This should be a savings on typical
       
   225                 #  grammars.
       
   226                 #
       
   227                 for sym in rhs:
       
   228                     if not self.rules.has_key(sym):
       
   229                         break
       
   230                 else:
       
   231                     tbd.append(rule)
       
   232         changes = 1
       
   233         while changes:
       
   234             changes = 0
       
   235             for lhs, rhs in tbd:
       
   236                 if self.nullable[lhs]:
       
   237                     continue
       
   238                 for sym in rhs:
       
   239                     if not self.nullable[sym]:
       
   240                         break
       
   241                 else:
       
   242                     self.nullable[lhs] = 1
       
   243                     changes = 1
       
   244 
       
   245     def makeState0(self):
       
   246         s0 = _State(0, [])
       
   247         for rule in self.newrules[self._START]:
       
   248             s0.items.append((rule, 0))
       
   249         return s0
       
   250 
       
   251     def finalState(self, tokens):
       
   252         #
       
   253         #  Yuck.
       
   254         #
       
   255         if len(self.newrules[self._START]) == 2 and len(tokens) == 0:
       
   256             return 1
       
   257         start = self.rules[self._START][0][1][1]
       
   258         return self.goto(1, start)
       
   259 
       
   260     def makeNewRules(self):
       
   261         worklist = []
       
   262         for rulelist in self.rules.values():
       
   263             for rule in rulelist:
       
   264                 worklist.append((rule, 0, 1, rule))
       
   265 
       
   266         for rule, i, candidate, oldrule in worklist:
       
   267             lhs, rhs = rule
       
   268             n = len(rhs)
       
   269             while i < n:
       
   270                 sym = rhs[i]
       
   271                 if not self.rules.has_key(sym) or \
       
   272                    not self.nullable[sym]:
       
   273                     candidate = 0
       
   274                     i = i + 1
       
   275                     continue
       
   276 
       
   277                 newrhs = list(rhs)
       
   278                 newrhs[i] = self._NULLABLE+sym
       
   279                 newrule = (lhs, tuple(newrhs))
       
   280                 worklist.append((newrule, i+1,
       
   281                                  candidate, oldrule))
       
   282                 candidate = 0
       
   283                 i = i + 1
       
   284             else:
       
   285                 if candidate:
       
   286                     lhs = self._NULLABLE+lhs
       
   287                     rule = (lhs, rhs)
       
   288                 if self.newrules.has_key(lhs):
       
   289                     self.newrules[lhs].append(rule)
       
   290                 else:
       
   291                     self.newrules[lhs] = [ rule ]
       
   292                 self.new2old[rule] = oldrule
       
   293 
       
   294     def typestring(self, token):
       
   295         return None
       
   296 
       
   297     def error(self, token):
       
   298         print "Syntax error at or near `%s' token" % token
       
   299         raise SystemExit
       
   300 
       
   301     def parse(self, tokens):
       
   302         sets = [ [(1,0), (2,0)] ]
       
   303         self.links = {}
       
   304 
       
   305         if self.ruleschanged:
       
   306             self.computeNull()
       
   307             self.newrules = {}
       
   308             self.new2old = {}
       
   309             self.makeNewRules()
       
   310             self.ruleschanged = 0
       
   311             self.edges, self.cores = {}, {}
       
   312             self.states = { 0: self.makeState0() }
       
   313             self.makeState(0, self._BOF)
       
   314 
       
   315         for i in xrange(len(tokens)):
       
   316             sets.append([])
       
   317 
       
   318             if sets[i] == []:
       
   319                 break
       
   320             self.makeSet(tokens[i], sets, i)
       
   321         else:
       
   322             sets.append([])
       
   323             self.makeSet(None, sets, len(tokens))
       
   324 
       
   325         #_dump(tokens, sets, self.states)
       
   326 
       
   327         finalitem = (self.finalState(tokens), 0)
       
   328         if finalitem not in sets[-2]:
       
   329             if len(tokens) > 0:
       
   330                 self.error(tokens[i-1])
       
   331             else:
       
   332                 self.error(None)
       
   333 
       
   334         return self.buildTree(self._START, finalitem,
       
   335                               tokens, len(sets)-2)
       
   336 
       
   337     def isnullable(self, sym):
       
   338         #
       
   339         #  For symbols in G_e only.  If we weren't supporting 1.5,
       
   340         #  could just use sym.startswith().
       
   341         #
       
   342         return self._NULLABLE == sym[0:len(self._NULLABLE)]
       
   343 
       
   344     def skip(self, (lhs, rhs), pos=0):
       
   345         n = len(rhs)
       
   346         while pos < n:
       
   347             if not self.isnullable(rhs[pos]):
       
   348                 break
       
   349             pos = pos + 1
       
   350         return pos
       
   351 
       
   352     def makeState(self, state, sym):
       
   353         assert sym is not None
       
   354         #
       
   355         #  Compute \epsilon-kernel state's core and see if
       
   356         #  it exists already.
       
   357         #
       
   358         kitems = []
       
   359         for rule, pos in self.states[state].items:
       
   360             lhs, rhs = rule
       
   361             if rhs[pos:pos+1] == (sym,):
       
   362                 kitems.append((rule, self.skip(rule, pos+1)))
       
   363         core = kitems
       
   364 
       
   365         core.sort()
       
   366         tcore = tuple(core)
       
   367         if self.cores.has_key(tcore):
       
   368             return self.cores[tcore]
       
   369         #
       
   370         #  Nope, doesn't exist.  Compute it and the associated
       
   371         #  \epsilon-nonkernel state together; we'll need it right away.
       
   372         #
       
   373         k = self.cores[tcore] = len(self.states)
       
   374         K, NK = _State(k, kitems), _State(k+1, [])
       
   375         self.states[k] = K
       
   376         predicted = {}
       
   377 
       
   378         edges = self.edges
       
   379         rules = self.newrules
       
   380         for X in K, NK:
       
   381             worklist = X.items
       
   382             for item in worklist:
       
   383                 rule, pos = item
       
   384                 lhs, rhs = rule
       
   385                 if pos == len(rhs):
       
   386                     X.complete.append(rule)
       
   387                     continue
       
   388 
       
   389                 nextSym = rhs[pos]
       
   390                 key = (X.stateno, nextSym)
       
   391                 if not rules.has_key(nextSym):
       
   392                     if not edges.has_key(key):
       
   393                         edges[key] = None
       
   394                         X.T.append(nextSym)
       
   395                 else:
       
   396                     edges[key] = None
       
   397                     if not predicted.has_key(nextSym):
       
   398                         predicted[nextSym] = 1
       
   399                         for prule in rules[nextSym]:
       
   400                             ppos = self.skip(prule)
       
   401                             new = (prule, ppos)
       
   402                             NK.items.append(new)
       
   403             #
       
   404             #  Problem: we know K needs generating, but we
       
   405             #  don't yet know about NK.  Can't commit anything
       
   406             #  regarding NK to self.edges until we're sure.  Should
       
   407             #  we delay committing on both K and NK to avoid this
       
   408             #  hacky code?  This creates other problems..
       
   409             #
       
   410             if X is K:
       
   411                 edges = {}
       
   412 
       
   413         if NK.items == []:
       
   414             return k
       
   415 
       
   416         #
       
   417         #  Check for \epsilon-nonkernel's core.  Unfortunately we
       
   418         #  need to know the entire set of predicted nonterminals
       
   419         #  to do this without accidentally duplicating states.
       
   420         #
       
   421         core = predicted.keys()
       
   422         core.sort()
       
   423         tcore = tuple(core)
       
   424         if self.cores.has_key(tcore):
       
   425             self.edges[(k, None)] = self.cores[tcore]
       
   426             return k
       
   427 
       
   428         nk = self.cores[tcore] = self.edges[(k, None)] = NK.stateno
       
   429         self.edges.update(edges)
       
   430         self.states[nk] = NK
       
   431         return k
       
   432 
       
   433     def goto(self, state, sym):
       
   434         key = (state, sym)
       
   435         if not self.edges.has_key(key):
       
   436             #
       
   437             #  No transitions from state on sym.
       
   438             #
       
   439             return None
       
   440 
       
   441         rv = self.edges[key]
       
   442         if rv is None:
       
   443             #
       
   444             #  Target state isn't generated yet.  Remedy this.
       
   445             #
       
   446             rv = self.makeState(state, sym)
       
   447             self.edges[key] = rv
       
   448         return rv
       
   449 
       
   450     def gotoT(self, state, t):
       
   451         return [self.goto(state, t)]
       
   452 
       
   453     def gotoST(self, state, st):
       
   454         rv = []
       
   455         for t in self.states[state].T:
       
   456             if st == t:
       
   457                 rv.append(self.goto(state, t))
       
   458         return rv
       
   459 
       
   460     def add(self, set, item, i=None, predecessor=None, causal=None):
       
   461         if predecessor is None:
       
   462             if item not in set:
       
   463                 set.append(item)
       
   464         else:
       
   465             key = (item, i)
       
   466             if item not in set:
       
   467                 self.links[key] = []
       
   468                 set.append(item)
       
   469             self.links[key].append((predecessor, causal))
       
   470 
       
   471     def makeSet(self, token, sets, i):
       
   472         cur, next = sets[i], sets[i+1]
       
   473 
       
   474         ttype = token is not None and self.typestring(token) or None
       
   475         if ttype is not None:
       
   476             fn, arg = self.gotoT, ttype
       
   477         else:
       
   478             fn, arg = self.gotoST, token
       
   479 
       
   480         for item in cur:
       
   481             ptr = (item, i)
       
   482             state, parent = item
       
   483             add = fn(state, arg)
       
   484             for k in add:
       
   485                 if k is not None:
       
   486                     self.add(next, (k, parent), i+1, ptr)
       
   487                     nk = self.goto(k, None)
       
   488                     if nk is not None:
       
   489                         self.add(next, (nk, i+1))
       
   490 
       
   491             if parent == i:
       
   492                 continue
       
   493 
       
   494             for rule in self.states[state].complete:
       
   495                 lhs, rhs = rule
       
   496                 for pitem in sets[parent]:
       
   497                     pstate, pparent = pitem
       
   498                     k = self.goto(pstate, lhs)
       
   499                     if k is not None:
       
   500                         why = (item, i, rule)
       
   501                         pptr = (pitem, parent)
       
   502                         self.add(cur, (k, pparent),
       
   503                                  i, pptr, why)
       
   504                         nk = self.goto(k, None)
       
   505                         if nk is not None:
       
   506                             self.add(cur, (nk, i))
       
   507 
       
   508     def makeSet_fast(self, token, sets, i):
       
   509         #
       
   510         #  Call *only* when the entire state machine has been built!
       
   511         #  It relies on self.edges being filled in completely, and
       
   512         #  then duplicates and inlines code to boost speed at the
       
   513         #  cost of extreme ugliness.
       
   514         #
       
   515         cur, next = sets[i], sets[i+1]
       
   516         ttype = token is not None and self.typestring(token) or None
       
   517 
       
   518         for item in cur:
       
   519             ptr = (item, i)
       
   520             state, parent = item
       
   521             if ttype is not None:
       
   522                 k = self.edges.get((state, ttype), None)
       
   523                 if k is not None:
       
   524                     #self.add(next, (k, parent), i+1, ptr)
       
   525                     #INLINED --v
       
   526                     new = (k, parent)
       
   527                     key = (new, i+1)
       
   528                     if new not in next:
       
   529                         self.links[key] = []
       
   530                         next.append(new)
       
   531                     self.links[key].append((ptr, None))
       
   532                     #INLINED --^
       
   533                     #nk = self.goto(k, None)
       
   534                     nk = self.edges.get((k, None), None)
       
   535                     if nk is not None:
       
   536                         #self.add(next, (nk, i+1))
       
   537                         #INLINED --v
       
   538                         new = (nk, i+1)
       
   539                         if new not in next:
       
   540                             next.append(new)
       
   541                         #INLINED --^
       
   542             else:
       
   543                 add = self.gotoST(state, token)
       
   544                 for k in add:
       
   545                     if k is not None:
       
   546                         self.add(next, (k, parent), i+1, ptr)
       
   547                         #nk = self.goto(k, None)
       
   548                         nk = self.edges.get((k, None), None)
       
   549                         if nk is not None:
       
   550                             self.add(next, (nk, i+1))
       
   551 
       
   552             if parent == i:
       
   553                 continue
       
   554 
       
   555             for rule in self.states[state].complete:
       
   556                 lhs, rhs = rule
       
   557                 for pitem in sets[parent]:
       
   558                     pstate, pparent = pitem
       
   559                     #k = self.goto(pstate, lhs)
       
   560                     k = self.edges.get((pstate, lhs), None)
       
   561                     if k is not None:
       
   562                         why = (item, i, rule)
       
   563                         pptr = (pitem, parent)
       
   564                         #self.add(cur, (k, pparent),
       
   565                         #        i, pptr, why)
       
   566                         #INLINED --v
       
   567                         new = (k, pparent)
       
   568                         key = (new, i)
       
   569                         if new not in cur:
       
   570                             self.links[key] = []
       
   571                             cur.append(new)
       
   572                         self.links[key].append((pptr, why))
       
   573                         #INLINED --^
       
   574                         #nk = self.goto(k, None)
       
   575                         nk = self.edges.get((k, None), None)
       
   576                         if nk is not None:
       
   577                             #self.add(cur, (nk, i))
       
   578                             #INLINED --v
       
   579                             new = (nk, i)
       
   580                             if new not in cur:
       
   581                                 cur.append(new)
       
   582                             #INLINED --^
       
   583 
       
   584     def predecessor(self, key, causal):
       
   585         for p, c in self.links[key]:
       
   586             if c == causal:
       
   587                 return p
       
   588         assert 0
       
   589 
       
   590     def causal(self, key):
       
   591         links = self.links[key]
       
   592         if len(links) == 1:
       
   593             return links[0][1]
       
   594         choices = []
       
   595         rule2cause = {}
       
   596         for p, c in links:
       
   597             rule = c[2]
       
   598             choices.append(rule)
       
   599             rule2cause[rule] = c
       
   600         return rule2cause[self.ambiguity(choices)]
       
   601 
       
   602     def deriveEpsilon(self, nt):
       
   603         if len(self.newrules[nt]) > 1:
       
   604             rule = self.ambiguity(self.newrules[nt])
       
   605         else:
       
   606             rule = self.newrules[nt][0]
       
   607         #print rule
       
   608 
       
   609         rhs = rule[1]
       
   610         attr = [None] * len(rhs)
       
   611 
       
   612         for i in range(len(rhs)-1, -1, -1):
       
   613             attr[i] = self.deriveEpsilon(rhs[i])
       
   614         return self.rule2func[self.new2old[rule]](attr)
       
   615 
       
   616     def buildTree(self, nt, item, tokens, k):
       
   617         state, parent = item
       
   618 
       
   619         choices = []
       
   620         for rule in self.states[state].complete:
       
   621             if rule[0] == nt:
       
   622                 choices.append(rule)
       
   623         rule = choices[0]
       
   624         if len(choices) > 1:
       
   625             rule = self.ambiguity(choices)
       
   626         #print rule
       
   627 
       
   628         rhs = rule[1]
       
   629         attr = [None] * len(rhs)
       
   630 
       
   631         for i in range(len(rhs)-1, -1, -1):
       
   632             sym = rhs[i]
       
   633             if not self.newrules.has_key(sym):
       
   634                 if sym != self._BOF:
       
   635                     attr[i] = tokens[k-1]
       
   636                     key = (item, k)
       
   637                     item, k = self.predecessor(key, None)
       
   638             #elif self.isnullable(sym):
       
   639             elif self._NULLABLE == sym[0:len(self._NULLABLE)]:
       
   640                 attr[i] = self.deriveEpsilon(sym)
       
   641             else:
       
   642                 key = (item, k)
       
   643                 why = self.causal(key)
       
   644                 attr[i] = self.buildTree(sym, why[0],
       
   645                                          tokens, why[1])
       
   646                 item, k = self.predecessor(key, why)
       
   647         return self.rule2func[self.new2old[rule]](attr)
       
   648 
       
   649     def ambiguity(self, rules):
       
   650         #
       
   651         #  XXX - problem here and in collectRules() if the same rule
       
   652         #        appears in >1 method.  Also undefined results if rules
       
   653         #        causing the ambiguity appear in the same method.
       
   654         #
       
   655         sortlist = []
       
   656         name2index = {}
       
   657         for i in range(len(rules)):
       
   658             lhs, rhs = rule = rules[i]
       
   659             name = self.rule2name[self.new2old[rule]]
       
   660             sortlist.append((len(rhs), name))
       
   661             name2index[name] = i
       
   662         sortlist.sort()
       
   663         list = map(lambda (a,b): b, sortlist)
       
   664         return rules[name2index[self.resolve(list)]]
       
   665 
       
   666     def resolve(self, list):
       
   667         #
       
   668         #  Resolve ambiguity in favor of the shortest RHS.
       
   669         #  Since we walk the tree from the top down, this
       
   670         #  should effectively resolve in favor of a "shift".
       
   671         #
       
   672         return list[0]
       
   673 
       
   674 #
       
   675 #  GenericASTBuilder automagically constructs a concrete/abstract syntax tree
       
   676 #  for a given input.  The extra argument is a class (not an instance!)
       
   677 #  which supports the "__setslice__" and "__len__" methods.
       
   678 #
       
   679 #  XXX - silently overrides any user code in methods.
       
   680 #
       
   681 
       
   682 class GenericASTBuilder(GenericParser):
       
   683     def __init__(self, AST, start):
       
   684         GenericParser.__init__(self, start)
       
   685         self.AST = AST
       
   686 
       
   687     def preprocess(self, rule, func):
       
   688         rebind = lambda lhs, self=self: \
       
   689                         lambda args, lhs=lhs, self=self: \
       
   690                                 self.buildASTNode(args, lhs)
       
   691         lhs, rhs = rule
       
   692         return rule, rebind(lhs)
       
   693 
       
   694     def buildASTNode(self, args, lhs):
       
   695         children = []
       
   696         for arg in args:
       
   697             if isinstance(arg, self.AST):
       
   698                 children.append(arg)
       
   699             else:
       
   700                 children.append(self.terminal(arg))
       
   701         return self.nonterminal(lhs, children)
       
   702 
       
   703     def terminal(self, token):      return token
       
   704 
       
   705     def nonterminal(self, type, args):
       
   706         rv = self.AST(type)
       
   707         rv[:len(args)] = args
       
   708         return rv
       
   709 
       
   710 #
       
   711 #  GenericASTTraversal is a Visitor pattern according to Design Patterns.  For
       
   712 #  each node it attempts to invoke the method n_<node type>, falling
       
   713 #  back onto the default() method if the n_* can't be found.  The preorder
       
   714 #  traversal also looks for an exit hook named n_<node type>_exit (no default
       
   715 #  routine is called if it's not found).  To prematurely halt traversal
       
   716 #  of a subtree, call the prune() method -- this only makes sense for a
       
   717 #  preorder traversal.  Node type is determined via the typestring() method.
       
   718 #
       
   719 
       
   720 class GenericASTTraversalPruningException:
       
   721     pass
       
   722 
       
   723 class GenericASTTraversal:
       
   724     def __init__(self, ast):
       
   725         self.ast = ast
       
   726 
       
   727     def typestring(self, node):
       
   728         return node.type
       
   729 
       
   730     def prune(self):
       
   731         raise GenericASTTraversalPruningException
       
   732 
       
   733     def preorder(self, node=None):
       
   734         if node is None:
       
   735             node = self.ast
       
   736 
       
   737         try:
       
   738             name = 'n_' + self.typestring(node)
       
   739             if hasattr(self, name):
       
   740                 func = getattr(self, name)
       
   741                 func(node)
       
   742             else:
       
   743                 self.default(node)
       
   744         except GenericASTTraversalPruningException:
       
   745             return
       
   746 
       
   747         for kid in node:
       
   748             self.preorder(kid)
       
   749 
       
   750         name = name + '_exit'
       
   751         if hasattr(self, name):
       
   752             func = getattr(self, name)
       
   753             func(node)
       
   754 
       
   755     def postorder(self, node=None):
       
   756         if node is None:
       
   757             node = self.ast
       
   758 
       
   759         for kid in node:
       
   760             self.postorder(kid)
       
   761 
       
   762         name = 'n_' + self.typestring(node)
       
   763         if hasattr(self, name):
       
   764             func = getattr(self, name)
       
   765             func(node)
       
   766         else:
       
   767             self.default(node)
       
   768 
       
   769 
       
   770     def default(self, node):
       
   771         pass
       
   772 
       
   773 #
       
   774 #  GenericASTMatcher.  AST nodes must have "__getitem__" and "__cmp__"
       
   775 #  implemented.
       
   776 #
       
   777 #  XXX - makes assumptions about how GenericParser walks the parse tree.
       
   778 #
       
   779 
       
   780 class GenericASTMatcher(GenericParser):
       
   781     def __init__(self, start, ast):
       
   782         GenericParser.__init__(self, start)
       
   783         self.ast = ast
       
   784 
       
   785     def preprocess(self, rule, func):
       
   786         rebind = lambda func, self=self: \
       
   787                         lambda args, func=func, self=self: \
       
   788                                 self.foundMatch(args, func)
       
   789         lhs, rhs = rule
       
   790         rhslist = list(rhs)
       
   791         rhslist.reverse()
       
   792 
       
   793         return (lhs, tuple(rhslist)), rebind(func)
       
   794 
       
   795     def foundMatch(self, args, func):
       
   796         func(args[-1])
       
   797         return args[-1]
       
   798 
       
   799     def match_r(self, node):
       
   800         self.input.insert(0, node)
       
   801         children = 0
       
   802 
       
   803         for child in node:
       
   804             if children == 0:
       
   805                 self.input.insert(0, '(')
       
   806             children = children + 1
       
   807             self.match_r(child)
       
   808 
       
   809         if children > 0:
       
   810             self.input.insert(0, ')')
       
   811 
       
   812     def match(self, ast=None):
       
   813         if ast is None:
       
   814             ast = self.ast
       
   815         self.input = []
       
   816 
       
   817         self.match_r(ast)
       
   818         self.parse(self.input)
       
   819 
       
   820     def resolve(self, list):
       
   821         #
       
   822         #  Resolve ambiguity in favor of the longest RHS.
       
   823         #
       
   824         return list[-1]
       
   825 
       
   826 def _dump(tokens, sets, states):
       
   827     for i in range(len(sets)):
       
   828         print 'set', i
       
   829         for item in sets[i]:
       
   830             print '\t', item
       
   831             for (lhs, rhs), pos in states[item[0]].items:
       
   832                 print '\t\t', lhs, '::=',
       
   833                 print string.join(rhs[:pos]),
       
   834                 print '.',
       
   835                 print string.join(rhs[pos:])
       
   836         if i < len(tokens):
       
   837             print
       
   838             print 'token', str(tokens[i])
       
   839             print