|
1 # Copyright (c) 1998-2002 John Aycock |
|
2 # |
|
3 # Permission is hereby granted, free of charge, to any person obtaining |
|
4 # a copy of this software and associated documentation files (the |
|
5 # "Software"), to deal in the Software without restriction, including |
|
6 # without limitation the rights to use, copy, modify, merge, publish, |
|
7 # distribute, sublicense, and/or sell copies of the Software, and to |
|
8 # permit persons to whom the Software is furnished to do so, subject to |
|
9 # the following conditions: |
|
10 # |
|
11 # The above copyright notice and this permission notice shall be |
|
12 # included in all copies or substantial portions of the Software. |
|
13 # |
|
14 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
|
15 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
|
16 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
|
17 # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY |
|
18 # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
|
19 # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
|
20 # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
|
21 |
|
# Public release identifier for this copy of SPARK.
__version__ = 'SPARK-0.7 (pre-alpha-5)'
|
23 |
|
24 import re |
|
25 import string |
|
26 |
|
27 def _namelist(instance): |
|
28 namelist, namedict, classlist = [], {}, [instance.__class__] |
|
29 for c in classlist: |
|
30 for b in c.__bases__: |
|
31 classlist.append(b) |
|
32 for name in c.__dict__.keys(): |
|
33 if not namedict.has_key(name): |
|
34 namelist.append(name) |
|
35 namedict[name] = 1 |
|
36 return namelist |
|
37 |
|
class GenericScanner:
    """Reflective scanner base class.

    Subclasses define t_* methods whose docstrings are VERBOSE-mode
    regular expressions; tokenize() matches them against the input and
    dispatches each match to its t_* method.  t_default is always tried
    last as a catch-all.
    """

    def __init__(self, flags=0):
        # Build one big alternation from all t_* docstring regexes.
        pattern = self.reflect()
        self.re = re.compile(pattern, re.VERBOSE | flags)

        # Map 0-based match-group indices back to their t_* handlers.
        self.index2func = {}
        for name, number in self.re.groupindex.items():
            self.index2func[number - 1] = getattr(self, 't_' + name)

    def makeRE(self, name):
        """Return a named-group regex built from method `name`'s docstring."""
        doc = getattr(self, name).__doc__
        rv = '(?P<%s>%s)' % (name[2:], doc)
        return rv

    def reflect(self):
        """Join every t_* regex with '|', appending t_default last."""
        rv = []
        for name in _namelist(self):
            if name[:2] == 't_' and name != 't_default':
                rv.append(self.makeRE(name))

        rv.append(self.makeRE('t_default'))
        # str.join replaces string.join, which Python 3 removed.
        return '|'.join(rv)

    def error(self, s, pos):
        """Report a lexical error at position pos and abort."""
        print("Lexical error at position %s" % pos)
        raise SystemExit

    def tokenize(self, s):
        """Scan string s, invoking the t_* handler for each match."""
        pos = 0
        n = len(s)
        while pos < n:
            m = self.re.match(s, pos)
            if m is None:
                self.error(s, pos)

            groups = m.groups()
            for i in range(len(groups)):
                # Only top-level (named) groups have handlers; nested
                # groups inside a token regex are skipped.
                if groups[i] and i in self.index2func:
                    self.index2func[i](groups[i])
            pos = m.end()

    # NOTE: the docstring below IS the catch-all regex -- do not turn it
    # into documentation.
    def t_default(self, s):
        r'( . | \n )+'
        print("Specification error: unmatched input")
        raise SystemExit
|
83 |
|
84 # |
|
85 # Extracted from GenericParser and made global so that [un]picking works. |
|
86 # |
|
87 class _State: |
|
88 def __init__(self, stateno, items): |
|
89 self.T, self.complete, self.items = [], [], items |
|
90 self.stateno = stateno |
|
91 |
|
class GenericParser:
    """An Earley parser, as per J. Earley, "An Efficient Context-Free
    Parsing Algorithm", CACM 13(2), pp. 94-102.  Also J. C. Earley,
    "An Efficient Context-Free Parsing Algorithm", Ph.D. thesis,
    Carnegie-Mellon University, August 1968.  New formulation of
    the parser according to J. Aycock, "Practical Earley Parsing
    and the SPARK Toolkit", Ph.D. thesis, University of Victoria,
    2001, and J. Aycock and R. N. Horspool, "Practical Earley
    Parsing", unpublished paper, 2001.

    Subclasses supply grammar rules as p_* methods whose docstrings
    contain productions of the form "lhs ::= sym sym ..."; each p_*
    method is the semantic action for the rule(s) in its docstring.
    """

    def __init__(self, start):
        """Collect p_* rules from the subclass and augment with `start`."""
        self.rules = {}        # lhs -> list of (lhs, rhs-tuple) rules
        self.rule2func = {}    # rule -> semantic action callable
        self.rule2name = {}    # rule -> p_* method name (minus 'p_')
        self.collectRules()
        self.augment(start)
        self.ruleschanged = 1  # forces (re)building tables in parse()

    _NULLABLE = '\e_'   # prefix marking derives-epsilon symbols in G_e
    _START = 'START'    # synthetic start nonterminal
    _BOF = '|-'         # synthetic beginning-of-file terminal

    #
    # When pickling, take the time to generate the full state machine;
    # some information is then extraneous, too.  Unfortunately we
    # can't save the rule2func map.
    #
    def __getstate__(self):
        if self.ruleschanged:
            #
            # XXX - duplicated from parse()
            #
            self.computeNull()
            self.newrules = {}
            self.new2old = {}
            self.makeNewRules()
            self.ruleschanged = 0
            self.edges, self.cores = {}, {}
            self.states = { 0: self.makeState0() }
            self.makeState(0, self._BOF)
        #
        # XXX - should find a better way to do this..
        # Force every lazily-deferred edge (value None) to be generated
        # so the pickled machine is complete.
        #
        changes = 1
        while changes:
            changes = 0
            for k, v in self.edges.items():
                if v is None:
                    state, sym = k
                    if self.states.has_key(state):
                        self.goto(state, sym)
                        changes = 1
        rv = self.__dict__.copy()
        # Items are only needed while building states; drop them, along
        # with the unpicklable / recomputable tables.
        for s in self.states.values():
            del s.items
        del rv['rule2func']
        del rv['nullable']
        del rv['cores']
        return rv

    def __setstate__(self, D):
        """Rebuild rule2func from the live class, then adopt the pickled
        state dict.  Also switches makeSet to the fast variant, which is
        safe once the full machine exists."""
        self.rules = {}
        self.rule2func = {}
        self.rule2name = {}
        self.collectRules()
        start = D['rules'][self._START][0][1][1] # Blech.
        self.augment(start)
        D['rule2func'] = self.rule2func
        D['makeSet'] = self.makeSet_fast
        self.__dict__ = D

    #
    # A hook for GenericASTBuilder and GenericASTMatcher.  Mess
    # thee not with this; nor shall thee toucheth the _preprocess
    # argument to addRule.
    #
    def preprocess(self, rule, func):	return rule, func

    def addRule(self, doc, func, _preprocess=1):
        """Parse `doc` ("lhs ::= rhs ... [lhs ::= ...]") into rules and
        register `func` as their semantic action."""
        fn = func
        rules = string.split(doc)

        # index[] marks the start of each rule: the token before '::='.
        index = []
        for i in range(len(rules)):
            if rules[i] == '::=':
                index.append(i-1)
        index.append(len(rules))

        for i in range(len(index)-1):
            lhs = rules[index[i]]
            rhs = rules[index[i]+2:index[i+1]]
            rule = (lhs, tuple(rhs))

            if _preprocess:
                rule, fn = self.preprocess(rule, func)

            if self.rules.has_key(lhs):
                self.rules[lhs].append(rule)
            else:
                self.rules[lhs] = [ rule ]
            self.rule2func[rule] = fn
            self.rule2name[rule] = func.__name__[2:]
        self.ruleschanged = 1

    def collectRules(self):
        """Register every p_* method's docstring as grammar rules."""
        for name in _namelist(self):
            if name[:2] == 'p_':
                func = getattr(self, name)
                doc = func.__doc__
                self.addRule(doc, func)

    def augment(self, start):
        """Add the synthetic rule START ::= |- start."""
        rule = '%s ::= %s %s' % (self._START, self._BOF, start)
        self.addRule(rule, lambda args: args[1], 0)

    def computeNull(self):
        """Compute self.nullable: lhs -> 1 if lhs derives epsilon."""
        self.nullable = {}
        tbd = []

        for rulelist in self.rules.values():
            lhs = rulelist[0][0]
            self.nullable[lhs] = 0
            for rule in rulelist:
                rhs = rule[1]
                if len(rhs) == 0:
                    self.nullable[lhs] = 1
                    continue
                #
                # We only need to consider rules which
                # consist entirely of nonterminal symbols.
                # This should be a savings on typical
                # grammars.
                #
                for sym in rhs:
                    if not self.rules.has_key(sym):
                        break
                else:
                    tbd.append(rule)
        # Fixed-point iteration over the candidate rules.
        changes = 1
        while changes:
            changes = 0
            for lhs, rhs in tbd:
                if self.nullable[lhs]:
                    continue
                for sym in rhs:
                    if not self.nullable[sym]:
                        break
                else:
                    self.nullable[lhs] = 1
                    changes = 1

    def makeState0(self):
        """Build state 0, containing every START rule at position 0."""
        s0 = _State(0, [])
        for rule in self.newrules[self._START]:
            s0.items.append((rule, 0))
        return s0

    def finalState(self, tokens):
        """Return the state number that accepts the whole input."""
        #
        # Yuck.
        #
        if len(self.newrules[self._START]) == 2 and len(tokens) == 0:
            return 1
        start = self.rules[self._START][0][1][1]
        return self.goto(1, start)

    def makeNewRules(self):
        """Build the epsilon-free grammar G_e in self.newrules.

        For every nullable symbol on a RHS, also generate a variant rule
        using the _NULLABLE-prefixed symbol; new2old maps each generated
        rule back to the user's original rule."""
        worklist = []
        for rulelist in self.rules.values():
            for rule in rulelist:
                worklist.append((rule, 0, 1, rule))

        for rule, i, candidate, oldrule in worklist:
            lhs, rhs = rule
            n = len(rhs)
            while i < n:
                sym = rhs[i]
                if not self.rules.has_key(sym) or \
                   not self.nullable[sym]:
                    candidate = 0
                    i = i + 1
                    continue

                newrhs = list(rhs)
                newrhs[i] = self._NULLABLE+sym
                newrule = (lhs, tuple(newrhs))
                worklist.append((newrule, i+1,
                                 candidate, oldrule))
                candidate = 0
                i = i + 1
            else:
                # Loop fell through: a rule whose every symbol was
                # nullable also defines the _NULLABLE-prefixed lhs.
                if candidate:
                    lhs = self._NULLABLE+lhs
                    rule = (lhs, rhs)
                if self.newrules.has_key(lhs):
                    self.newrules[lhs].append(rule)
                else:
                    self.newrules[lhs] = [ rule ]
                self.new2old[rule] = oldrule

    def typestring(self, token):
        """Hook: return a type string for token (enables the fast path
        in makeSet); None disables it."""
        return None

    def error(self, token):
        """Report a syntax error and abort; override to customize."""
        print "Syntax error at or near `%s' token" % token
        raise SystemExit

    def parse(self, tokens):
        """Run the Earley recognizer over tokens, then build and return
        the semantic value of the parse via buildTree."""
        sets = [ [(1,0), (2,0)] ]
        self.links = {}

        if self.ruleschanged:
            self.computeNull()
            self.newrules = {}
            self.new2old = {}
            self.makeNewRules()
            self.ruleschanged = 0
            self.edges, self.cores = {}, {}
            self.states = { 0: self.makeState0() }
            self.makeState(0, self._BOF)

        for i in xrange(len(tokens)):
            sets.append([])

            if sets[i] == []:
                break
            self.makeSet(tokens[i], sets, i)
        else:
            sets.append([])
            self.makeSet(None, sets, len(tokens))

        #_dump(tokens, sets, self.states)

        finalitem = (self.finalState(tokens), 0)
        if finalitem not in sets[-2]:
            if len(tokens) > 0:
                # Relies on `i` leaking from the loop above.
                self.error(tokens[i-1])
            else:
                self.error(None)

        return self.buildTree(self._START, finalitem,
                              tokens, len(sets)-2)

    def isnullable(self, sym):
        #
        # For symbols in G_e only.  If we weren't supporting 1.5,
        # could just use sym.startswith().
        #
        return self._NULLABLE == sym[0:len(self._NULLABLE)]

    def skip(self, (lhs, rhs), pos=0):
        """Advance pos past any nullable symbols in rhs."""
        n = len(rhs)
        while pos < n:
            if not self.isnullable(rhs[pos]):
                break
            pos = pos + 1
        return pos

    def makeState(self, state, sym):
        """Generate (or find) the state reached from `state` on `sym`,
        together with its epsilon-nonkernel companion state."""
        assert sym is not None
        #
        # Compute \epsilon-kernel state's core and see if
        # it exists already.
        #
        kitems = []
        for rule, pos in self.states[state].items:
            lhs, rhs = rule
            if rhs[pos:pos+1] == (sym,):
                kitems.append((rule, self.skip(rule, pos+1)))
        core = kitems

        core.sort()
        tcore = tuple(core)
        if self.cores.has_key(tcore):
            return self.cores[tcore]
        #
        # Nope, doesn't exist.  Compute it and the associated
        # \epsilon-nonkernel state together; we'll need it right away.
        #
        k = self.cores[tcore] = len(self.states)
        K, NK = _State(k, kitems), _State(k+1, [])
        self.states[k] = K
        predicted = {}

        edges = self.edges
        rules = self.newrules
        for X in K, NK:
            worklist = X.items
            for item in worklist:
                rule, pos = item
                lhs, rhs = rule
                if pos == len(rhs):
                    X.complete.append(rule)
                    continue

                nextSym = rhs[pos]
                key = (X.stateno, nextSym)
                if not rules.has_key(nextSym):
                    # Terminal: record the (deferred) transition.
                    if not edges.has_key(key):
                        edges[key] = None
                        X.T.append(nextSym)
                else:
                    # Nonterminal: predict its rules into NK once.
                    edges[key] = None
                    if not predicted.has_key(nextSym):
                        predicted[nextSym] = 1
                        for prule in rules[nextSym]:
                            ppos = self.skip(prule)
                            new = (prule, ppos)
                            NK.items.append(new)
            #
            # Problem: we know K needs generating, but we
            # don't yet know about NK.  Can't commit anything
            # regarding NK to self.edges until we're sure.  Should
            # we delay committing on both K and NK to avoid this
            # hacky code?  This creates other problems..
            #
            if X is K:
                edges = {}

        if NK.items == []:
            return k

        #
        # Check for \epsilon-nonkernel's core.  Unfortunately we
        # need to know the entire set of predicted nonterminals
        # to do this without accidentally duplicating states.
        #
        core = predicted.keys()
        core.sort()
        tcore = tuple(core)
        if self.cores.has_key(tcore):
            self.edges[(k, None)] = self.cores[tcore]
            return k

        nk = self.cores[tcore] = self.edges[(k, None)] = NK.stateno
        self.edges.update(edges)
        self.states[nk] = NK
        return k

    def goto(self, state, sym):
        """Return the state reached from `state` on `sym`, generating it
        lazily; None if there is no transition."""
        key = (state, sym)
        if not self.edges.has_key(key):
            #
            # No transitions from state on sym.
            #
            return None

        rv = self.edges[key]
        if rv is None:
            #
            # Target state isn't generated yet.  Remedy this.
            #
            rv = self.makeState(state, sym)
            self.edges[key] = rv
        return rv

    def gotoT(self, state, t):
        """goto() on a token type; list form matches gotoST's interface."""
        return [self.goto(state, t)]

    def gotoST(self, state, st):
        """goto() on a token compared against each outgoing terminal."""
        rv = []
        for t in self.states[state].T:
            if st == t:
                rv.append(self.goto(state, t))
        return rv

    def add(self, set, item, i=None, predecessor=None, causal=None):
        """Add item to an Earley set, recording predecessor/causal links
        (used later by buildTree) when supplied."""
        if predecessor is None:
            if item not in set:
                set.append(item)
        else:
            key = (item, i)
            if item not in set:
                self.links[key] = []
                set.append(item)
            self.links[key].append((predecessor, causal))

    def makeSet(self, token, sets, i):
        """Compute Earley sets: scan `token` from sets[i] into sets[i+1],
        then complete finished rules back into sets[i]."""
        cur, next = sets[i], sets[i+1]

        ttype = token is not None and self.typestring(token) or None
        if ttype is not None:
            fn, arg = self.gotoT, ttype
        else:
            fn, arg = self.gotoST, token

        for item in cur:
            ptr = (item, i)
            state, parent = item
            add = fn(state, arg)
            for k in add:
                if k is not None:
                    self.add(next, (k, parent), i+1, ptr)
                    # Follow the epsilon-nonkernel edge, if any.
                    nk = self.goto(k, None)
                    if nk is not None:
                        self.add(next, (nk, i+1))

            if parent == i:
                continue

            # Completion step.
            for rule in self.states[state].complete:
                lhs, rhs = rule
                for pitem in sets[parent]:
                    pstate, pparent = pitem
                    k = self.goto(pstate, lhs)
                    if k is not None:
                        why = (item, i, rule)
                        pptr = (pitem, parent)
                        self.add(cur, (k, pparent),
                                 i, pptr, why)
                        nk = self.goto(k, None)
                        if nk is not None:
                            self.add(cur, (nk, i))

    def makeSet_fast(self, token, sets, i):
        #
        # Call *only* when the entire state machine has been built!
        # It relies on self.edges being filled in completely, and
        # then duplicates and inlines code to boost speed at the
        # cost of extreme ugliness.
        #
        cur, next = sets[i], sets[i+1]
        ttype = token is not None and self.typestring(token) or None

        for item in cur:
            ptr = (item, i)
            state, parent = item
            if ttype is not None:
                k = self.edges.get((state, ttype), None)
                if k is not None:
                    #self.add(next, (k, parent), i+1, ptr)
                    #INLINED --v
                    new = (k, parent)
                    key = (new, i+1)
                    if new not in next:
                        self.links[key] = []
                        next.append(new)
                    self.links[key].append((ptr, None))
                    #INLINED --^
                    #nk = self.goto(k, None)
                    nk = self.edges.get((k, None), None)
                    if nk is not None:
                        #self.add(next, (nk, i+1))
                        #INLINED --v
                        new = (nk, i+1)
                        if new not in next:
                            next.append(new)
                        #INLINED --^
            else:
                add = self.gotoST(state, token)
                for k in add:
                    if k is not None:
                        self.add(next, (k, parent), i+1, ptr)
                        #nk = self.goto(k, None)
                        nk = self.edges.get((k, None), None)
                        if nk is not None:
                            self.add(next, (nk, i+1))

            if parent == i:
                continue

            for rule in self.states[state].complete:
                lhs, rhs = rule
                for pitem in sets[parent]:
                    pstate, pparent = pitem
                    #k = self.goto(pstate, lhs)
                    k = self.edges.get((pstate, lhs), None)
                    if k is not None:
                        why = (item, i, rule)
                        pptr = (pitem, parent)
                        #self.add(cur, (k, pparent),
                        #	   i, pptr, why)
                        #INLINED --v
                        new = (k, pparent)
                        key = (new, i)
                        if new not in cur:
                            self.links[key] = []
                            cur.append(new)
                        self.links[key].append((pptr, why))
                        #INLINED --^
                        #nk = self.goto(k, None)
                        nk = self.edges.get((k, None), None)
                        if nk is not None:
                            #self.add(cur, (nk, i))
                            #INLINED --v
                            new = (nk, i)
                            if new not in cur:
                                cur.append(new)
                            #INLINED --^

    def predecessor(self, key, causal):
        """Return the predecessor item recorded for `key` with `causal`."""
        for p, c in self.links[key]:
            if c == causal:
                return p
        assert 0

    def causal(self, key):
        """Return the causal link for `key`, resolving ambiguity via
        ambiguity()/resolve() when several rules could have completed."""
        links = self.links[key]
        if len(links) == 1:
            return links[0][1]
        choices = []
        rule2cause = {}
        for p, c in links:
            rule = c[2]
            choices.append(rule)
            rule2cause[rule] = c
        return rule2cause[self.ambiguity(choices)]

    def deriveEpsilon(self, nt):
        """Build the semantic value of nullable nonterminal `nt` by
        recursively deriving epsilon for its RHS symbols."""
        if len(self.newrules[nt]) > 1:
            rule = self.ambiguity(self.newrules[nt])
        else:
            rule = self.newrules[nt][0]
        #print rule

        rhs = rule[1]
        attr = [None] * len(rhs)

        for i in range(len(rhs)-1, -1, -1):
            attr[i] = self.deriveEpsilon(rhs[i])
        return self.rule2func[self.new2old[rule]](attr)

    def buildTree(self, nt, item, tokens, k):
        """Walk the recorded links right-to-left to reconstruct the parse
        of `nt` ending at Earley set `k`, invoking semantic actions."""
        state, parent = item

        choices = []
        for rule in self.states[state].complete:
            if rule[0] == nt:
                choices.append(rule)
        rule = choices[0]
        if len(choices) > 1:
            rule = self.ambiguity(choices)
        #print rule

        rhs = rule[1]
        attr = [None] * len(rhs)

        for i in range(len(rhs)-1, -1, -1):
            sym = rhs[i]
            if not self.newrules.has_key(sym):
                # Terminal symbol: consume a token (except for |-).
                if sym != self._BOF:
                    attr[i] = tokens[k-1]
                    key = (item, k)
                    item, k = self.predecessor(key, None)
            #elif self.isnullable(sym):
            elif self._NULLABLE == sym[0:len(self._NULLABLE)]:
                attr[i] = self.deriveEpsilon(sym)
            else:
                key = (item, k)
                why = self.causal(key)
                attr[i] = self.buildTree(sym, why[0],
                                         tokens, why[1])
                item, k = self.predecessor(key, why)
        return self.rule2func[self.new2old[rule]](attr)

    def ambiguity(self, rules):
        #
        # XXX - problem here and in collectRules() if the same rule
        #	 appears in >1 method.  Also undefined results if rules
        #	 causing the ambiguity appear in the same method.
        #
        sortlist = []
        name2index = {}
        for i in range(len(rules)):
            lhs, rhs = rule = rules[i]
            name = self.rule2name[self.new2old[rule]]
            sortlist.append((len(rhs), name))
            name2index[name] = i
        sortlist.sort()
        list = map(lambda (a,b): b, sortlist)
        return rules[name2index[self.resolve(list)]]

    def resolve(self, list):
        #
        # Resolve ambiguity in favor of the shortest RHS.
        # Since we walk the tree from the top down, this
        # should effectively resolve in favor of a "shift".
        #
        return list[0]
|
673 |
|
674 # |
|
675 # GenericASTBuilder automagically constructs a concrete/abstract syntax tree |
|
676 # for a given input. The extra argument is a class (not an instance!) |
|
677 # which supports the "__setslice__" and "__len__" methods. |
|
678 # |
|
679 # XXX - silently overrides any user code in methods. |
|
680 # |
|
681 |
|
class GenericASTBuilder(GenericParser):
    """Parser subclass that automatically builds a syntax tree.

    `AST` is a node class (not an instance) supporting __setslice__ and
    __len__.  User rule bodies are ignored: every rule's action is
    rebound to construct an AST node.  Override terminal()/nonterminal()
    to customize node creation.
    """
    def __init__(self, AST, start):
        GenericParser.__init__(self, start)
        self.AST = AST

    def preprocess(self, rule, func):
        # Rebind the rule's action to build a node for its LHS,
        # silently discarding any user code in the p_* method.
        def rebind(lhs, self=self):
            def build(args, lhs=lhs, self=self):
                return self.buildASTNode(args, lhs)
            return build
        lhs, rhs = rule
        return rule, rebind(lhs)

    def buildASTNode(self, args, lhs):
        # Wrap anything that isn't already an AST node as a terminal.
        children = [arg if isinstance(arg, self.AST)
                    else self.terminal(arg)
                    for arg in args]
        return self.nonterminal(lhs, children)

    def terminal(self, token):
        """Hook: convert a token into a leaf node; identity by default."""
        return token

    def nonterminal(self, type, args):
        """Hook: build an AST node of the given type holding args."""
        rv = self.AST(type)
        rv[:len(args)] = args
        return rv
|
709 |
|
710 # |
|
711 # GenericASTTraversal is a Visitor pattern according to Design Patterns. For |
|
712 # each node it attempts to invoke the method n_<node type>, falling |
|
713 # back onto the default() method if the n_* can't be found. The preorder |
|
714 # traversal also looks for an exit hook named n_<node type>_exit (no default |
|
715 # routine is called if it's not found). To prematurely halt traversal |
|
716 # of a subtree, call the prune() method -- this only makes sense for a |
|
717 # preorder traversal. Node type is determined via the typestring() method. |
|
718 # |
|
719 |
|
class GenericASTTraversalPruningException(Exception):
    """Raised by GenericASTTraversal.prune() to stop descending into the
    current subtree.

    Must derive from Exception: Python 3 refuses to raise old-style /
    non-exception classes, and inheriting Exception remains fully
    backward-compatible with the raise/except usage in this file.
    """
    pass
|
722 |
|
class GenericASTTraversal:
    """Visitor over an AST (Design Patterns' Visitor).

    For each node, invokes the method n_<node type> if it exists, else
    default().  preorder() additionally calls n_<node type>_exit after
    the children (no default for exit hooks) and honors prune();
    postorder() visits children first.  Node type comes from
    typestring().
    """
    def __init__(self, ast):
        self.ast = ast

    def typestring(self, node):
        """Return the dispatch key for node; override to change it."""
        return node.type

    def prune(self):
        """Abort traversal of the current subtree (preorder only)."""
        raise GenericASTTraversalPruningException

    def preorder(self, node=None):
        """Visit node, then its children, then the n_*_exit hook."""
        node = self.ast if node is None else node

        try:
            name = 'n_' + self.typestring(node)
            visit = getattr(self, name, None)
            if visit is not None:
                visit(node)
            else:
                self.default(node)
        except GenericASTTraversalPruningException:
            # prune() was called: skip this node's children.
            return

        for child in node:
            self.preorder(child)

        leave = getattr(self, name + '_exit', None)
        if leave is not None:
            leave(node)

    def postorder(self, node=None):
        """Visit node's children, then node itself."""
        node = self.ast if node is None else node

        for child in node:
            self.postorder(child)

        name = 'n_' + self.typestring(node)
        visit = getattr(self, name, None)
        if visit is not None:
            visit(node)
        else:
            self.default(node)

    def default(self, node):
        """Fallback visit hook; does nothing."""
        pass
|
772 |
|
773 # |
|
774 # GenericASTMatcher. AST nodes must have "__getitem__" and "__cmp__" |
|
775 # implemented. |
|
776 # |
|
777 # XXX - makes assumptions about how GenericParser walks the parse tree. |
|
778 # |
|
779 |
|
780 class GenericASTMatcher(GenericParser): |
|
781 def __init__(self, start, ast): |
|
782 GenericParser.__init__(self, start) |
|
783 self.ast = ast |
|
784 |
|
785 def preprocess(self, rule, func): |
|
786 rebind = lambda func, self=self: \ |
|
787 lambda args, func=func, self=self: \ |
|
788 self.foundMatch(args, func) |
|
789 lhs, rhs = rule |
|
790 rhslist = list(rhs) |
|
791 rhslist.reverse() |
|
792 |
|
793 return (lhs, tuple(rhslist)), rebind(func) |
|
794 |
|
795 def foundMatch(self, args, func): |
|
796 func(args[-1]) |
|
797 return args[-1] |
|
798 |
|
799 def match_r(self, node): |
|
800 self.input.insert(0, node) |
|
801 children = 0 |
|
802 |
|
803 for child in node: |
|
804 if children == 0: |
|
805 self.input.insert(0, '(') |
|
806 children = children + 1 |
|
807 self.match_r(child) |
|
808 |
|
809 if children > 0: |
|
810 self.input.insert(0, ')') |
|
811 |
|
812 def match(self, ast=None): |
|
813 if ast is None: |
|
814 ast = self.ast |
|
815 self.input = [] |
|
816 |
|
817 self.match_r(ast) |
|
818 self.parse(self.input) |
|
819 |
|
820 def resolve(self, list): |
|
821 # |
|
822 # Resolve ambiguity in favor of the longest RHS. |
|
823 # |
|
824 return list[-1] |
|
825 |
|
826 def _dump(tokens, sets, states): |
|
827 for i in range(len(sets)): |
|
828 print 'set', i |
|
829 for item in sets[i]: |
|
830 print '\t', item |
|
831 for (lhs, rhs), pos in states[item[0]].items: |
|
832 print '\t\t', lhs, '::=', |
|
833 print string.join(rhs[:pos]), |
|
834 print '.', |
|
835 print string.join(rhs[pos:]) |
|
836 if i < len(tokens): |
|
837 print |
|
838 print 'token', str(tokens[i]) |
|
839 print |