|
1 # Copyright 2006 Google, Inc. All Rights Reserved. |
|
2 # Licensed to PSF under a Contributor Agreement. |
|
3 |
|
4 """Python parse tree definitions. |
|
5 |
|
6 This is a very concrete parse tree; we need to keep every token and |
|
7 even the comments and whitespace between tokens. |
|
8 |
|
9 There's also a pattern matching implementation here. |
|
10 """ |
|
11 |
|
12 __author__ = "Guido van Rossum <guido@python.org>" |
|
13 |
|
14 import sys |
|
15 from StringIO import StringIO |
|
16 |
|
17 |
|
18 HUGE = 0x7FFFFFFF # maximum repeat count, default max |
|
19 |
|
20 _type_reprs = {} |
|
21 def type_repr(type_num): |
|
22 global _type_reprs |
|
23 if not _type_reprs: |
|
24 from .pygram import python_symbols |
|
25 # printing tokens is possible but not as useful |
|
26 # from .pgen2 import token // token.__dict__.items(): |
|
27 for name, val in python_symbols.__dict__.items(): |
|
28 if type(val) == int: _type_reprs[val] = name |
|
29 return _type_reprs.setdefault(type_num, type_num) |
|
30 |
|
31 |
|
32 class Base(object): |
|
33 |
|
34 """Abstract base class for Node and Leaf. |
|
35 |
|
36 This provides some default functionality and boilerplate using the |
|
37 template pattern. |
|
38 |
|
39 A node may be a subnode of at most one parent. |
|
40 """ |
|
41 |
|
42 # Default values for instance variables |
|
43 type = None # int: token number (< 256) or symbol number (>= 256) |
|
44 parent = None # Parent node pointer, or None |
|
45 children = () # Tuple of subnodes |
|
46 was_changed = False |
|
47 |
|
48 def __new__(cls, *args, **kwds): |
|
49 """Constructor that prevents Base from being instantiated.""" |
|
50 assert cls is not Base, "Cannot instantiate Base" |
|
51 return object.__new__(cls) |
|
52 |
|
53 def __eq__(self, other): |
|
54 """Compares two nodes for equality. |
|
55 |
|
56 This calls the method _eq(). |
|
57 """ |
|
58 if self.__class__ is not other.__class__: |
|
59 return NotImplemented |
|
60 return self._eq(other) |
|
61 |
|
62 def __ne__(self, other): |
|
63 """Compares two nodes for inequality. |
|
64 |
|
65 This calls the method _eq(). |
|
66 """ |
|
67 if self.__class__ is not other.__class__: |
|
68 return NotImplemented |
|
69 return not self._eq(other) |
|
70 |
|
71 def _eq(self, other): |
|
72 """Compares two nodes for equality. |
|
73 |
|
74 This is called by __eq__ and __ne__. It is only called if the |
|
75 two nodes have the same type. This must be implemented by the |
|
76 concrete subclass. Nodes should be considered equal if they |
|
77 have the same structure, ignoring the prefix string and other |
|
78 context information. |
|
79 """ |
|
80 raise NotImplementedError |
|
81 |
|
82 def clone(self): |
|
83 """Returns a cloned (deep) copy of self. |
|
84 |
|
85 This must be implemented by the concrete subclass. |
|
86 """ |
|
87 raise NotImplementedError |
|
88 |
|
89 def post_order(self): |
|
90 """Returns a post-order iterator for the tree. |
|
91 |
|
92 This must be implemented by the concrete subclass. |
|
93 """ |
|
94 raise NotImplementedError |
|
95 |
|
96 def pre_order(self): |
|
97 """Returns a pre-order iterator for the tree. |
|
98 |
|
99 This must be implemented by the concrete subclass. |
|
100 """ |
|
101 raise NotImplementedError |
|
102 |
|
103 def set_prefix(self, prefix): |
|
104 """Sets the prefix for the node (see Leaf class). |
|
105 |
|
106 This must be implemented by the concrete subclass. |
|
107 """ |
|
108 raise NotImplementedError |
|
109 |
|
110 def get_prefix(self): |
|
111 """Returns the prefix for the node (see Leaf class). |
|
112 |
|
113 This must be implemented by the concrete subclass. |
|
114 """ |
|
115 raise NotImplementedError |
|
116 |
|
117 def replace(self, new): |
|
118 """Replaces this node with a new one in the parent.""" |
|
119 assert self.parent is not None, str(self) |
|
120 assert new is not None |
|
121 if not isinstance(new, list): |
|
122 new = [new] |
|
123 l_children = [] |
|
124 found = False |
|
125 for ch in self.parent.children: |
|
126 if ch is self: |
|
127 assert not found, (self.parent.children, self, new) |
|
128 if new is not None: |
|
129 l_children.extend(new) |
|
130 found = True |
|
131 else: |
|
132 l_children.append(ch) |
|
133 assert found, (self.children, self, new) |
|
134 self.parent.changed() |
|
135 self.parent.children = l_children |
|
136 for x in new: |
|
137 x.parent = self.parent |
|
138 self.parent = None |
|
139 |
|
140 def get_lineno(self): |
|
141 """Returns the line number which generated the invocant node.""" |
|
142 node = self |
|
143 while not isinstance(node, Leaf): |
|
144 if not node.children: |
|
145 return |
|
146 node = node.children[0] |
|
147 return node.lineno |
|
148 |
|
149 def changed(self): |
|
150 if self.parent: |
|
151 self.parent.changed() |
|
152 self.was_changed = True |
|
153 |
|
154 def remove(self): |
|
155 """Remove the node from the tree. Returns the position of the node |
|
156 in its parent's children before it was removed.""" |
|
157 if self.parent: |
|
158 for i, node in enumerate(self.parent.children): |
|
159 if node is self: |
|
160 self.parent.changed() |
|
161 del self.parent.children[i] |
|
162 self.parent = None |
|
163 return i |
|
164 |
|
165 def get_next_sibling(self): |
|
166 """Return the node immediately following the invocant in their |
|
167 parent's children list. If the invocant does not have a next |
|
168 sibling, return None.""" |
|
169 if self.parent is None: |
|
170 return None |
|
171 |
|
172 # Can't use index(); we need to test by identity |
|
173 for i, child in enumerate(self.parent.children): |
|
174 if child is self: |
|
175 try: |
|
176 return self.parent.children[i+1] |
|
177 except IndexError: |
|
178 return None |
|
179 |
|
180 def get_prev_sibling(self): |
|
181 """Return the node immediately preceding the invocant in their |
|
182 parent's children list. If the invocant does not have a previous |
|
183 sibling, return None.""" |
|
184 if self.parent is None: |
|
185 return None |
|
186 |
|
187 # Can't use index(); we need to test by identity |
|
188 for i, child in enumerate(self.parent.children): |
|
189 if child is self: |
|
190 if i == 0: |
|
191 return None |
|
192 return self.parent.children[i-1] |
|
193 |
|
194 def get_suffix(self): |
|
195 """Return the string immediately following the invocant node. This |
|
196 is effectively equivalent to node.get_next_sibling().get_prefix()""" |
|
197 next_sib = self.get_next_sibling() |
|
198 if next_sib is None: |
|
199 return "" |
|
200 return next_sib.get_prefix() |
|
201 |
|
202 |
|
203 class Node(Base): |
|
204 |
|
205 """Concrete implementation for interior nodes.""" |
|
206 |
|
207 def __init__(self, type, children, context=None, prefix=None): |
|
208 """Initializer. |
|
209 |
|
210 Takes a type constant (a symbol number >= 256), a sequence of |
|
211 child nodes, and an optional context keyword argument. |
|
212 |
|
213 As a side effect, the parent pointers of the children are updated. |
|
214 """ |
|
215 assert type >= 256, type |
|
216 self.type = type |
|
217 self.children = list(children) |
|
218 for ch in self.children: |
|
219 assert ch.parent is None, repr(ch) |
|
220 ch.parent = self |
|
221 if prefix is not None: |
|
222 self.set_prefix(prefix) |
|
223 |
|
224 def __repr__(self): |
|
225 """Returns a canonical string representation.""" |
|
226 return "%s(%s, %r)" % (self.__class__.__name__, |
|
227 type_repr(self.type), |
|
228 self.children) |
|
229 |
|
230 def __str__(self): |
|
231 """Returns a pretty string representation. |
|
232 |
|
233 This reproduces the input source exactly. |
|
234 """ |
|
235 return "".join(map(str, self.children)) |
|
236 |
|
237 def _eq(self, other): |
|
238 """Compares two nodes for equality.""" |
|
239 return (self.type, self.children) == (other.type, other.children) |
|
240 |
|
241 def clone(self): |
|
242 """Returns a cloned (deep) copy of self.""" |
|
243 return Node(self.type, [ch.clone() for ch in self.children]) |
|
244 |
|
245 def post_order(self): |
|
246 """Returns a post-order iterator for the tree.""" |
|
247 for child in self.children: |
|
248 for node in child.post_order(): |
|
249 yield node |
|
250 yield self |
|
251 |
|
252 def pre_order(self): |
|
253 """Returns a pre-order iterator for the tree.""" |
|
254 yield self |
|
255 for child in self.children: |
|
256 for node in child.post_order(): |
|
257 yield node |
|
258 |
|
259 def set_prefix(self, prefix): |
|
260 """Sets the prefix for the node. |
|
261 |
|
262 This passes the responsibility on to the first child. |
|
263 """ |
|
264 if self.children: |
|
265 self.children[0].set_prefix(prefix) |
|
266 |
|
267 def get_prefix(self): |
|
268 """Returns the prefix for the node. |
|
269 |
|
270 This passes the call on to the first child. |
|
271 """ |
|
272 if not self.children: |
|
273 return "" |
|
274 return self.children[0].get_prefix() |
|
275 |
|
276 def set_child(self, i, child): |
|
277 """Equivalent to 'node.children[i] = child'. This method also sets the |
|
278 child's parent attribute appropriately.""" |
|
279 child.parent = self |
|
280 self.children[i].parent = None |
|
281 self.children[i] = child |
|
282 |
|
283 def insert_child(self, i, child): |
|
284 """Equivalent to 'node.children.insert(i, child)'. This method also |
|
285 sets the child's parent attribute appropriately.""" |
|
286 child.parent = self |
|
287 self.children.insert(i, child) |
|
288 |
|
289 def append_child(self, child): |
|
290 """Equivalent to 'node.children.append(child)'. This method also |
|
291 sets the child's parent attribute appropriately.""" |
|
292 child.parent = self |
|
293 self.children.append(child) |
|
294 |
|
295 |
|
296 class Leaf(Base): |
|
297 |
|
298 """Concrete implementation for leaf nodes.""" |
|
299 |
|
300 # Default values for instance variables |
|
301 prefix = "" # Whitespace and comments preceding this token in the input |
|
302 lineno = 0 # Line where this token starts in the input |
|
303 column = 0 # Column where this token tarts in the input |
|
304 |
|
305 def __init__(self, type, value, context=None, prefix=None): |
|
306 """Initializer. |
|
307 |
|
308 Takes a type constant (a token number < 256), a string value, |
|
309 and an optional context keyword argument. |
|
310 """ |
|
311 assert 0 <= type < 256, type |
|
312 if context is not None: |
|
313 self.prefix, (self.lineno, self.column) = context |
|
314 self.type = type |
|
315 self.value = value |
|
316 if prefix is not None: |
|
317 self.prefix = prefix |
|
318 |
|
319 def __repr__(self): |
|
320 """Returns a canonical string representation.""" |
|
321 return "%s(%r, %r)" % (self.__class__.__name__, |
|
322 self.type, |
|
323 self.value) |
|
324 |
|
325 def __str__(self): |
|
326 """Returns a pretty string representation. |
|
327 |
|
328 This reproduces the input source exactly. |
|
329 """ |
|
330 return self.prefix + str(self.value) |
|
331 |
|
332 def _eq(self, other): |
|
333 """Compares two nodes for equality.""" |
|
334 return (self.type, self.value) == (other.type, other.value) |
|
335 |
|
336 def clone(self): |
|
337 """Returns a cloned (deep) copy of self.""" |
|
338 return Leaf(self.type, self.value, |
|
339 (self.prefix, (self.lineno, self.column))) |
|
340 |
|
341 def post_order(self): |
|
342 """Returns a post-order iterator for the tree.""" |
|
343 yield self |
|
344 |
|
345 def pre_order(self): |
|
346 """Returns a pre-order iterator for the tree.""" |
|
347 yield self |
|
348 |
|
349 def set_prefix(self, prefix): |
|
350 """Sets the prefix for the node.""" |
|
351 self.changed() |
|
352 self.prefix = prefix |
|
353 |
|
354 def get_prefix(self): |
|
355 """Returns the prefix for the node.""" |
|
356 return self.prefix |
|
357 |
|
358 |
|
359 def convert(gr, raw_node): |
|
360 """Converts raw node information to a Node or Leaf instance. |
|
361 |
|
362 This is passed to the parser driver which calls it whenever a |
|
363 reduction of a grammar rule produces a new complete node, so that |
|
364 the tree is build strictly bottom-up. |
|
365 """ |
|
366 type, value, context, children = raw_node |
|
367 if children or type in gr.number2symbol: |
|
368 # If there's exactly one child, return that child instead of |
|
369 # creating a new node. |
|
370 if len(children) == 1: |
|
371 return children[0] |
|
372 return Node(type, children, context=context) |
|
373 else: |
|
374 return Leaf(type, value, context=context) |
|
375 |
|
376 |
|
377 class BasePattern(object): |
|
378 |
|
379 """A pattern is a tree matching pattern. |
|
380 |
|
381 It looks for a specific node type (token or symbol), and |
|
382 optionally for a specific content. |
|
383 |
|
384 This is an abstract base class. There are three concrete |
|
385 subclasses: |
|
386 |
|
387 - LeafPattern matches a single leaf node; |
|
388 - NodePattern matches a single node (usually non-leaf); |
|
389 - WildcardPattern matches a sequence of nodes of variable length. |
|
390 """ |
|
391 |
|
392 # Defaults for instance variables |
|
393 type = None # Node type (token if < 256, symbol if >= 256) |
|
394 content = None # Optional content matching pattern |
|
395 name = None # Optional name used to store match in results dict |
|
396 |
|
397 def __new__(cls, *args, **kwds): |
|
398 """Constructor that prevents BasePattern from being instantiated.""" |
|
399 assert cls is not BasePattern, "Cannot instantiate BasePattern" |
|
400 return object.__new__(cls) |
|
401 |
|
402 def __repr__(self): |
|
403 args = [type_repr(self.type), self.content, self.name] |
|
404 while args and args[-1] is None: |
|
405 del args[-1] |
|
406 return "%s(%s)" % (self.__class__.__name__, ", ".join(map(repr, args))) |
|
407 |
|
408 def optimize(self): |
|
409 """A subclass can define this as a hook for optimizations. |
|
410 |
|
411 Returns either self or another node with the same effect. |
|
412 """ |
|
413 return self |
|
414 |
|
415 def match(self, node, results=None): |
|
416 """Does this pattern exactly match a node? |
|
417 |
|
418 Returns True if it matches, False if not. |
|
419 |
|
420 If results is not None, it must be a dict which will be |
|
421 updated with the nodes matching named subpatterns. |
|
422 |
|
423 Default implementation for non-wildcard patterns. |
|
424 """ |
|
425 if self.type is not None and node.type != self.type: |
|
426 return False |
|
427 if self.content is not None: |
|
428 r = None |
|
429 if results is not None: |
|
430 r = {} |
|
431 if not self._submatch(node, r): |
|
432 return False |
|
433 if r: |
|
434 results.update(r) |
|
435 if results is not None and self.name: |
|
436 results[self.name] = node |
|
437 return True |
|
438 |
|
439 def match_seq(self, nodes, results=None): |
|
440 """Does this pattern exactly match a sequence of nodes? |
|
441 |
|
442 Default implementation for non-wildcard patterns. |
|
443 """ |
|
444 if len(nodes) != 1: |
|
445 return False |
|
446 return self.match(nodes[0], results) |
|
447 |
|
448 def generate_matches(self, nodes): |
|
449 """Generator yielding all matches for this pattern. |
|
450 |
|
451 Default implementation for non-wildcard patterns. |
|
452 """ |
|
453 r = {} |
|
454 if nodes and self.match(nodes[0], r): |
|
455 yield 1, r |
|
456 |
|
457 |
|
458 class LeafPattern(BasePattern): |
|
459 |
|
460 def __init__(self, type=None, content=None, name=None): |
|
461 """Initializer. Takes optional type, content, and name. |
|
462 |
|
463 The type, if given must be a token type (< 256). If not given, |
|
464 this matches any *leaf* node; the content may still be required. |
|
465 |
|
466 The content, if given, must be a string. |
|
467 |
|
468 If a name is given, the matching node is stored in the results |
|
469 dict under that key. |
|
470 """ |
|
471 if type is not None: |
|
472 assert 0 <= type < 256, type |
|
473 if content is not None: |
|
474 assert isinstance(content, basestring), repr(content) |
|
475 self.type = type |
|
476 self.content = content |
|
477 self.name = name |
|
478 |
|
479 def match(self, node, results=None): |
|
480 """Override match() to insist on a leaf node.""" |
|
481 if not isinstance(node, Leaf): |
|
482 return False |
|
483 return BasePattern.match(self, node, results) |
|
484 |
|
485 def _submatch(self, node, results=None): |
|
486 """Match the pattern's content to the node's children. |
|
487 |
|
488 This assumes the node type matches and self.content is not None. |
|
489 |
|
490 Returns True if it matches, False if not. |
|
491 |
|
492 If results is not None, it must be a dict which will be |
|
493 updated with the nodes matching named subpatterns. |
|
494 |
|
495 When returning False, the results dict may still be updated. |
|
496 """ |
|
497 return self.content == node.value |
|
498 |
|
499 |
|
500 class NodePattern(BasePattern): |
|
501 |
|
502 wildcards = False |
|
503 |
|
504 def __init__(self, type=None, content=None, name=None): |
|
505 """Initializer. Takes optional type, content, and name. |
|
506 |
|
507 The type, if given, must be a symbol type (>= 256). If the |
|
508 type is None this matches *any* single node (leaf or not), |
|
509 except if content is not None, in which it only matches |
|
510 non-leaf nodes that also match the content pattern. |
|
511 |
|
512 The content, if not None, must be a sequence of Patterns that |
|
513 must match the node's children exactly. If the content is |
|
514 given, the type must not be None. |
|
515 |
|
516 If a name is given, the matching node is stored in the results |
|
517 dict under that key. |
|
518 """ |
|
519 if type is not None: |
|
520 assert type >= 256, type |
|
521 if content is not None: |
|
522 assert not isinstance(content, basestring), repr(content) |
|
523 content = list(content) |
|
524 for i, item in enumerate(content): |
|
525 assert isinstance(item, BasePattern), (i, item) |
|
526 if isinstance(item, WildcardPattern): |
|
527 self.wildcards = True |
|
528 self.type = type |
|
529 self.content = content |
|
530 self.name = name |
|
531 |
|
532 def _submatch(self, node, results=None): |
|
533 """Match the pattern's content to the node's children. |
|
534 |
|
535 This assumes the node type matches and self.content is not None. |
|
536 |
|
537 Returns True if it matches, False if not. |
|
538 |
|
539 If results is not None, it must be a dict which will be |
|
540 updated with the nodes matching named subpatterns. |
|
541 |
|
542 When returning False, the results dict may still be updated. |
|
543 """ |
|
544 if self.wildcards: |
|
545 for c, r in generate_matches(self.content, node.children): |
|
546 if c == len(node.children): |
|
547 if results is not None: |
|
548 results.update(r) |
|
549 return True |
|
550 return False |
|
551 if len(self.content) != len(node.children): |
|
552 return False |
|
553 for subpattern, child in zip(self.content, node.children): |
|
554 if not subpattern.match(child, results): |
|
555 return False |
|
556 return True |
|
557 |
|
558 |
|
559 class WildcardPattern(BasePattern): |
|
560 |
|
561 """A wildcard pattern can match zero or more nodes. |
|
562 |
|
563 This has all the flexibility needed to implement patterns like: |
|
564 |
|
565 .* .+ .? .{m,n} |
|
566 (a b c | d e | f) |
|
567 (...)* (...)+ (...)? (...){m,n} |
|
568 |
|
569 except it always uses non-greedy matching. |
|
570 """ |
|
571 |
|
572 def __init__(self, content=None, min=0, max=HUGE, name=None): |
|
573 """Initializer. |
|
574 |
|
575 Args: |
|
576 content: optional sequence of subsequences of patterns; |
|
577 if absent, matches one node; |
|
578 if present, each subsequence is an alternative [*] |
|
579 min: optinal minumum number of times to match, default 0 |
|
580 max: optional maximum number of times tro match, default HUGE |
|
581 name: optional name assigned to this match |
|
582 |
|
583 [*] Thus, if content is [[a, b, c], [d, e], [f, g, h]] this is |
|
584 equivalent to (a b c | d e | f g h); if content is None, |
|
585 this is equivalent to '.' in regular expression terms. |
|
586 The min and max parameters work as follows: |
|
587 min=0, max=maxint: .* |
|
588 min=1, max=maxint: .+ |
|
589 min=0, max=1: .? |
|
590 min=1, max=1: . |
|
591 If content is not None, replace the dot with the parenthesized |
|
592 list of alternatives, e.g. (a b c | d e | f g h)* |
|
593 """ |
|
594 assert 0 <= min <= max <= HUGE, (min, max) |
|
595 if content is not None: |
|
596 content = tuple(map(tuple, content)) # Protect against alterations |
|
597 # Check sanity of alternatives |
|
598 assert len(content), repr(content) # Can't have zero alternatives |
|
599 for alt in content: |
|
600 assert len(alt), repr(alt) # Can have empty alternatives |
|
601 self.content = content |
|
602 self.min = min |
|
603 self.max = max |
|
604 self.name = name |
|
605 |
|
606 def optimize(self): |
|
607 """Optimize certain stacked wildcard patterns.""" |
|
608 subpattern = None |
|
609 if (self.content is not None and |
|
610 len(self.content) == 1 and len(self.content[0]) == 1): |
|
611 subpattern = self.content[0][0] |
|
612 if self.min == 1 and self.max == 1: |
|
613 if self.content is None: |
|
614 return NodePattern(name=self.name) |
|
615 if subpattern is not None and self.name == subpattern.name: |
|
616 return subpattern.optimize() |
|
617 if (self.min <= 1 and isinstance(subpattern, WildcardPattern) and |
|
618 subpattern.min <= 1 and self.name == subpattern.name): |
|
619 return WildcardPattern(subpattern.content, |
|
620 self.min*subpattern.min, |
|
621 self.max*subpattern.max, |
|
622 subpattern.name) |
|
623 return self |
|
624 |
|
625 def match(self, node, results=None): |
|
626 """Does this pattern exactly match a node?""" |
|
627 return self.match_seq([node], results) |
|
628 |
|
629 def match_seq(self, nodes, results=None): |
|
630 """Does this pattern exactly match a sequence of nodes?""" |
|
631 for c, r in self.generate_matches(nodes): |
|
632 if c == len(nodes): |
|
633 if results is not None: |
|
634 results.update(r) |
|
635 if self.name: |
|
636 results[self.name] = list(nodes) |
|
637 return True |
|
638 return False |
|
639 |
|
640 def generate_matches(self, nodes): |
|
641 """Generator yielding matches for a sequence of nodes. |
|
642 |
|
643 Args: |
|
644 nodes: sequence of nodes |
|
645 |
|
646 Yields: |
|
647 (count, results) tuples where: |
|
648 count: the match comprises nodes[:count]; |
|
649 results: dict containing named submatches. |
|
650 """ |
|
651 if self.content is None: |
|
652 # Shortcut for special case (see __init__.__doc__) |
|
653 for count in xrange(self.min, 1 + min(len(nodes), self.max)): |
|
654 r = {} |
|
655 if self.name: |
|
656 r[self.name] = nodes[:count] |
|
657 yield count, r |
|
658 elif self.name == "bare_name": |
|
659 yield self._bare_name_matches(nodes) |
|
660 else: |
|
661 # The reason for this is that hitting the recursion limit usually |
|
662 # results in some ugly messages about how RuntimeErrors are being |
|
663 # ignored. |
|
664 save_stderr = sys.stderr |
|
665 sys.stderr = StringIO() |
|
666 try: |
|
667 for count, r in self._recursive_matches(nodes, 0): |
|
668 if self.name: |
|
669 r[self.name] = nodes[:count] |
|
670 yield count, r |
|
671 except RuntimeError: |
|
672 # We fall back to the iterative pattern matching scheme if the recursive |
|
673 # scheme hits the recursion limit. |
|
674 for count, r in self._iterative_matches(nodes): |
|
675 if self.name: |
|
676 r[self.name] = nodes[:count] |
|
677 yield count, r |
|
678 finally: |
|
679 sys.stderr = save_stderr |
|
680 |
|
681 def _iterative_matches(self, nodes): |
|
682 """Helper to iteratively yield the matches.""" |
|
683 nodelen = len(nodes) |
|
684 if 0 >= self.min: |
|
685 yield 0, {} |
|
686 |
|
687 results = [] |
|
688 # generate matches that use just one alt from self.content |
|
689 for alt in self.content: |
|
690 for c, r in generate_matches(alt, nodes): |
|
691 yield c, r |
|
692 results.append((c, r)) |
|
693 |
|
694 # for each match, iterate down the nodes |
|
695 while results: |
|
696 new_results = [] |
|
697 for c0, r0 in results: |
|
698 # stop if the entire set of nodes has been matched |
|
699 if c0 < nodelen and c0 <= self.max: |
|
700 for alt in self.content: |
|
701 for c1, r1 in generate_matches(alt, nodes[c0:]): |
|
702 if c1 > 0: |
|
703 r = {} |
|
704 r.update(r0) |
|
705 r.update(r1) |
|
706 yield c0 + c1, r |
|
707 new_results.append((c0 + c1, r)) |
|
708 results = new_results |
|
709 |
|
710 def _bare_name_matches(self, nodes): |
|
711 """Special optimized matcher for bare_name.""" |
|
712 count = 0 |
|
713 r = {} |
|
714 done = False |
|
715 max = len(nodes) |
|
716 while not done and count < max: |
|
717 done = True |
|
718 for leaf in self.content: |
|
719 if leaf[0].match(nodes[count], r): |
|
720 count += 1 |
|
721 done = False |
|
722 break |
|
723 r[self.name] = nodes[:count] |
|
724 return count, r |
|
725 |
|
726 def _recursive_matches(self, nodes, count): |
|
727 """Helper to recursively yield the matches.""" |
|
728 assert self.content is not None |
|
729 if count >= self.min: |
|
730 yield 0, {} |
|
731 if count < self.max: |
|
732 for alt in self.content: |
|
733 for c0, r0 in generate_matches(alt, nodes): |
|
734 for c1, r1 in self._recursive_matches(nodes[c0:], count+1): |
|
735 r = {} |
|
736 r.update(r0) |
|
737 r.update(r1) |
|
738 yield c0 + c1, r |
|
739 |
|
740 |
|
741 class NegatedPattern(BasePattern): |
|
742 |
|
743 def __init__(self, content=None): |
|
744 """Initializer. |
|
745 |
|
746 The argument is either a pattern or None. If it is None, this |
|
747 only matches an empty sequence (effectively '$' in regex |
|
748 lingo). If it is not None, this matches whenever the argument |
|
749 pattern doesn't have any matches. |
|
750 """ |
|
751 if content is not None: |
|
752 assert isinstance(content, BasePattern), repr(content) |
|
753 self.content = content |
|
754 |
|
755 def match(self, node): |
|
756 # We never match a node in its entirety |
|
757 return False |
|
758 |
|
759 def match_seq(self, nodes): |
|
760 # We only match an empty sequence of nodes in its entirety |
|
761 return len(nodes) == 0 |
|
762 |
|
763 def generate_matches(self, nodes): |
|
764 if self.content is None: |
|
765 # Return a match if there is an empty sequence |
|
766 if len(nodes) == 0: |
|
767 yield 0, {} |
|
768 else: |
|
769 # Return a match if the argument pattern has no matches |
|
770 for c, r in self.content.generate_matches(nodes): |
|
771 return |
|
772 yield 0, {} |
|
773 |
|
774 |
|
775 def generate_matches(patterns, nodes): |
|
776 """Generator yielding matches for a sequence of patterns and nodes. |
|
777 |
|
778 Args: |
|
779 patterns: a sequence of patterns |
|
780 nodes: a sequence of nodes |
|
781 |
|
782 Yields: |
|
783 (count, results) tuples where: |
|
784 count: the entire sequence of patterns matches nodes[:count]; |
|
785 results: dict containing named submatches. |
|
786 """ |
|
787 if not patterns: |
|
788 yield 0, {} |
|
789 else: |
|
790 p, rest = patterns[0], patterns[1:] |
|
791 for c0, r0 in p.generate_matches(nodes): |
|
792 if not rest: |
|
793 yield c0, r0 |
|
794 else: |
|
795 for c1, r1 in generate_matches(rest, nodes[c0:]): |
|
796 r = {} |
|
797 r.update(r0) |
|
798 r.update(r1) |
|
799 yield c0 + c1, r |