|
1 # Copyright 2006 Google, Inc. All Rights Reserved. |
|
2 # Licensed to PSF under a Contributor Agreement. |
|
3 |
|
4 """Pattern compiler. |
|
5 |
|
6 The grammer is taken from PatternGrammar.txt. |
|
7 |
|
8 The compiler compiles a pattern to a pytree.*Pattern instance. |
|
9 """ |
|
10 |
|
11 __author__ = "Guido van Rossum <guido@python.org>" |
|
12 |
|
13 # Python imports |
|
14 import os |
|
15 |
|
16 # Fairly local imports |
|
17 from .pgen2 import driver |
|
18 from .pgen2 import literals |
|
19 from .pgen2 import token |
|
20 from .pgen2 import tokenize |
|
21 |
|
22 # Really local imports |
|
23 from . import pytree |
|
24 from . import pygram |
|
25 |
|
26 # The pattern grammar file |
|
27 _PATTERN_GRAMMAR_FILE = os.path.join(os.path.dirname(__file__), |
|
28 "PatternGrammar.txt") |
|
29 |
|
30 |
|
31 def tokenize_wrapper(input): |
|
32 """Tokenizes a string suppressing significant whitespace.""" |
|
33 skip = (token.NEWLINE, token.INDENT, token.DEDENT) |
|
34 tokens = tokenize.generate_tokens(driver.generate_lines(input).next) |
|
35 for quintuple in tokens: |
|
36 type, value, start, end, line_text = quintuple |
|
37 if type not in skip: |
|
38 yield quintuple |
|
39 |
|
40 |
|
41 class PatternCompiler(object): |
|
42 |
|
43 def __init__(self, grammar_file=_PATTERN_GRAMMAR_FILE): |
|
44 """Initializer. |
|
45 |
|
46 Takes an optional alternative filename for the pattern grammar. |
|
47 """ |
|
48 self.grammar = driver.load_grammar(grammar_file) |
|
49 self.syms = pygram.Symbols(self.grammar) |
|
50 self.pygrammar = pygram.python_grammar |
|
51 self.pysyms = pygram.python_symbols |
|
52 self.driver = driver.Driver(self.grammar, convert=pattern_convert) |
|
53 |
|
54 def compile_pattern(self, input, debug=False): |
|
55 """Compiles a pattern string to a nested pytree.*Pattern object.""" |
|
56 tokens = tokenize_wrapper(input) |
|
57 root = self.driver.parse_tokens(tokens, debug=debug) |
|
58 return self.compile_node(root) |
|
59 |
|
60 def compile_node(self, node): |
|
61 """Compiles a node, recursively. |
|
62 |
|
63 This is one big switch on the node type. |
|
64 """ |
|
65 # XXX Optimize certain Wildcard-containing-Wildcard patterns |
|
66 # that can be merged |
|
67 if node.type == self.syms.Matcher: |
|
68 node = node.children[0] # Avoid unneeded recursion |
|
69 |
|
70 if node.type == self.syms.Alternatives: |
|
71 # Skip the odd children since they are just '|' tokens |
|
72 alts = [self.compile_node(ch) for ch in node.children[::2]] |
|
73 if len(alts) == 1: |
|
74 return alts[0] |
|
75 p = pytree.WildcardPattern([[a] for a in alts], min=1, max=1) |
|
76 return p.optimize() |
|
77 |
|
78 if node.type == self.syms.Alternative: |
|
79 units = [self.compile_node(ch) for ch in node.children] |
|
80 if len(units) == 1: |
|
81 return units[0] |
|
82 p = pytree.WildcardPattern([units], min=1, max=1) |
|
83 return p.optimize() |
|
84 |
|
85 if node.type == self.syms.NegatedUnit: |
|
86 pattern = self.compile_basic(node.children[1:]) |
|
87 p = pytree.NegatedPattern(pattern) |
|
88 return p.optimize() |
|
89 |
|
90 assert node.type == self.syms.Unit |
|
91 |
|
92 name = None |
|
93 nodes = node.children |
|
94 if len(nodes) >= 3 and nodes[1].type == token.EQUAL: |
|
95 name = nodes[0].value |
|
96 nodes = nodes[2:] |
|
97 repeat = None |
|
98 if len(nodes) >= 2 and nodes[-1].type == self.syms.Repeater: |
|
99 repeat = nodes[-1] |
|
100 nodes = nodes[:-1] |
|
101 |
|
102 # Now we've reduced it to: STRING | NAME [Details] | (...) | [...] |
|
103 pattern = self.compile_basic(nodes, repeat) |
|
104 |
|
105 if repeat is not None: |
|
106 assert repeat.type == self.syms.Repeater |
|
107 children = repeat.children |
|
108 child = children[0] |
|
109 if child.type == token.STAR: |
|
110 min = 0 |
|
111 max = pytree.HUGE |
|
112 elif child.type == token.PLUS: |
|
113 min = 1 |
|
114 max = pytree.HUGE |
|
115 elif child.type == token.LBRACE: |
|
116 assert children[-1].type == token.RBRACE |
|
117 assert len(children) in (3, 5) |
|
118 min = max = self.get_int(children[1]) |
|
119 if len(children) == 5: |
|
120 max = self.get_int(children[3]) |
|
121 else: |
|
122 assert False |
|
123 if min != 1 or max != 1: |
|
124 pattern = pattern.optimize() |
|
125 pattern = pytree.WildcardPattern([[pattern]], min=min, max=max) |
|
126 |
|
127 if name is not None: |
|
128 pattern.name = name |
|
129 return pattern.optimize() |
|
130 |
|
131 def compile_basic(self, nodes, repeat=None): |
|
132 # Compile STRING | NAME [Details] | (...) | [...] |
|
133 assert len(nodes) >= 1 |
|
134 node = nodes[0] |
|
135 if node.type == token.STRING: |
|
136 value = literals.evalString(node.value) |
|
137 return pytree.LeafPattern(content=value) |
|
138 elif node.type == token.NAME: |
|
139 value = node.value |
|
140 if value.isupper(): |
|
141 if value not in TOKEN_MAP: |
|
142 raise SyntaxError("Invalid token: %r" % value) |
|
143 return pytree.LeafPattern(TOKEN_MAP[value]) |
|
144 else: |
|
145 if value == "any": |
|
146 type = None |
|
147 elif not value.startswith("_"): |
|
148 type = getattr(self.pysyms, value, None) |
|
149 if type is None: |
|
150 raise SyntaxError("Invalid symbol: %r" % value) |
|
151 if nodes[1:]: # Details present |
|
152 content = [self.compile_node(nodes[1].children[1])] |
|
153 else: |
|
154 content = None |
|
155 return pytree.NodePattern(type, content) |
|
156 elif node.value == "(": |
|
157 return self.compile_node(nodes[1]) |
|
158 elif node.value == "[": |
|
159 assert repeat is None |
|
160 subpattern = self.compile_node(nodes[1]) |
|
161 return pytree.WildcardPattern([[subpattern]], min=0, max=1) |
|
162 assert False, node |
|
163 |
|
164 def get_int(self, node): |
|
165 assert node.type == token.NUMBER |
|
166 return int(node.value) |
|
167 |
|
168 |
|
169 # Map named tokens to the type value for a LeafPattern |
|
170 TOKEN_MAP = {"NAME": token.NAME, |
|
171 "STRING": token.STRING, |
|
172 "NUMBER": token.NUMBER, |
|
173 "TOKEN": None} |
|
174 |
|
175 |
|
176 def pattern_convert(grammar, raw_node_info): |
|
177 """Converts raw node information to a Node or Leaf instance.""" |
|
178 type, value, context, children = raw_node_info |
|
179 if children or type in grammar.number2symbol: |
|
180 return pytree.Node(type, children, context=context) |
|
181 else: |
|
182 return pytree.Leaf(type, value, context=context) |
|
183 |
|
184 |
|
185 def compile_pattern(pattern): |
|
186 return PatternCompiler().compile_pattern(pattern) |