|
1 """Module symbol-table generator""" |
|
2 |
|
3 from compiler import ast |
|
4 from compiler.consts import SC_LOCAL, SC_GLOBAL, SC_FREE, SC_CELL, SC_UNKNOWN |
|
5 from compiler.misc import mangle |
|
6 import types |
|
7 |
|
8 |
|
9 import sys |
|
10 |
|
11 MANGLE_LEN = 256 |
|
12 |
|
13 class Scope: |
|
14 # XXX how much information do I need about each name? |
|
15 def __init__(self, name, module, klass=None): |
|
16 self.name = name |
|
17 self.module = module |
|
18 self.defs = {} |
|
19 self.uses = {} |
|
20 self.globals = {} |
|
21 self.params = {} |
|
22 self.frees = {} |
|
23 self.cells = {} |
|
24 self.children = [] |
|
25 # nested is true if the class could contain free variables, |
|
26 # i.e. if it is nested within another function. |
|
27 self.nested = None |
|
28 self.generator = None |
|
29 self.klass = None |
|
30 if klass is not None: |
|
31 for i in range(len(klass)): |
|
32 if klass[i] != '_': |
|
33 self.klass = klass[i:] |
|
34 break |
|
35 |
|
36 def __repr__(self): |
|
37 return "<%s: %s>" % (self.__class__.__name__, self.name) |
|
38 |
|
39 def mangle(self, name): |
|
40 if self.klass is None: |
|
41 return name |
|
42 return mangle(name, self.klass) |
|
43 |
|
44 def add_def(self, name): |
|
45 self.defs[self.mangle(name)] = 1 |
|
46 |
|
47 def add_use(self, name): |
|
48 self.uses[self.mangle(name)] = 1 |
|
49 |
|
50 def add_global(self, name): |
|
51 name = self.mangle(name) |
|
52 if self.uses.has_key(name) or self.defs.has_key(name): |
|
53 pass # XXX warn about global following def/use |
|
54 if self.params.has_key(name): |
|
55 raise SyntaxError, "%s in %s is global and parameter" % \ |
|
56 (name, self.name) |
|
57 self.globals[name] = 1 |
|
58 self.module.add_def(name) |
|
59 |
|
60 def add_param(self, name): |
|
61 name = self.mangle(name) |
|
62 self.defs[name] = 1 |
|
63 self.params[name] = 1 |
|
64 |
|
65 def get_names(self): |
|
66 d = {} |
|
67 d.update(self.defs) |
|
68 d.update(self.uses) |
|
69 d.update(self.globals) |
|
70 return d.keys() |
|
71 |
|
72 def add_child(self, child): |
|
73 self.children.append(child) |
|
74 |
|
75 def get_children(self): |
|
76 return self.children |
|
77 |
|
78 def DEBUG(self): |
|
79 print >> sys.stderr, self.name, self.nested and "nested" or "" |
|
80 print >> sys.stderr, "\tglobals: ", self.globals |
|
81 print >> sys.stderr, "\tcells: ", self.cells |
|
82 print >> sys.stderr, "\tdefs: ", self.defs |
|
83 print >> sys.stderr, "\tuses: ", self.uses |
|
84 print >> sys.stderr, "\tfrees:", self.frees |
|
85 |
|
86 def check_name(self, name): |
|
87 """Return scope of name. |
|
88 |
|
89 The scope of a name could be LOCAL, GLOBAL, FREE, or CELL. |
|
90 """ |
|
91 if self.globals.has_key(name): |
|
92 return SC_GLOBAL |
|
93 if self.cells.has_key(name): |
|
94 return SC_CELL |
|
95 if self.defs.has_key(name): |
|
96 return SC_LOCAL |
|
97 if self.nested and (self.frees.has_key(name) or |
|
98 self.uses.has_key(name)): |
|
99 return SC_FREE |
|
100 if self.nested: |
|
101 return SC_UNKNOWN |
|
102 else: |
|
103 return SC_GLOBAL |
|
104 |
|
105 def get_free_vars(self): |
|
106 if not self.nested: |
|
107 return () |
|
108 free = {} |
|
109 free.update(self.frees) |
|
110 for name in self.uses.keys(): |
|
111 if not (self.defs.has_key(name) or |
|
112 self.globals.has_key(name)): |
|
113 free[name] = 1 |
|
114 return free.keys() |
|
115 |
|
116 def handle_children(self): |
|
117 for child in self.children: |
|
118 frees = child.get_free_vars() |
|
119 globals = self.add_frees(frees) |
|
120 for name in globals: |
|
121 child.force_global(name) |
|
122 |
|
123 def force_global(self, name): |
|
124 """Force name to be global in scope. |
|
125 |
|
126 Some child of the current node had a free reference to name. |
|
127 When the child was processed, it was labelled a free |
|
128 variable. Now that all its enclosing scope have been |
|
129 processed, the name is known to be a global or builtin. So |
|
130 walk back down the child chain and set the name to be global |
|
131 rather than free. |
|
132 |
|
133 Be careful to stop if a child does not think the name is |
|
134 free. |
|
135 """ |
|
136 self.globals[name] = 1 |
|
137 if self.frees.has_key(name): |
|
138 del self.frees[name] |
|
139 for child in self.children: |
|
140 if child.check_name(name) == SC_FREE: |
|
141 child.force_global(name) |
|
142 |
|
143 def add_frees(self, names): |
|
144 """Process list of free vars from nested scope. |
|
145 |
|
146 Returns a list of names that are either 1) declared global in the |
|
147 parent or 2) undefined in a top-level parent. In either case, |
|
148 the nested scope should treat them as globals. |
|
149 """ |
|
150 child_globals = [] |
|
151 for name in names: |
|
152 sc = self.check_name(name) |
|
153 if self.nested: |
|
154 if sc == SC_UNKNOWN or sc == SC_FREE \ |
|
155 or isinstance(self, ClassScope): |
|
156 self.frees[name] = 1 |
|
157 elif sc == SC_GLOBAL: |
|
158 child_globals.append(name) |
|
159 elif isinstance(self, FunctionScope) and sc == SC_LOCAL: |
|
160 self.cells[name] = 1 |
|
161 elif sc != SC_CELL: |
|
162 child_globals.append(name) |
|
163 else: |
|
164 if sc == SC_LOCAL: |
|
165 self.cells[name] = 1 |
|
166 elif sc != SC_CELL: |
|
167 child_globals.append(name) |
|
168 return child_globals |
|
169 |
|
170 def get_cell_vars(self): |
|
171 return self.cells.keys() |
|
172 |
|
173 class ModuleScope(Scope): |
|
174 __super_init = Scope.__init__ |
|
175 |
|
176 def __init__(self): |
|
177 self.__super_init("global", self) |
|
178 |
|
179 class FunctionScope(Scope): |
|
180 pass |
|
181 |
|
182 class GenExprScope(Scope): |
|
183 __super_init = Scope.__init__ |
|
184 |
|
185 __counter = 1 |
|
186 |
|
187 def __init__(self, module, klass=None): |
|
188 i = self.__counter |
|
189 self.__counter += 1 |
|
190 self.__super_init("generator expression<%d>"%i, module, klass) |
|
191 self.add_param('.0') |
|
192 |
|
193 def get_names(self): |
|
194 keys = Scope.get_names(self) |
|
195 return keys |
|
196 |
|
197 class LambdaScope(FunctionScope): |
|
198 __super_init = Scope.__init__ |
|
199 |
|
200 __counter = 1 |
|
201 |
|
202 def __init__(self, module, klass=None): |
|
203 i = self.__counter |
|
204 self.__counter += 1 |
|
205 self.__super_init("lambda.%d" % i, module, klass) |
|
206 |
|
207 class ClassScope(Scope): |
|
208 __super_init = Scope.__init__ |
|
209 |
|
210 def __init__(self, name, module): |
|
211 self.__super_init(name, module, name) |
|
212 |
|
213 class SymbolVisitor: |
|
214 def __init__(self): |
|
215 self.scopes = {} |
|
216 self.klass = None |
|
217 |
|
218 # node that define new scopes |
|
219 |
|
220 def visitModule(self, node): |
|
221 scope = self.module = self.scopes[node] = ModuleScope() |
|
222 self.visit(node.node, scope) |
|
223 |
|
224 visitExpression = visitModule |
|
225 |
|
226 def visitFunction(self, node, parent): |
|
227 if node.decorators: |
|
228 self.visit(node.decorators, parent) |
|
229 parent.add_def(node.name) |
|
230 for n in node.defaults: |
|
231 self.visit(n, parent) |
|
232 scope = FunctionScope(node.name, self.module, self.klass) |
|
233 if parent.nested or isinstance(parent, FunctionScope): |
|
234 scope.nested = 1 |
|
235 self.scopes[node] = scope |
|
236 self._do_args(scope, node.argnames) |
|
237 self.visit(node.code, scope) |
|
238 self.handle_free_vars(scope, parent) |
|
239 |
|
240 def visitGenExpr(self, node, parent): |
|
241 scope = GenExprScope(self.module, self.klass); |
|
242 if parent.nested or isinstance(parent, FunctionScope) \ |
|
243 or isinstance(parent, GenExprScope): |
|
244 scope.nested = 1 |
|
245 |
|
246 self.scopes[node] = scope |
|
247 self.visit(node.code, scope) |
|
248 |
|
249 self.handle_free_vars(scope, parent) |
|
250 |
|
251 def visitGenExprInner(self, node, scope): |
|
252 for genfor in node.quals: |
|
253 self.visit(genfor, scope) |
|
254 |
|
255 self.visit(node.expr, scope) |
|
256 |
|
257 def visitGenExprFor(self, node, scope): |
|
258 self.visit(node.assign, scope, 1) |
|
259 self.visit(node.iter, scope) |
|
260 for if_ in node.ifs: |
|
261 self.visit(if_, scope) |
|
262 |
|
263 def visitGenExprIf(self, node, scope): |
|
264 self.visit(node.test, scope) |
|
265 |
|
266 def visitLambda(self, node, parent, assign=0): |
|
267 # Lambda is an expression, so it could appear in an expression |
|
268 # context where assign is passed. The transformer should catch |
|
269 # any code that has a lambda on the left-hand side. |
|
270 assert not assign |
|
271 |
|
272 for n in node.defaults: |
|
273 self.visit(n, parent) |
|
274 scope = LambdaScope(self.module, self.klass) |
|
275 if parent.nested or isinstance(parent, FunctionScope): |
|
276 scope.nested = 1 |
|
277 self.scopes[node] = scope |
|
278 self._do_args(scope, node.argnames) |
|
279 self.visit(node.code, scope) |
|
280 self.handle_free_vars(scope, parent) |
|
281 |
|
282 def _do_args(self, scope, args): |
|
283 for name in args: |
|
284 if type(name) == types.TupleType: |
|
285 self._do_args(scope, name) |
|
286 else: |
|
287 scope.add_param(name) |
|
288 |
|
289 def handle_free_vars(self, scope, parent): |
|
290 parent.add_child(scope) |
|
291 scope.handle_children() |
|
292 |
|
293 def visitClass(self, node, parent): |
|
294 parent.add_def(node.name) |
|
295 for n in node.bases: |
|
296 self.visit(n, parent) |
|
297 scope = ClassScope(node.name, self.module) |
|
298 if parent.nested or isinstance(parent, FunctionScope): |
|
299 scope.nested = 1 |
|
300 if node.doc is not None: |
|
301 scope.add_def('__doc__') |
|
302 scope.add_def('__module__') |
|
303 self.scopes[node] = scope |
|
304 prev = self.klass |
|
305 self.klass = node.name |
|
306 self.visit(node.code, scope) |
|
307 self.klass = prev |
|
308 self.handle_free_vars(scope, parent) |
|
309 |
|
310 # name can be a def or a use |
|
311 |
|
312 # XXX a few calls and nodes expect a third "assign" arg that is |
|
313 # true if the name is being used as an assignment. only |
|
314 # expressions contained within statements may have the assign arg. |
|
315 |
|
316 def visitName(self, node, scope, assign=0): |
|
317 if assign: |
|
318 scope.add_def(node.name) |
|
319 else: |
|
320 scope.add_use(node.name) |
|
321 |
|
322 # operations that bind new names |
|
323 |
|
324 def visitFor(self, node, scope): |
|
325 self.visit(node.assign, scope, 1) |
|
326 self.visit(node.list, scope) |
|
327 self.visit(node.body, scope) |
|
328 if node.else_: |
|
329 self.visit(node.else_, scope) |
|
330 |
|
331 def visitFrom(self, node, scope): |
|
332 for name, asname in node.names: |
|
333 if name == "*": |
|
334 continue |
|
335 scope.add_def(asname or name) |
|
336 |
|
337 def visitImport(self, node, scope): |
|
338 for name, asname in node.names: |
|
339 i = name.find(".") |
|
340 if i > -1: |
|
341 name = name[:i] |
|
342 scope.add_def(asname or name) |
|
343 |
|
344 def visitGlobal(self, node, scope): |
|
345 for name in node.names: |
|
346 scope.add_global(name) |
|
347 |
|
348 def visitAssign(self, node, scope): |
|
349 """Propagate assignment flag down to child nodes. |
|
350 |
|
351 The Assign node doesn't itself contains the variables being |
|
352 assigned to. Instead, the children in node.nodes are visited |
|
353 with the assign flag set to true. When the names occur in |
|
354 those nodes, they are marked as defs. |
|
355 |
|
356 Some names that occur in an assignment target are not bound by |
|
357 the assignment, e.g. a name occurring inside a slice. The |
|
358 visitor handles these nodes specially; they do not propagate |
|
359 the assign flag to their children. |
|
360 """ |
|
361 for n in node.nodes: |
|
362 self.visit(n, scope, 1) |
|
363 self.visit(node.expr, scope) |
|
364 |
|
365 def visitAssName(self, node, scope, assign=1): |
|
366 scope.add_def(node.name) |
|
367 |
|
368 def visitAssAttr(self, node, scope, assign=0): |
|
369 self.visit(node.expr, scope, 0) |
|
370 |
|
371 def visitSubscript(self, node, scope, assign=0): |
|
372 self.visit(node.expr, scope, 0) |
|
373 for n in node.subs: |
|
374 self.visit(n, scope, 0) |
|
375 |
|
376 def visitSlice(self, node, scope, assign=0): |
|
377 self.visit(node.expr, scope, 0) |
|
378 if node.lower: |
|
379 self.visit(node.lower, scope, 0) |
|
380 if node.upper: |
|
381 self.visit(node.upper, scope, 0) |
|
382 |
|
383 def visitAugAssign(self, node, scope): |
|
384 # If the LHS is a name, then this counts as assignment. |
|
385 # Otherwise, it's just use. |
|
386 self.visit(node.node, scope) |
|
387 if isinstance(node.node, ast.Name): |
|
388 self.visit(node.node, scope, 1) # XXX worry about this |
|
389 self.visit(node.expr, scope) |
|
390 |
|
391 # prune if statements if tests are false |
|
392 |
|
393 _const_types = types.StringType, types.IntType, types.FloatType |
|
394 |
|
395 def visitIf(self, node, scope): |
|
396 for test, body in node.tests: |
|
397 if isinstance(test, ast.Const): |
|
398 if type(test.value) in self._const_types: |
|
399 if not test.value: |
|
400 continue |
|
401 self.visit(test, scope) |
|
402 self.visit(body, scope) |
|
403 if node.else_: |
|
404 self.visit(node.else_, scope) |
|
405 |
|
406 # a yield statement signals a generator |
|
407 |
|
408 def visitYield(self, node, scope): |
|
409 scope.generator = 1 |
|
410 self.visit(node.value, scope) |
|
411 |
|
412 def list_eq(l1, l2): |
|
413 return sorted(l1) == sorted(l2) |
|
414 |
|
415 if __name__ == "__main__": |
|
416 import sys |
|
417 from compiler import parseFile, walk |
|
418 import symtable |
|
419 |
|
420 def get_names(syms): |
|
421 return [s for s in [s.get_name() for s in syms.get_symbols()] |
|
422 if not (s.startswith('_[') or s.startswith('.'))] |
|
423 |
|
424 for file in sys.argv[1:]: |
|
425 print file |
|
426 f = open(file) |
|
427 buf = f.read() |
|
428 f.close() |
|
429 syms = symtable.symtable(buf, file, "exec") |
|
430 mod_names = get_names(syms) |
|
431 tree = parseFile(file) |
|
432 s = SymbolVisitor() |
|
433 walk(tree, s) |
|
434 |
|
435 # compare module-level symbols |
|
436 names2 = s.scopes[tree].get_names() |
|
437 |
|
438 if not list_eq(mod_names, names2): |
|
439 print |
|
440 print "oops", file |
|
441 print sorted(mod_names) |
|
442 print sorted(names2) |
|
443 sys.exit(-1) |
|
444 |
|
445 d = {} |
|
446 d.update(s.scopes) |
|
447 del d[tree] |
|
448 scopes = d.values() |
|
449 del d |
|
450 |
|
451 for s in syms.get_symbols(): |
|
452 if s.is_namespace(): |
|
453 l = [sc for sc in scopes |
|
454 if sc.name == s.get_name()] |
|
455 if len(l) > 1: |
|
456 print "skipping", s.get_name() |
|
457 else: |
|
458 if not list_eq(get_names(s.get_namespace()), |
|
459 l[0].get_names()): |
|
460 print s.get_name() |
|
461 print sorted(get_names(s.get_namespace())) |
|
462 print sorted(l[0].get_names()) |
|
463 sys.exit(-1) |