|
1 """A flow graph representation for Python bytecode""" |
|
2 |
|
3 import dis |
|
4 import types |
|
5 import sys |
|
6 |
|
7 from compiler import misc |
|
8 from compiler.consts \ |
|
9 import CO_OPTIMIZED, CO_NEWLOCALS, CO_VARARGS, CO_VARKEYWORDS |
|
10 |
|
11 class FlowGraph: |
|
12 def __init__(self): |
|
13 self.current = self.entry = Block() |
|
14 self.exit = Block("exit") |
|
15 self.blocks = misc.Set() |
|
16 self.blocks.add(self.entry) |
|
17 self.blocks.add(self.exit) |
|
18 |
|
19 def startBlock(self, block): |
|
20 if self._debug: |
|
21 if self.current: |
|
22 print "end", repr(self.current) |
|
23 print " next", self.current.next |
|
24 print " ", self.current.get_children() |
|
25 print repr(block) |
|
26 self.current = block |
|
27 |
|
28 def nextBlock(self, block=None): |
|
29 # XXX think we need to specify when there is implicit transfer |
|
30 # from one block to the next. might be better to represent this |
|
31 # with explicit JUMP_ABSOLUTE instructions that are optimized |
|
32 # out when they are unnecessary. |
|
33 # |
|
34 # I think this strategy works: each block has a child |
|
35 # designated as "next" which is returned as the last of the |
|
36 # children. because the nodes in a graph are emitted in |
|
37 # reverse post order, the "next" block will always be emitted |
|
38 # immediately after its parent. |
|
39 # Worry: maintaining this invariant could be tricky |
|
40 if block is None: |
|
41 block = self.newBlock() |
|
42 |
|
43 # Note: If the current block ends with an unconditional |
|
44 # control transfer, then it is incorrect to add an implicit |
|
45 # transfer to the block graph. The current code requires |
|
46 # these edges to get the blocks emitted in the right order, |
|
47 # however. :-( If a client needs to remove these edges, call |
|
48 # pruneEdges(). |
|
49 |
|
50 self.current.addNext(block) |
|
51 self.startBlock(block) |
|
52 |
|
53 def newBlock(self): |
|
54 b = Block() |
|
55 self.blocks.add(b) |
|
56 return b |
|
57 |
|
58 def startExitBlock(self): |
|
59 self.startBlock(self.exit) |
|
60 |
|
61 _debug = 0 |
|
62 |
|
63 def _enable_debug(self): |
|
64 self._debug = 1 |
|
65 |
|
66 def _disable_debug(self): |
|
67 self._debug = 0 |
|
68 |
|
69 def emit(self, *inst): |
|
70 if self._debug: |
|
71 print "\t", inst |
|
72 if inst[0] in ['RETURN_VALUE', 'YIELD_VALUE']: |
|
73 self.current.addOutEdge(self.exit) |
|
74 if len(inst) == 2 and isinstance(inst[1], Block): |
|
75 self.current.addOutEdge(inst[1]) |
|
76 self.current.emit(inst) |
|
77 |
|
78 def getBlocksInOrder(self): |
|
79 """Return the blocks in reverse postorder |
|
80 |
|
81 i.e. each node appears before all of its successors |
|
82 """ |
|
83 # XXX make sure every node that doesn't have an explicit next |
|
84 # is set so that next points to exit |
|
85 for b in self.blocks.elements(): |
|
86 if b is self.exit: |
|
87 continue |
|
88 if not b.next: |
|
89 b.addNext(self.exit) |
|
90 order = dfs_postorder(self.entry, {}) |
|
91 order.reverse() |
|
92 self.fixupOrder(order, self.exit) |
|
93 # hack alert |
|
94 if not self.exit in order: |
|
95 order.append(self.exit) |
|
96 |
|
97 return order |
|
98 |
|
99 def fixupOrder(self, blocks, default_next): |
|
100 """Fixup bad order introduced by DFS.""" |
|
101 |
|
102 # XXX This is a total mess. There must be a better way to get |
|
103 # the code blocks in the right order. |
|
104 |
|
105 self.fixupOrderHonorNext(blocks, default_next) |
|
106 self.fixupOrderForward(blocks, default_next) |
|
107 |
|
108 def fixupOrderHonorNext(self, blocks, default_next): |
|
109 """Fix one problem with DFS. |
|
110 |
|
111 The DFS uses child block, but doesn't know about the special |
|
112 "next" block. As a result, the DFS can order blocks so that a |
|
113 block isn't next to the right block for implicit control |
|
114 transfers. |
|
115 """ |
|
116 index = {} |
|
117 for i in range(len(blocks)): |
|
118 index[blocks[i]] = i |
|
119 |
|
120 for i in range(0, len(blocks) - 1): |
|
121 b = blocks[i] |
|
122 n = blocks[i + 1] |
|
123 if not b.next or b.next[0] == default_next or b.next[0] == n: |
|
124 continue |
|
125 # The blocks are in the wrong order. Find the chain of |
|
126 # blocks to insert where they belong. |
|
127 cur = b |
|
128 chain = [] |
|
129 elt = cur |
|
130 while elt.next and elt.next[0] != default_next: |
|
131 chain.append(elt.next[0]) |
|
132 elt = elt.next[0] |
|
133 # Now remove the blocks in the chain from the current |
|
134 # block list, so that they can be re-inserted. |
|
135 l = [] |
|
136 for b in chain: |
|
137 assert index[b] > i |
|
138 l.append((index[b], b)) |
|
139 l.sort() |
|
140 l.reverse() |
|
141 for j, b in l: |
|
142 del blocks[index[b]] |
|
143 # Insert the chain in the proper location |
|
144 blocks[i:i + 1] = [cur] + chain |
|
145 # Finally, re-compute the block indexes |
|
146 for i in range(len(blocks)): |
|
147 index[blocks[i]] = i |
|
148 |
|
149 def fixupOrderForward(self, blocks, default_next): |
|
150 """Make sure all JUMP_FORWARDs jump forward""" |
|
151 index = {} |
|
152 chains = [] |
|
153 cur = [] |
|
154 for b in blocks: |
|
155 index[b] = len(chains) |
|
156 cur.append(b) |
|
157 if b.next and b.next[0] == default_next: |
|
158 chains.append(cur) |
|
159 cur = [] |
|
160 chains.append(cur) |
|
161 |
|
162 while 1: |
|
163 constraints = [] |
|
164 |
|
165 for i in range(len(chains)): |
|
166 l = chains[i] |
|
167 for b in l: |
|
168 for c in b.get_children(): |
|
169 if index[c] < i: |
|
170 forward_p = 0 |
|
171 for inst in b.insts: |
|
172 if inst[0] == 'JUMP_FORWARD': |
|
173 if inst[1] == c: |
|
174 forward_p = 1 |
|
175 if not forward_p: |
|
176 continue |
|
177 constraints.append((index[c], i)) |
|
178 |
|
179 if not constraints: |
|
180 break |
|
181 |
|
182 # XXX just do one for now |
|
183 # do swaps to get things in the right order |
|
184 goes_before, a_chain = constraints[0] |
|
185 assert a_chain > goes_before |
|
186 c = chains[a_chain] |
|
187 chains.remove(c) |
|
188 chains.insert(goes_before, c) |
|
189 |
|
190 del blocks[:] |
|
191 for c in chains: |
|
192 for b in c: |
|
193 blocks.append(b) |
|
194 |
|
195 def getBlocks(self): |
|
196 return self.blocks.elements() |
|
197 |
|
198 def getRoot(self): |
|
199 """Return nodes appropriate for use with dominator""" |
|
200 return self.entry |
|
201 |
|
202 def getContainedGraphs(self): |
|
203 l = [] |
|
204 for b in self.getBlocks(): |
|
205 l.extend(b.getContainedGraphs()) |
|
206 return l |
|
207 |
|
208 def dfs_postorder(b, seen): |
|
209 """Depth-first search of tree rooted at b, return in postorder""" |
|
210 order = [] |
|
211 seen[b] = b |
|
212 for c in b.get_children(): |
|
213 if seen.has_key(c): |
|
214 continue |
|
215 order = order + dfs_postorder(c, seen) |
|
216 order.append(b) |
|
217 return order |
|
218 |
|
219 class Block: |
|
220 _count = 0 |
|
221 |
|
222 def __init__(self, label=''): |
|
223 self.insts = [] |
|
224 self.inEdges = misc.Set() |
|
225 self.outEdges = misc.Set() |
|
226 self.label = label |
|
227 self.bid = Block._count |
|
228 self.next = [] |
|
229 Block._count = Block._count + 1 |
|
230 |
|
231 def __repr__(self): |
|
232 if self.label: |
|
233 return "<block %s id=%d>" % (self.label, self.bid) |
|
234 else: |
|
235 return "<block id=%d>" % (self.bid) |
|
236 |
|
237 def __str__(self): |
|
238 insts = map(str, self.insts) |
|
239 return "<block %s %d:\n%s>" % (self.label, self.bid, |
|
240 '\n'.join(insts)) |
|
241 |
|
242 def emit(self, inst): |
|
243 op = inst[0] |
|
244 if op[:4] == 'JUMP': |
|
245 self.outEdges.add(inst[1]) |
|
246 self.insts.append(inst) |
|
247 |
|
248 def getInstructions(self): |
|
249 return self.insts |
|
250 |
|
251 def addInEdge(self, block): |
|
252 self.inEdges.add(block) |
|
253 |
|
254 def addOutEdge(self, block): |
|
255 self.outEdges.add(block) |
|
256 |
|
257 def addNext(self, block): |
|
258 self.next.append(block) |
|
259 assert len(self.next) == 1, map(str, self.next) |
|
260 |
|
261 _uncond_transfer = ('RETURN_VALUE', 'RAISE_VARARGS', 'YIELD_VALUE', |
|
262 'JUMP_ABSOLUTE', 'JUMP_FORWARD', 'CONTINUE_LOOP') |
|
263 |
|
264 def pruneNext(self): |
|
265 """Remove bogus edge for unconditional transfers |
|
266 |
|
267 Each block has a next edge that accounts for implicit control |
|
268 transfers, e.g. from a JUMP_IF_FALSE to the block that will be |
|
269 executed if the test is true. |
|
270 |
|
271 These edges must remain for the current assembler code to |
|
272 work. If they are removed, the dfs_postorder gets things in |
|
273 weird orders. However, they shouldn't be there for other |
|
274 purposes, e.g. conversion to SSA form. This method will |
|
275 remove the next edge when it follows an unconditional control |
|
276 transfer. |
|
277 """ |
|
278 try: |
|
279 op, arg = self.insts[-1] |
|
280 except (IndexError, ValueError): |
|
281 return |
|
282 if op in self._uncond_transfer: |
|
283 self.next = [] |
|
284 |
|
285 def get_children(self): |
|
286 if self.next and self.next[0] in self.outEdges: |
|
287 self.outEdges.remove(self.next[0]) |
|
288 return self.outEdges.elements() + self.next |
|
289 |
|
290 def getContainedGraphs(self): |
|
291 """Return all graphs contained within this block. |
|
292 |
|
293 For example, a MAKE_FUNCTION block will contain a reference to |
|
294 the graph for the function body. |
|
295 """ |
|
296 contained = [] |
|
297 for inst in self.insts: |
|
298 if len(inst) == 1: |
|
299 continue |
|
300 op = inst[1] |
|
301 if hasattr(op, 'graph'): |
|
302 contained.append(op.graph) |
|
303 return contained |
|
304 |
|
305 # flags for code objects |
|
306 |
|
307 # the FlowGraph is transformed in place; it exists in one of these states |
|
308 RAW = "RAW" |
|
309 FLAT = "FLAT" |
|
310 CONV = "CONV" |
|
311 DONE = "DONE" |
|
312 |
|
313 class PyFlowGraph(FlowGraph): |
|
314 super_init = FlowGraph.__init__ |
|
315 |
|
316 def __init__(self, name, filename, args=(), optimized=0, klass=None): |
|
317 self.super_init() |
|
318 self.name = name |
|
319 self.filename = filename |
|
320 self.docstring = None |
|
321 self.args = args # XXX |
|
322 self.argcount = getArgCount(args) |
|
323 self.klass = klass |
|
324 if optimized: |
|
325 self.flags = CO_OPTIMIZED | CO_NEWLOCALS |
|
326 else: |
|
327 self.flags = 0 |
|
328 self.consts = [] |
|
329 self.names = [] |
|
330 # Free variables found by the symbol table scan, including |
|
331 # variables used only in nested scopes, are included here. |
|
332 self.freevars = [] |
|
333 self.cellvars = [] |
|
334 # The closure list is used to track the order of cell |
|
335 # variables and free variables in the resulting code object. |
|
336 # The offsets used by LOAD_CLOSURE/LOAD_DEREF refer to both |
|
337 # kinds of variables. |
|
338 self.closure = [] |
|
339 self.varnames = list(args) or [] |
|
340 for i in range(len(self.varnames)): |
|
341 var = self.varnames[i] |
|
342 if isinstance(var, TupleArg): |
|
343 self.varnames[i] = var.getName() |
|
344 self.stage = RAW |
|
345 |
|
346 def setDocstring(self, doc): |
|
347 self.docstring = doc |
|
348 |
|
349 def setFlag(self, flag): |
|
350 self.flags = self.flags | flag |
|
351 if flag == CO_VARARGS: |
|
352 self.argcount = self.argcount - 1 |
|
353 |
|
354 def checkFlag(self, flag): |
|
355 if self.flags & flag: |
|
356 return 1 |
|
357 |
|
358 def setFreeVars(self, names): |
|
359 self.freevars = list(names) |
|
360 |
|
361 def setCellVars(self, names): |
|
362 self.cellvars = names |
|
363 |
|
364 def getCode(self): |
|
365 """Get a Python code object""" |
|
366 assert self.stage == RAW |
|
367 self.computeStackDepth() |
|
368 self.flattenGraph() |
|
369 assert self.stage == FLAT |
|
370 self.convertArgs() |
|
371 assert self.stage == CONV |
|
372 self.makeByteCode() |
|
373 assert self.stage == DONE |
|
374 return self.newCodeObject() |
|
375 |
|
376 def dump(self, io=None): |
|
377 if io: |
|
378 save = sys.stdout |
|
379 sys.stdout = io |
|
380 pc = 0 |
|
381 for t in self.insts: |
|
382 opname = t[0] |
|
383 if opname == "SET_LINENO": |
|
384 print |
|
385 if len(t) == 1: |
|
386 print "\t", "%3d" % pc, opname |
|
387 pc = pc + 1 |
|
388 else: |
|
389 print "\t", "%3d" % pc, opname, t[1] |
|
390 pc = pc + 3 |
|
391 if io: |
|
392 sys.stdout = save |
|
393 |
|
394 def computeStackDepth(self): |
|
395 """Compute the max stack depth. |
|
396 |
|
397 Approach is to compute the stack effect of each basic block. |
|
398 Then find the path through the code with the largest total |
|
399 effect. |
|
400 """ |
|
401 depth = {} |
|
402 exit = None |
|
403 for b in self.getBlocks(): |
|
404 depth[b] = findDepth(b.getInstructions()) |
|
405 |
|
406 seen = {} |
|
407 |
|
408 def max_depth(b, d): |
|
409 if seen.has_key(b): |
|
410 return d |
|
411 seen[b] = 1 |
|
412 d = d + depth[b] |
|
413 children = b.get_children() |
|
414 if children: |
|
415 return max([max_depth(c, d) for c in children]) |
|
416 else: |
|
417 if not b.label == "exit": |
|
418 return max_depth(self.exit, d) |
|
419 else: |
|
420 return d |
|
421 |
|
422 self.stacksize = max_depth(self.entry, 0) |
|
423 |
|
424 def flattenGraph(self): |
|
425 """Arrange the blocks in order and resolve jumps""" |
|
426 assert self.stage == RAW |
|
427 self.insts = insts = [] |
|
428 pc = 0 |
|
429 begin = {} |
|
430 end = {} |
|
431 for b in self.getBlocksInOrder(): |
|
432 begin[b] = pc |
|
433 for inst in b.getInstructions(): |
|
434 insts.append(inst) |
|
435 if len(inst) == 1: |
|
436 pc = pc + 1 |
|
437 elif inst[0] != "SET_LINENO": |
|
438 # arg takes 2 bytes |
|
439 pc = pc + 3 |
|
440 end[b] = pc |
|
441 pc = 0 |
|
442 for i in range(len(insts)): |
|
443 inst = insts[i] |
|
444 if len(inst) == 1: |
|
445 pc = pc + 1 |
|
446 elif inst[0] != "SET_LINENO": |
|
447 pc = pc + 3 |
|
448 opname = inst[0] |
|
449 if self.hasjrel.has_elt(opname): |
|
450 oparg = inst[1] |
|
451 offset = begin[oparg] - pc |
|
452 insts[i] = opname, offset |
|
453 elif self.hasjabs.has_elt(opname): |
|
454 insts[i] = opname, begin[inst[1]] |
|
455 self.stage = FLAT |
|
456 |
|
457 hasjrel = misc.Set() |
|
458 for i in dis.hasjrel: |
|
459 hasjrel.add(dis.opname[i]) |
|
460 hasjabs = misc.Set() |
|
461 for i in dis.hasjabs: |
|
462 hasjabs.add(dis.opname[i]) |
|
463 |
|
464 def convertArgs(self): |
|
465 """Convert arguments from symbolic to concrete form""" |
|
466 assert self.stage == FLAT |
|
467 self.consts.insert(0, self.docstring) |
|
468 self.sort_cellvars() |
|
469 for i in range(len(self.insts)): |
|
470 t = self.insts[i] |
|
471 if len(t) == 2: |
|
472 opname, oparg = t |
|
473 conv = self._converters.get(opname, None) |
|
474 if conv: |
|
475 self.insts[i] = opname, conv(self, oparg) |
|
476 self.stage = CONV |
|
477 |
|
478 def sort_cellvars(self): |
|
479 """Sort cellvars in the order of varnames and prune from freevars. |
|
480 """ |
|
481 cells = {} |
|
482 for name in self.cellvars: |
|
483 cells[name] = 1 |
|
484 self.cellvars = [name for name in self.varnames |
|
485 if cells.has_key(name)] |
|
486 for name in self.cellvars: |
|
487 del cells[name] |
|
488 self.cellvars = self.cellvars + cells.keys() |
|
489 self.closure = self.cellvars + self.freevars |
|
490 |
|
491 def _lookupName(self, name, list): |
|
492 """Return index of name in list, appending if necessary |
|
493 |
|
494 This routine uses a list instead of a dictionary, because a |
|
495 dictionary can't store two different keys if the keys have the |
|
496 same value but different types, e.g. 2 and 2L. The compiler |
|
497 must treat these two separately, so it does an explicit type |
|
498 comparison before comparing the values. |
|
499 """ |
|
500 t = type(name) |
|
501 for i in range(len(list)): |
|
502 if t == type(list[i]) and list[i] == name: |
|
503 return i |
|
504 end = len(list) |
|
505 list.append(name) |
|
506 return end |
|
507 |
|
508 _converters = {} |
|
509 def _convert_LOAD_CONST(self, arg): |
|
510 if hasattr(arg, 'getCode'): |
|
511 arg = arg.getCode() |
|
512 return self._lookupName(arg, self.consts) |
|
513 |
|
514 def _convert_LOAD_FAST(self, arg): |
|
515 self._lookupName(arg, self.names) |
|
516 return self._lookupName(arg, self.varnames) |
|
517 _convert_STORE_FAST = _convert_LOAD_FAST |
|
518 _convert_DELETE_FAST = _convert_LOAD_FAST |
|
519 |
|
520 def _convert_LOAD_NAME(self, arg): |
|
521 if self.klass is None: |
|
522 self._lookupName(arg, self.varnames) |
|
523 return self._lookupName(arg, self.names) |
|
524 |
|
525 def _convert_NAME(self, arg): |
|
526 if self.klass is None: |
|
527 self._lookupName(arg, self.varnames) |
|
528 return self._lookupName(arg, self.names) |
|
529 _convert_STORE_NAME = _convert_NAME |
|
530 _convert_DELETE_NAME = _convert_NAME |
|
531 _convert_IMPORT_NAME = _convert_NAME |
|
532 _convert_IMPORT_FROM = _convert_NAME |
|
533 _convert_STORE_ATTR = _convert_NAME |
|
534 _convert_LOAD_ATTR = _convert_NAME |
|
535 _convert_DELETE_ATTR = _convert_NAME |
|
536 _convert_LOAD_GLOBAL = _convert_NAME |
|
537 _convert_STORE_GLOBAL = _convert_NAME |
|
538 _convert_DELETE_GLOBAL = _convert_NAME |
|
539 |
|
540 def _convert_DEREF(self, arg): |
|
541 self._lookupName(arg, self.names) |
|
542 self._lookupName(arg, self.varnames) |
|
543 return self._lookupName(arg, self.closure) |
|
544 _convert_LOAD_DEREF = _convert_DEREF |
|
545 _convert_STORE_DEREF = _convert_DEREF |
|
546 |
|
547 def _convert_LOAD_CLOSURE(self, arg): |
|
548 self._lookupName(arg, self.varnames) |
|
549 return self._lookupName(arg, self.closure) |
|
550 |
|
551 _cmp = list(dis.cmp_op) |
|
552 def _convert_COMPARE_OP(self, arg): |
|
553 return self._cmp.index(arg) |
|
554 |
|
555 # similarly for other opcodes... |
|
556 |
|
557 for name, obj in locals().items(): |
|
558 if name[:9] == "_convert_": |
|
559 opname = name[9:] |
|
560 _converters[opname] = obj |
|
561 del name, obj, opname |
|
562 |
|
563 def makeByteCode(self): |
|
564 assert self.stage == CONV |
|
565 self.lnotab = lnotab = LineAddrTable() |
|
566 for t in self.insts: |
|
567 opname = t[0] |
|
568 if len(t) == 1: |
|
569 lnotab.addCode(self.opnum[opname]) |
|
570 else: |
|
571 oparg = t[1] |
|
572 if opname == "SET_LINENO": |
|
573 lnotab.nextLine(oparg) |
|
574 continue |
|
575 hi, lo = twobyte(oparg) |
|
576 try: |
|
577 lnotab.addCode(self.opnum[opname], lo, hi) |
|
578 except ValueError: |
|
579 print opname, oparg |
|
580 print self.opnum[opname], lo, hi |
|
581 raise |
|
582 self.stage = DONE |
|
583 |
|
584 opnum = {} |
|
585 for num in range(len(dis.opname)): |
|
586 opnum[dis.opname[num]] = num |
|
587 del num |
|
588 |
|
589 def newCodeObject(self): |
|
590 assert self.stage == DONE |
|
591 if (self.flags & CO_NEWLOCALS) == 0: |
|
592 nlocals = 0 |
|
593 else: |
|
594 nlocals = len(self.varnames) |
|
595 argcount = self.argcount |
|
596 if self.flags & CO_VARKEYWORDS: |
|
597 argcount = argcount - 1 |
|
598 return types.CodeType(argcount, nlocals, self.stacksize, self.flags, |
|
599 self.lnotab.getCode(), self.getConsts(), |
|
600 tuple(self.names), tuple(self.varnames), |
|
601 self.filename, self.name, self.lnotab.firstline, |
|
602 self.lnotab.getTable(), tuple(self.freevars), |
|
603 tuple(self.cellvars)) |
|
604 |
|
605 def getConsts(self): |
|
606 """Return a tuple for the const slot of the code object |
|
607 |
|
608 Must convert references to code (MAKE_FUNCTION) to code |
|
609 objects recursively. |
|
610 """ |
|
611 l = [] |
|
612 for elt in self.consts: |
|
613 if isinstance(elt, PyFlowGraph): |
|
614 elt = elt.getCode() |
|
615 l.append(elt) |
|
616 return tuple(l) |
|
617 |
|
618 def isJump(opname): |
|
619 if opname[:4] == 'JUMP': |
|
620 return 1 |
|
621 |
|
622 class TupleArg: |
|
623 """Helper for marking func defs with nested tuples in arglist""" |
|
624 def __init__(self, count, names): |
|
625 self.count = count |
|
626 self.names = names |
|
627 def __repr__(self): |
|
628 return "TupleArg(%s, %s)" % (self.count, self.names) |
|
629 def getName(self): |
|
630 return ".%d" % self.count |
|
631 |
|
632 def getArgCount(args): |
|
633 argcount = len(args) |
|
634 if args: |
|
635 for arg in args: |
|
636 if isinstance(arg, TupleArg): |
|
637 numNames = len(misc.flatten(arg.names)) |
|
638 argcount = argcount - numNames |
|
639 return argcount |
|
640 |
|
641 def twobyte(val): |
|
642 """Convert an int argument into high and low bytes""" |
|
643 assert isinstance(val, int) |
|
644 return divmod(val, 256) |
|
645 |
|
646 class LineAddrTable: |
|
647 """lnotab |
|
648 |
|
649 This class builds the lnotab, which is documented in compile.c. |
|
650 Here's a brief recap: |
|
651 |
|
652 For each SET_LINENO instruction after the first one, two bytes are |
|
653 added to lnotab. (In some cases, multiple two-byte entries are |
|
654 added.) The first byte is the distance in bytes between the |
|
655 instruction for the last SET_LINENO and the current SET_LINENO. |
|
656 The second byte is offset in line numbers. If either offset is |
|
657 greater than 255, multiple two-byte entries are added -- see |
|
658 compile.c for the delicate details. |
|
659 """ |
|
660 |
|
661 def __init__(self): |
|
662 self.code = [] |
|
663 self.codeOffset = 0 |
|
664 self.firstline = 0 |
|
665 self.lastline = 0 |
|
666 self.lastoff = 0 |
|
667 self.lnotab = [] |
|
668 |
|
669 def addCode(self, *args): |
|
670 for arg in args: |
|
671 self.code.append(chr(arg)) |
|
672 self.codeOffset = self.codeOffset + len(args) |
|
673 |
|
674 def nextLine(self, lineno): |
|
675 if self.firstline == 0: |
|
676 self.firstline = lineno |
|
677 self.lastline = lineno |
|
678 else: |
|
679 # compute deltas |
|
680 addr = self.codeOffset - self.lastoff |
|
681 line = lineno - self.lastline |
|
682 # Python assumes that lineno always increases with |
|
683 # increasing bytecode address (lnotab is unsigned char). |
|
684 # Depending on when SET_LINENO instructions are emitted |
|
685 # this is not always true. Consider the code: |
|
686 # a = (1, |
|
687 # b) |
|
688 # In the bytecode stream, the assignment to "a" occurs |
|
689 # after the loading of "b". This works with the C Python |
|
690 # compiler because it only generates a SET_LINENO instruction |
|
691 # for the assignment. |
|
692 if line >= 0: |
|
693 push = self.lnotab.append |
|
694 while addr > 255: |
|
695 push(255); push(0) |
|
696 addr -= 255 |
|
697 while line > 255: |
|
698 push(addr); push(255) |
|
699 line -= 255 |
|
700 addr = 0 |
|
701 if addr > 0 or line > 0: |
|
702 push(addr); push(line) |
|
703 self.lastline = lineno |
|
704 self.lastoff = self.codeOffset |
|
705 |
|
706 def getCode(self): |
|
707 return ''.join(self.code) |
|
708 |
|
709 def getTable(self): |
|
710 return ''.join(map(chr, self.lnotab)) |
|
711 |
|
712 class StackDepthTracker: |
|
713 # XXX 1. need to keep track of stack depth on jumps |
|
714 # XXX 2. at least partly as a result, this code is broken |
|
715 |
|
716 def findDepth(self, insts, debug=0): |
|
717 depth = 0 |
|
718 maxDepth = 0 |
|
719 for i in insts: |
|
720 opname = i[0] |
|
721 if debug: |
|
722 print i, |
|
723 delta = self.effect.get(opname, None) |
|
724 if delta is not None: |
|
725 depth = depth + delta |
|
726 else: |
|
727 # now check patterns |
|
728 for pat, pat_delta in self.patterns: |
|
729 if opname[:len(pat)] == pat: |
|
730 delta = pat_delta |
|
731 depth = depth + delta |
|
732 break |
|
733 # if we still haven't found a match |
|
734 if delta is None: |
|
735 meth = getattr(self, opname, None) |
|
736 if meth is not None: |
|
737 depth = depth + meth(i[1]) |
|
738 if depth > maxDepth: |
|
739 maxDepth = depth |
|
740 if debug: |
|
741 print depth, maxDepth |
|
742 return maxDepth |
|
743 |
|
744 effect = { |
|
745 'POP_TOP': -1, |
|
746 'DUP_TOP': 1, |
|
747 'LIST_APPEND': -2, |
|
748 'SLICE+1': -1, |
|
749 'SLICE+2': -1, |
|
750 'SLICE+3': -2, |
|
751 'STORE_SLICE+0': -1, |
|
752 'STORE_SLICE+1': -2, |
|
753 'STORE_SLICE+2': -2, |
|
754 'STORE_SLICE+3': -3, |
|
755 'DELETE_SLICE+0': -1, |
|
756 'DELETE_SLICE+1': -2, |
|
757 'DELETE_SLICE+2': -2, |
|
758 'DELETE_SLICE+3': -3, |
|
759 'STORE_SUBSCR': -3, |
|
760 'DELETE_SUBSCR': -2, |
|
761 # PRINT_EXPR? |
|
762 'PRINT_ITEM': -1, |
|
763 'RETURN_VALUE': -1, |
|
764 'YIELD_VALUE': -1, |
|
765 'EXEC_STMT': -3, |
|
766 'BUILD_CLASS': -2, |
|
767 'STORE_NAME': -1, |
|
768 'STORE_ATTR': -2, |
|
769 'DELETE_ATTR': -1, |
|
770 'STORE_GLOBAL': -1, |
|
771 'BUILD_MAP': 1, |
|
772 'COMPARE_OP': -1, |
|
773 'STORE_FAST': -1, |
|
774 'IMPORT_STAR': -1, |
|
775 'IMPORT_NAME': -1, |
|
776 'IMPORT_FROM': 1, |
|
777 'LOAD_ATTR': 0, # unlike other loads |
|
778 # close enough... |
|
779 'SETUP_EXCEPT': 3, |
|
780 'SETUP_FINALLY': 3, |
|
781 'FOR_ITER': 1, |
|
782 'WITH_CLEANUP': -1, |
|
783 } |
|
784 # use pattern match |
|
785 patterns = [ |
|
786 ('BINARY_', -1), |
|
787 ('LOAD_', 1), |
|
788 ] |
|
789 |
|
790 def UNPACK_SEQUENCE(self, count): |
|
791 return count-1 |
|
792 def BUILD_TUPLE(self, count): |
|
793 return -count+1 |
|
794 def BUILD_LIST(self, count): |
|
795 return -count+1 |
|
796 def CALL_FUNCTION(self, argc): |
|
797 hi, lo = divmod(argc, 256) |
|
798 return -(lo + hi * 2) |
|
799 def CALL_FUNCTION_VAR(self, argc): |
|
800 return self.CALL_FUNCTION(argc)-1 |
|
801 def CALL_FUNCTION_KW(self, argc): |
|
802 return self.CALL_FUNCTION(argc)-1 |
|
803 def CALL_FUNCTION_VAR_KW(self, argc): |
|
804 return self.CALL_FUNCTION(argc)-2 |
|
805 def MAKE_FUNCTION(self, argc): |
|
806 return -argc |
|
807 def MAKE_CLOSURE(self, argc): |
|
808 # XXX need to account for free variables too! |
|
809 return -argc |
|
810 def BUILD_SLICE(self, argc): |
|
811 if argc == 2: |
|
812 return -1 |
|
813 elif argc == 3: |
|
814 return -2 |
|
815 def DUP_TOPX(self, argc): |
|
816 return argc |
|
817 |
|
818 findDepth = StackDepthTracker().findDepth |