|
1 #! /usr/bin/env python |
|
2 |
|
3 """ |
|
4 combinerefs path |
|
5 |
|
6 A helper for analyzing PYTHONDUMPREFS output. |
|
7 |
|
8 When the PYTHONDUMPREFS envar is set in a debug build, at Python shutdown |
|
9 time Py_Finalize() prints the list of all live objects twice: first it |
|
10 prints the repr() of each object while the interpreter is still fully intact. |
|
11 After cleaning up everything it can, it prints all remaining live objects |
|
12 again, but the second time just prints their addresses, refcounts, and type |
|
13 names (because the interpreter has been torn down, calling repr methods at |
|
14 this point can get into infinite loops or blow up). |
|
15 |
|
16 Save all this output into a file, then run this script passing the path to |
|
17 that file. The script finds both output chunks, combines them, then prints |
|
18 a line of output for each object still alive at the end: |
|
19 |
|
20 address refcnt typename repr |
|
21 |
|
22 address is the address of the object, in whatever format the platform C |
|
23 produces for a %p format code. |
|
24 |
|
25 refcnt is of the form |
|
26 |
|
27 "[" ref "]" |
|
28 |
|
29 when the object's refcount is the same in both PYTHONDUMPREFS output blocks, |
|
30 or |
|
31 |
|
32 "[" ref_before "->" ref_after "]" |
|
33 |
|
34 if the refcount changed. |
|
35 |
|
36 typename is object->ob_type->tp_name, extracted from the second PYTHONDUMPREFS |
|
37 output block. |
|
38 |
|
39 repr is repr(object), extracted from the first PYTHONDUMPREFS output block. |
|
40 CAUTION: If object is a container type, it may not actually contain all the |
|
41 objects shown in the repr: the repr was captured from the first output block, |
|
42 and some of the containees may have been released since then. For example, |
|
43 it's common for the line showing the dict of interned strings to display |
|
44 strings that no longer exist at the end of Py_Finalize; this can be recognized |
|
45 (albeit painfully) because such containees don't have a line of their own. |
|
46 |
|
47 The objects are listed in allocation order, with most-recently allocated |
|
48 printed first, and the first object allocated printed last. |
|
49 |
|
50 |
|
51 Simple examples: |
|
52 |
|
53 00857060 [14] str '__len__' |
|
54 |
|
55 The str object '__len__' is alive at shutdown time, and both PYTHONDUMPREFS |
|
56 output blocks said there were 14 references to it. This is probably due to |
|
57 C modules that intern the string "__len__" and keep a reference to it in a |
|
58 file static. |
|
59 |
|
60 00857038 [46->5] tuple () |
|
61 |
|
62 46-5 = 41 references to the empty tuple were removed by the cleanup actions |
|
63 between the times PYTHONDUMPREFS produced output. |
|
64 |
|
65 00858028 [1025->1456] str '<dummy key>' |
|
66 |
|
67 The string '<dummy key>', which is used in dictobject.c to overwrite a real |
|
68 key that gets deleted, grew several hundred references during cleanup. It |
|
69 suggests that stuff did get removed from dicts by cleanup, but that the dicts |
|
70 themselves are staying alive for some reason. """ |
|
71 |
|
72 import re |
|
73 import sys |
|
74 |
|
def read(fileiter, pat, whilematch):
    """Generate a leading run of lines from fileiter.

    fileiter   -- iterator producing lines (e.g. iter(open_file)).
    pat        -- compiled regexp object; each line is tested with
                  pat.match(line).
    whilematch -- if true, lines are yielded for as long as pat matches
                  them; if false, lines are yielded for as long as pat
                  does NOT match them.  Interpreted by truth value, so
                  any truthy or falsy object is accepted, not only True
                  and False.

    In either case the first line that ends the run has already been
    pulled from fileiter when the generator stops, so that line is lost:
    provided fileiter is a real iterator, it resumes at the line
    following it.
    """
    # Normalize once so that e.g. whilematch=1, 2 or None behave like
    # True/False instead of never comparing equal to a bool.
    want_match = bool(whilematch)
    for line in fileiter:
        if bool(pat.match(line)) != want_match:
            # Boundary line: consumed from fileiter but deliberately
            # not yielded.
            break
        yield line
|
87 |
|
def combine(fname):
    """Combine the two PYTHONDUMPREFS blocks stored in the file fname.

    The file is scanned in three phases:

    1. Every line up to and including the 'Remaining objects:' header is
       discarded.
    2. Lines up to the 'Remaining object addresses:' header are parsed as
       'address [refcount] repr'; the refcount and repr are remembered by
       address.  Lines that do not parse are reported as '??? skipped'.
    3. The following run of lines of the form 'address [refcount] typename'
       is merged with the remembered data and one combined line is
       printed per object.  Addresses that did not appear in phase 2 are
       reported as new objects.  The run ends at the first line that does
       not have this form, or at end of file.

    A final summary line gives the number of objects parsed in each block.
    All output goes to sys.stdout; nothing is returned.
    """
    first_header = re.compile(r'^Remaining objects:$')
    second_header = re.compile(r'^Remaining object addresses:$')
    crack = re.compile(r'([a-zA-Z\d]+) \[(\d+)\] (.*)')

    addr2rc = {}    # address -> refcount text from the first block
    addr2guts = {}  # address -> repr text from the first block
    before = 0
    after = 0

    # 'with' guarantees the file is closed even if parsing raises.
    with open(fname) as f:
        fi = iter(f)

        # Phase 1: throw away everything ahead of the first block.
        for line in read(fi, first_header, False):
            pass

        # Phase 2: first block, reprs captured while the interpreter was
        # still intact.
        for line in read(fi, second_header, False):
            m = crack.match(line)
            if m:
                addr, rc, guts = m.groups()
                addr2rc[addr] = rc
                addr2guts[addr] = guts
                before += 1
            else:
                # line keeps its trailing newline, as in the original
                # output format.
                print('??? skipped: %s' % line)

        # Phase 3: second block, addresses/refcounts/type names only.
        for line in read(fi, crack, True):
            after += 1
            m = crack.match(line)
            # read() only yields lines that crack matched, so this is an
            # internal invariant rather than input validation.
            assert m
            addr, rc, guts = m.groups()  # guts is type name here
            if addr not in addr2rc:
                print('??? new object created while tearing down: %s'
                      % line.rstrip())
                continue
            if rc == addr2rc[addr]:
                refs = '[%s]' % rc
            else:
                refs = '[%s->%s]' % (addr2rc[addr], rc)
            print('%s %s %s %s' % (addr, refs, guts, addr2guts[addr]))

    print("%d objects before, %d after" % (before, after))
|
125 |
|
if __name__ == '__main__':
    # Script entry point: the first command-line argument is the path of
    # the saved PYTHONDUMPREFS output.  Without it there is nothing to
    # analyze, so exit with a usage message (sys.exit with a string writes
    # it to stderr and exits with status 1) rather than an IndexError
    # traceback.  Any extra arguments are ignored, as before.
    if len(sys.argv) < 2:
        sys.exit('usage: combinerefs path')
    combine(sys.argv[1])