|
1 #! /usr/bin/env python |
|
2 # Find external dependencies of binary libraries. |
|
3 # Copyright (C) 2005, Giovanni Bajo |
|
4 # Based on previous work under copyright (c) 2002 McMillan Enterprises, Inc. |
|
5 # |
|
6 # This program is free software; you can redistribute it and/or |
|
7 # modify it under the terms of the GNU General Public License |
|
8 # as published by the Free Software Foundation; either version 2 |
|
9 # of the License, or (at your option) any later version. |
|
10 # |
|
11 # This program is distributed in the hope that it will be useful, |
|
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
14 # GNU General Public License for more details. |
|
15 # |
|
16 # You should have received a copy of the GNU General Public License |
|
17 # along with this program; if not, write to the Free Software |
|
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA |
|
19 |
|
20 # use dumpbin.exe (if present) to find the binary |
|
21 # dependencies of an extension module. |
|
22 # if dumpbin not available, pick apart the PE hdr of the binary |
|
23 # while this appears to work well, it is complex and subject to |
|
24 # problems with changes to PE hdrs (ie, this works only on 32 bit Intel |
|
25 # Windows format binaries) |
|
26 # |
|
27 # Note also that you should check the results to make sure that the |
|
28 # dlls are redistributable. I've listed most of the common MS dlls |
|
29 # under "excludes" below; add to this list as necessary (or use the |
|
30 # "excludes" option in the INSTALL section of the config file). |
|
31 |
|
32 import os |
|
33 import time |
|
34 import string |
|
35 import sys |
|
36 import re |
|
37 |
|
# Upper-cased names of the binaries whose imports were already analyzed
# by Dependencies().
seen = {}
# Cache used by getWindowsPath(); None until the path is first computed.
_bpath = None
iswin = sys.platform.startswith('win')
cygwin = sys.platform == 'cygwin'
# Dependencies() skips a library when its upper-cased file name, or (on
# non-Windows platforms) the directory it lives in, is a key of this dict.
excludes = dict.fromkeys([
    'KERNEL32.DLL',
    'ADVAPI.DLL',
    'MSVCRT.DLL',
    'ADVAPI32.DLL',
    'COMCTL32.DLL',
    'CRTDLL.DLL',
    'GDI32.DLL',
    'MFC42.DLL',
    'NTDLL.DLL',
    'OLE32.DLL',
    'OLEAUT32.DLL',
    'RPCRT4.DLL',
    'SHELL32.DLL',
    'USER32.DLL',
    'WINSPOOL.DRV',
    'WS2HELP.DLL',
    'WS2_32.DLL',
    'WSOCK32.DLL',
    'MSWSOCK.DLL',
    'WINMM.DLL',
    'COMDLG32.DLL',
    ## 'ZLIB.DLL', # test with python 1.5.2
    'ODBC32.DLL',
    'VERSION.DLL',
    'IMM32.DLL',
    'DDRAW.DLL',
    'DCIMAN32.DLL',
    'OPENGL32.DLL',
    'GLU32.DLL',
    'GLUB32.DLL',
    '/usr/lib',
    '/lib',
], 1)
|
74 |
|
def getfullnameof(mod, xtrapath = None):
    """Locate MOD on the search path and return its full path name.

    MOD is the basename of a dll or pyd.
    XTRAPATH is a path (a string) or a list of paths to search first.
    After XTRAPATH, the directories returned by getWindowsPath() are
    searched, followed by sys.path.
    Return '' when MOD does not exist in any of those directories."""
    searchdirs = getWindowsPath() + sys.path
    if xtrapath is not None:
        if type(xtrapath) is str:
            # a single directory: it goes in front of everything else
            searchdirs = [xtrapath] + searchdirs
        else:
            searchdirs = xtrapath + searchdirs
    for directory in searchdirs:
        candidate = os.path.join(directory, mod)
        if os.path.exists(candidate):
            return candidate
    return ''
|
93 |
|
def getImports1(pth):
    """Find the binary dependencies of PTH.

    This implementation (not used right now) uses the MSVC utility dumpbin.

    The output of 'dumpbin /IMPORTS' is read straight from a pipe; no
    temporary file is involved.  Every output line that consists of a
    single token with a '.' after its first character is taken to be the
    name of an imported library.
    Return the list of those names (empty if dumpbin printed nothing)."""
    rslt = []
    pipe = os.popen('dumpbin /IMPORTS "%s"' % pth)
    try:
        lines = pipe.readlines()
    finally:
        pipe.close()
    for line in lines:
        tokens = line.split()
        # find() > 0 rejects both "no dot" (-1) and a leading dot (0)
        if len(tokens) == 1 and tokens[0].find('.') > 0:
            rslt.append(tokens[0])
    return rslt
|
112 |
|
def getImports2x(pth):
    """Find the binary dependencies of PTH.

    This implementation reads the whole file into memory and walks through
    the PE header.  Both PE32 (optional header magic 0x10b) and PE32+
    (magic 0x20b) images are understood.

    Return the list of DLL names found in the import table, as the byte
    strings stored in the file.  An error is printed, and the names
    collected so far (possibly none) are returned, when the file cannot be
    read, has an unknown header format, is not a PE file, or its import
    table cannot be located.  struct.error propagates when the file is too
    short to contain the headers."""
    import struct
    rslt = []
    try:
        fp = open(pth, 'rb')
        try:
            f = fp.read()
        finally:
            fp.close()
    except IOError:
        print("E: bindepend cannot analyze %s - file not found!" % pth)
        return rslt
    #except struct.error:
    #    print "E: bindepend cannot analyze %s - error walking thru pehdr" % pth

    # Every format is explicitly little-endian with standard sizes ('<'):
    # a native 'l' is 8 bytes wide on LP64 platforms and would not fit the
    # 4-byte fields of the header.
    pehdrd = struct.unpack('<L', f[60:64])[0] # after the MSDOS loader is the offset of the peheader
    magic = struct.unpack('<L', f[pehdrd:pehdrd+4])[0] # pehdr starts with magic 'PE\000\000' (or 17744)
    # then 20 bytes of COFF header
    numsecs = struct.unpack('<H', f[pehdrd+6:pehdrd+8])[0] # whence we get number of sections
    opthdrmagic = struct.unpack('<H', f[pehdrd+24:pehdrd+26])[0]
    if opthdrmagic == 0x10b: # PE32 format
        numdictoffset = 116
    elif opthdrmagic == 0x20b: # PE32+ format
        numdictoffset = 132
    else:
        print("E: bindepend cannot analyze %s - unknown header format! %x" % (pth, opthdrmagic))
        return rslt
    # The data directories start right after the 4-byte directory count;
    # each entry is 8 bytes and the import table is the second entry.
    importoffset = numdictoffset + 4 + 8
    numdirs = struct.unpack('<L', f[pehdrd+numdictoffset:pehdrd+numdictoffset+4])[0]
    if magic != 17744:
        print("E: bindepend cannot analyze %s - file is not in PE format!" % pth)
        return rslt
    importsec, sz = struct.unpack('<2L', f[pehdrd+importoffset:pehdrd+importoffset+8])
    if sz == 0:
        return rslt
    secttbl = pehdrd + numdictoffset + 4 + 8*numdirs
    #nm, vsz, va, rsz, praw, preloc, plnnums, qrelocs, qlnnums, flags
    secttblfmt = '<8s6L2HL'
    seclist = []
    for i in range(numsecs):
        seclist.append(struct.unpack(secttblfmt, f[secttbl+i*40:secttbl+(i+1)*40]))
    # figure out which section holds the import table
    section = None
    for i in range(len(seclist)-1):
        if seclist[i][2] <= importsec < seclist[i+1][2]:
            section = seclist[i]
            break
    else:
        # not between two consecutive sections: it can only be in the last
        if seclist and importsec >= seclist[-1][2]:
            section = seclist[-1]
    if section is None:
        print("E: import section is unavailable")
        return rslt
    vbase = section[2]
    raw = section[4]
    idatastart = raw + importsec - vbase
    idata = f[idatastart:idatastart+section[1]]
    i = 0
    while 1:
        # one 20-byte import descriptor per imported DLL
        chunk = idata[i*20:(i+1)*20]
        if len(chunk) != 20:
            print("E: premature end of import table (chunk is %d, not 20)" % len(chunk))
            break
        vsa = struct.unpack('<5L', chunk)[3]
        if vsa == 0:
            # an all-zero descriptor terminates the table
            break
        sa = raw + vsa - vbase
        end = f.find(b'\000', sa)
        if end < 0:
            # unterminated name: take everything up to the end of the file
            end = len(f)
        nm = f[sa:end]
        if nm:
            rslt.append(nm)
        i = i + 1
    return rslt
|
177 |
|
def getImports2(path):
    """Find the binary dependencies of PATH.

    This implementation walks through the PE header, reading only the
    parts of the file it needs.  Both PE32 and PE32+ images are
    understood.

    Return the list of DLL names found in the import table, as the byte
    strings stored in the file; the list is empty when the image has no
    import table.  An error is printed and [] is returned when the file is
    not a PE image, has an unknown optional header, or the import section
    cannot be located.  IOError propagates if PATH cannot be opened and
    struct.error if the file is too short to contain the headers."""
    f = open(path, 'rb')
    try:
        return _getImports2FromFile(f, path)
    finally:
        f.close()

def _getImports2FromFile(f, path):
    """Read the import table of the PE image open on F (PATH is used in messages)."""
    import struct
    # Every format is explicitly little-endian with standard sizes ('<'):
    # a native 'l' is 8 bytes wide on LP64 platforms.
    # skip the MSDOS loader
    f.seek(60)
    # get offset to PE header
    offset = struct.unpack('<L', f.read(4))[0]
    f.seek(offset)
    if f.read(4) != b'PE\0\0':
        print("E: bindepend cannot analyze %s - file is not in PE format!" % path)
        return []
    coffhdrfmt = '<HHLLLHH'
    rawcoffhdr = f.read(struct.calcsize(coffhdrfmt))
    coffhdr = struct.unpack(coffhdrfmt, rawcoffhdr)
    coffhdr_numsections = coffhdr[1]

    # the first two bytes of the optional header tell PE32 from PE32+
    rawmagic = f.read(2)
    opthdrmagic = struct.unpack('<H', rawmagic)[0]
    if opthdrmagic == 0x10b: # PE32 format
        opthdrfmt = '<HBB9L6H4L2H6L'
    elif opthdrmagic == 0x20b: # PE32+ format (8-byte image base, stack and heap sizes)
        opthdrfmt = '<HBB5LQ2L6H4L2H4Q2L'
    else:
        print("E: bindepend cannot analyze %s - unknown header format! %x" % (path, opthdrmagic))
        return []
    rawopthdr = rawmagic + f.read(struct.calcsize(opthdrfmt) - len(rawmagic))
    opthdr = struct.unpack(opthdrfmt, rawopthdr)
    # the last fixed field is the number of data directories that follow
    opthdr_numrvas = opthdr[-1]

    datadirs = []
    datadirfmt = '<LL' # virtual address, size
    datadirsize = struct.calcsize(datadirfmt)
    for i in range(opthdr_numrvas):
        rawdatadir = f.read(datadirsize)
        datadirs.append(struct.unpack(datadirfmt, rawdatadir))

    sectionfmt = '<8s6L2HL'
    sectionsize = struct.calcsize(sectionfmt)
    sections = []
    for i in range(coffhdr_numsections):
        rawsection = f.read(sectionsize)
        sections.append(struct.unpack(sectionfmt, rawsection))

    # the import table is the second data directory
    if len(datadirs) < 2:
        return []
    importva, importsz = datadirs[1]
    if importsz == 0:
        return []
    # figure out what section it's in
    NAME, MISC, VIRTADDRESS, RAWSIZE, POINTERTORAW = range(5)
    importsection = None
    for j in range(len(sections)-1):
        if sections[j][VIRTADDRESS] <= importva < sections[j+1][VIRTADDRESS]:
            importsection = sections[j]
            break
    else:
        if sections and importva >= sections[-1][VIRTADDRESS]:
            importsection = sections[-1]
    if importsection is None:
        print("E: import section is unavailable")
        return []
    # converts an RVA inside the import section into a file offset
    rvadelta = importsection[POINTERTORAW] - importsection[VIRTADDRESS]
    f.seek(importva + rvadelta)
    data = f.read(importsz)
    iidescrfmt = '<5L'
    CHARACTERISTICS, DATETIME, FWDRCHAIN, NAMERVA, FIRSTTHUNK = range(5)
    iidescrsz = struct.calcsize(iidescrfmt)
    dlls = []
    # only complete descriptors are decoded; a truncated tail is ignored
    for pos in range(0, len(data) - iidescrsz + 1, iidescrsz):
        iid = struct.unpack(iidescrfmt, data[pos:pos+iidescrsz])
        if iid[NAMERVA] == 0:
            # an all-zero descriptor terminates the table
            break
        f.seek(iid[NAMERVA] + rvadelta)
        # the name is NUL terminated; keep the whole chunk if no NUL is found
        nm = f.read(256).split(b'\0', 1)[0]
        if nm:
            dlls.append(nm)
    return dlls
|
245 |
|
def Dependencies(lTOC):
    """Expand LTOC to include all the closure of binary dependencies.

    LTOC is a logical table of contents, ie, a list of tuples
    (name, path, typecode).
    Every dependency found is appended to LTOC as (name, path, 'BINARY'),
    except those listed in the module global EXCLUDES and those already
    recorded in the module global SEEN.
    LTOC is modified in place and is also the return value."""
    # LTOC grows while it is being iterated: the entries appended below
    # are visited by this same loop, which is what produces the closure.
    for nm, pth, typ in lTOC:
        key = nm.upper()
        if seen.get(key, 0):
            continue
        #print "I: analyzing", pth
        seen[key] = 1
        for lib in getImports(pth):
            #print "I: found", lib
            if not iswin and not cygwin:
                # here the import is treated as a path: it is used as the
                # location and split into directory and file name
                npth = lib
                libdir, lib = os.path.split(lib)
                if excludes.get(libdir, 0):
                    continue
            if excludes.get(lib.upper(), 0):
                continue
            if seen.get(lib.upper(), 0):
                continue
            if iswin or cygwin:
                # only a bare name is known: look next to PTH first
                npth = getfullnameof(lib, os.path.dirname(pth))
            if npth:
                lTOC.append((lib, npth, 'BINARY'))
            else:
                print("E: lib not found: %s dependency of %s" % (lib, pth))
    return lTOC
|
277 |
|
def getImports3(pth):
    """Find the binary dependencies of PTH.

    This implementation is for ldd platforms.

    Each line of the output of 'ldd PTH' of the form
    'NAME => LIBPATH (ADDRESS)' is examined.  Return the list of the
    LIBPATHs that exist on disk; an error is printed for those that do
    not.  The kernel pseudo-libraries linux-gate and linux-vdso are
    ignored."""
    rslt = []
    pipe = os.popen('ldd "%s"' % pth)
    try:
        lines = pipe.readlines()
    finally:
        pipe.close()
    for line in lines:
        m = re.search(r"\s+(.*?)\s+=>\s+(.*?)\s+\(.*\)", line)
        if not m:
            continue
        name, lib = m.group(1), m.group(2)
        if name[:10] in ('linux-gate', 'linux-vdso'):
            # linux-gate (and its newer name linux-vdso) is a fake library
            # which does not exist and should be ignored. See also:
            # http://www.trilithium.com/johan/2005/08/linux-gate/
            continue
        if os.path.exists(lib):
            rslt.append(lib)
        else:
            print('E: cannot find %s in path %s (needed by %s)' % \
                  (name, lib, pth))
    return rslt
|
298 |
|
def getImports(pth):
    """Return the binary dependencies of PTH.

    Forwards to getImports2 (PE header walker) on Windows and Cygwin,
    and to getImports3 (ldd) everywhere else.
    """
    onwindows = sys.platform.startswith('win') or sys.platform == 'cygwin'
    if not onwindows:
        return getImports3(pth)
    return getImports2(pth)
|
305 |
|
def getWindowsPath():
    """Return the list of directories that Windows will search for dlls.

    The list is computed on the first call and cached in the module
    global _bpath; later calls return that same list object.  On Windows,
    when win32api can be imported, it starts with the system directory,
    its sibling 'SYSTEM' directory and the Windows directory.  The
    entries of the PATH environment variable always follow."""
    global _bpath
    if _bpath is not None:
        return _bpath
    _bpath = []
    if iswin:
        try:
            import win32api
        except ImportError:
            print("W: Cannot determine your Windows or System directories")
            print("W: Please add them to your PATH if .dlls are not found")
            print("W: or install starship.python.net/skippy/win32/Downloads.html")
        else:
            sysdir = win32api.GetSystemDirectory()
            _bpath = [
                sysdir,
                os.path.normpath(os.path.join(sysdir, '..', 'SYSTEM')),
                win32api.GetWindowsDirectory(),
            ]
    _bpath.extend(os.environ.get('PATH', '').split(os.pathsep))
    return _bpath
|
325 |
|
if __name__ == "__main__":
    # Command line use: print the imports of the binary given as argument.
    args = sys.argv[1:]
    if not args:
        print("Usage: python %s BINARYFILE" % sys.argv[0])
        sys.exit(0)
    print(getImports(args[0]))