srcanamdw/codescanner/pyinstaller/bindepend.py
changeset 1 22878952f6e2
equal deleted inserted replaced
0:509e4801c378 1:22878952f6e2
       
     1 #! /usr/bin/env python
       
     2 # Find external dependencies of binary libraries.
       
     3 # Copyright (C) 2005, Giovanni Bajo
       
     4 # Based on previous work under copyright (c) 2002 McMillan Enterprises, Inc.
       
     5 #
       
     6 # This program is free software; you can redistribute it and/or
       
     7 # modify it under the terms of the GNU General Public License
       
     8 # as published by the Free Software Foundation; either version 2
       
     9 # of the License, or (at your option) any later version.
       
    10 #
       
    11 # This program is distributed in the hope that it will be useful,
       
    12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
       
    13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
       
    14 # GNU General Public License for more details.
       
    15 #
       
    16 # You should have received a copy of the GNU General Public License
       
    17 # along with this program; if not, write to the Free Software
       
    18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
       
    19 
       
# Find the binary dependencies of an extension module.
# On Windows the PE header of the binary is picked apart directly; an
# implementation based on dumpbin.exe is kept below but is not used right now.
# While the PE header walker appears to work well, it is complex and subject
# to problems with changes to PE headers (i.e., this works only on 32-bit
# Intel Windows format binaries).
       
    26 #
       
    27 # Note also that you should check the results to make sure that the
       
    28 # dlls are redistributable. I've listed most of the common MS dlls
       
    29 # under "excludes" below; add to this list as necessary (or use the
       
    30 # "excludes" option in the INSTALL section of the config file).
       
    31 
       
    32 import os
       
    33 import time
       
    34 import string
       
    35 import sys
       
    36 import re
       
    37 
       
    38 seen = {}
       
    39 _bpath = None
       
    40 iswin = sys.platform[:3] == 'win'
       
    41 cygwin = sys.platform == 'cygwin'
       
    42 excludes = {'KERNEL32.DLL':1,
       
    43       'ADVAPI.DLL':1,
       
    44       'MSVCRT.DLL':1,
       
    45       'ADVAPI32.DLL':1,
       
    46       'COMCTL32.DLL':1,
       
    47       'CRTDLL.DLL':1,
       
    48       'GDI32.DLL':1,
       
    49       'MFC42.DLL':1,
       
    50       'NTDLL.DLL':1,
       
    51       'OLE32.DLL':1,
       
    52       'OLEAUT32.DLL':1,
       
    53       'RPCRT4.DLL':1,
       
    54       'SHELL32.DLL':1,
       
    55       'USER32.DLL':1,
       
    56       'WINSPOOL.DRV':1,
       
    57       'WS2HELP.DLL':1,
       
    58       'WS2_32.DLL':1,
       
    59       'WSOCK32.DLL':1,
       
    60       'MSWSOCK.DLL':1,
       
    61       'WINMM.DLL':1,
       
    62       'COMDLG32.DLL':1,
       
    63 ##      'ZLIB.DLL':1,   # test with python 1.5.2
       
    64       'ODBC32.DLL':1,
       
    65       'VERSION.DLL':1,
       
    66       'IMM32.DLL':1,
       
    67       'DDRAW.DLL':1,
       
    68       'DCIMAN32.DLL':1,
       
    69       'OPENGL32.DLL':1,
       
    70       'GLU32.DLL':1,
       
    71       'GLUB32.DLL':1,
       
    72       '/usr/lib':1,
       
    73       '/lib':1,}
       
    74 
       
    75 def getfullnameof(mod, xtrapath = None):
       
    76   """Return the full path name of MOD.
       
    77 
       
    78       MOD is the basename of a dll or pyd.
       
    79       XTRAPATH is a path or list of paths to search first.
       
    80       Return the full path name of MOD.
       
    81       Will search the full Windows search path, as well as sys.path"""
       
    82   epath = getWindowsPath() + sys.path
       
    83   if xtrapath is not None:
       
    84     if type(xtrapath) == type(''):
       
    85       epath.insert(0, xtrapath)
       
    86     else:
       
    87       epath = xtrapath + epath
       
    88   for p in epath:
       
    89     npth = os.path.join(p, mod)
       
    90     if os.path.exists(npth):
       
    91       return npth
       
    92   return ''
       
    93 
       
    94 def getImports1(pth):
       
    95     """Find the binary dependencies of PTH.
       
    96 
       
    97         This implementation (not used right now) uses the MSVC utility dumpbin"""
       
    98     import tempfile
       
    99     rslt = []
       
   100     tmpf = tempfile.mktemp()
       
   101     os.system('dumpbin /IMPORTS "%s" >%s' %(pth, tmpf))
       
   102     time.sleep(0.1)
       
   103     txt = open(tmpf,'r').readlines()
       
   104     os.remove(tmpf)
       
   105     i = 0
       
   106     while i < len(txt):
       
   107         tokens = string.split(txt[i])
       
   108         if len(tokens) == 1 and string.find(tokens[0], '.') > 0:
       
   109             rslt.append(string.strip(tokens[0]))
       
   110         i = i + 1
       
   111     return rslt
       
   112 
       
   113 def getImports2x(pth):
       
   114     """Find the binary dependencies of PTH.
       
   115 
       
   116         This implementation walks through the PE header"""
       
   117     import struct
       
   118     rslt = []
       
   119     try:
       
   120       f = open(pth, 'rb').read()
       
   121       pehdrd = struct.unpack('l', f[60:64])[0]  #after the MSDOS loader is the offset of the peheader
       
   122       magic = struct.unpack('l', f[pehdrd:pehdrd+4])[0] # pehdr starts with magic 'PE\000\000' (or 17744)
       
   123                                                         # then 20 bytes of COFF header
       
   124       numsecs = struct.unpack('h', f[pehdrd+6:pehdrd+8])[0] # whence we get number of sections
       
   125       opthdrmagic = struct.unpack('h', f[pehdrd+24:pehdrd+26])[0]
       
   126       if opthdrmagic == 0x10b: # PE32 format
       
   127           numdictoffset = 116
       
   128           importoffset = 128
       
   129       elif opthdrmagic == 0x20b: # PE32+ format
       
   130           numdictoffset = 132
       
   131           importoffset = 148
       
   132       else:
       
   133           print "E: bindepend cannot analyze %s - unknown header format! %x" % (pth, opthdrmagic)
       
   134           return rslt
       
   135       numdirs = struct.unpack('l', f[pehdrd+numdictoffset:pehdrd+numdictoffset+4])[0]
       
   136       idata = ''
       
   137       if magic == 17744:
       
   138           importsec, sz = struct.unpack('2l', f[pehdrd+importoffset:pehdrd+importoffset+8])
       
   139           if sz == 0:
       
   140               return rslt
       
   141           secttbl = pehdrd + numdictoffset + 4 + 8*numdirs
       
   142           secttblfmt = '8s7l2h'
       
   143           seclist = []
       
   144           for i in range(numsecs):
       
   145               seclist.append(struct.unpack(secttblfmt, f[secttbl+i*40:secttbl+(i+1)*40]))
       
   146               #nm, vsz, va, rsz, praw, preloc, plnnums, qrelocs, qlnnums, flags \
       
   147               # = seclist[-1]
       
   148           for i in range(len(seclist)-1):
       
   149               if seclist[i][2] <= importsec < seclist[i+1][2]:
       
   150                   break
       
   151           vbase = seclist[i][2]
       
   152           raw = seclist[i][4]
       
   153           idatastart = raw + importsec - vbase
       
   154           idata = f[idatastart:idatastart+seclist[i][1]]
       
   155           i = 0
       
   156           while 1:
       
   157               chunk = idata[i*20:(i+1)*20]
       
   158               if len(chunk) != 20:
       
   159                   print "E: premature end of import table (chunk is %d, not 20)" % len(chunk)
       
   160                   break
       
   161               vsa =  struct.unpack('5l', chunk)[3]
       
   162               if vsa == 0:
       
   163                   break
       
   164               sa = raw + vsa - vbase
       
   165               end = string.find(f, '\000', sa)
       
   166               nm = f[sa:end]
       
   167               if nm:
       
   168                   rslt.append(nm)
       
   169               i = i + 1
       
   170       else:
       
   171           print "E: bindepend cannot analyze %s - file is not in PE format!" % pth
       
   172     except IOError:
       
   173         print "E: bindepend cannot analyze %s - file not found!" % pth
       
   174     #except struct.error:
       
   175     #    print "E: bindepend cannot analyze %s - error walking thru pehdr" % pth
       
   176     return rslt
       
   177 
       
   178 def getImports2(path):
       
   179     """Find the binary dependencies of PTH.
       
   180 
       
   181         This implementation walks through the PE header"""
       
   182     import struct
       
   183     f = open(path, 'rb')
       
   184     # skip the MSDOS loader
       
   185     f.seek(60)
       
   186     # get offset to PE header
       
   187     offset = struct.unpack('l', f.read(4))[0]
       
   188     f.seek(offset)
       
   189     signature = struct.unpack('l', f.read(4))[0]
       
   190     coffhdrfmt = 'hhlllhh'
       
   191     rawcoffhdr = f.read(struct.calcsize(coffhdrfmt))
       
   192     coffhdr = struct.unpack(coffhdrfmt, rawcoffhdr)
       
   193     coffhdr_numsections = coffhdr[1]
       
   194 
       
   195     opthdrfmt = 'hbblllllllllhhhhhhllllhhllllll'
       
   196     rawopthdr = f.read(struct.calcsize(opthdrfmt))
       
   197     opthdr = struct.unpack(opthdrfmt, rawopthdr)
       
   198     opthdr_numrvas = opthdr[-1]
       
   199 
       
   200     datadirs = []
       
   201     datadirsize = struct.calcsize('ll') # virtual address, size
       
   202     for i in range(opthdr_numrvas):
       
   203         rawdatadir = f.read(datadirsize)
       
   204         datadirs.append(struct.unpack('ll', rawdatadir))
       
   205 
       
   206     sectionfmt = '8s6l2hl'
       
   207     sectionsize = struct.calcsize(sectionfmt)
       
   208     sections = []
       
   209     for i in range(coffhdr_numsections):
       
   210         rawsection = f.read(sectionsize)
       
   211         sections.append(struct.unpack(sectionfmt, rawsection))
       
   212 
       
   213     importva, importsz = datadirs[1]
       
   214     if importsz == 0:
       
   215         return []
       
   216     # figure out what section it's in
       
   217     NAME, MISC, VIRTADDRESS, RAWSIZE, POINTERTORAW = range(5)
       
   218     for j in range(len(sections)-1):
       
   219         if sections[j][VIRTADDRESS] <= importva < sections[j+1][VIRTADDRESS]:
       
   220             importsection = sections[j]
       
   221             break
       
   222     else:
       
   223         if importva >= sections[-1][VIRTADDRESS]:
       
   224             importsection = sections[-1]
       
   225         else:
       
   226             print "E: import section is unavailable"
       
   227             return []
       
   228     f.seek(importsection[POINTERTORAW] + importva - importsection[VIRTADDRESS])
       
   229     data = f.read(importsz)
       
   230     iidescrfmt = 'lllll'
       
   231     CHARACTERISTICS, DATETIME, FWDRCHAIN, NAMERVA, FIRSTTHUNK = range(5)
       
   232     iidescrsz = struct.calcsize(iidescrfmt)
       
   233     dlls = []
       
   234     while data:
       
   235         iid = struct.unpack(iidescrfmt, data[:iidescrsz])
       
   236         if iid[NAMERVA] == 0:
       
   237             break
       
   238         f.seek(importsection[POINTERTORAW] + iid[NAMERVA] - importsection[VIRTADDRESS])
       
   239         nm = f.read(256)
       
   240         nm, jnk = string.split(nm, '\0', 1)
       
   241         if nm:
       
   242             dlls.append(nm)
       
   243         data = data[iidescrsz:]
       
   244     return dlls
       
   245 
       
   246 def Dependencies(lTOC):
       
   247   """Expand LTOC to include all the closure of binary dependencies.
       
   248 
       
   249      LTOC is a logical table of contents, ie, a seq of tuples (name, path).
       
   250      Return LTOC expanded by all the binary dependencies of the entries
       
   251      in LTOC, except those listed in the module global EXCLUDES"""
       
   252   for nm, pth, typ in lTOC:
       
   253     fullnm = string.upper(os.path.basename(pth))
       
   254     if seen.get(string.upper(nm),0):
       
   255       continue
       
   256     #print "I: analyzing", pth
       
   257     seen[string.upper(nm)] = 1
       
   258     dlls = getImports(pth)
       
   259     for lib in dlls:
       
   260         #print "I: found", lib
       
   261         if not iswin and not cygwin:
       
   262             npth = lib
       
   263             dir, lib = os.path.split(lib)
       
   264             if excludes.get(dir,0):
       
   265                 continue
       
   266         if excludes.get(string.upper(lib),0):
       
   267             continue
       
   268         if seen.get(string.upper(lib),0):
       
   269             continue
       
   270         if iswin or cygwin:
       
   271             npth = getfullnameof(lib, os.path.dirname(pth))
       
   272         if npth:
       
   273             lTOC.append((lib, npth, 'BINARY'))
       
   274         else:
       
   275             print "E: lib not found:", lib, "dependency of", pth
       
   276   return lTOC
       
   277 
       
   278 def getImports3(pth):
       
   279     """Find the binary dependencies of PTH.
       
   280 
       
   281         This implementation is for ldd platforms"""
       
   282     rslt = []
       
   283     for line in os.popen('ldd "%s"' % pth).readlines():
       
   284         m = re.search(r"\s+(.*?)\s+=>\s+(.*?)\s+\(.*\)", line)
       
   285         if m:
       
   286             name, lib = m.group(1), m.group(2)
       
   287             if name[:10] == 'linux-gate':
       
   288                 # linux-gate is a fake library which does not exist and
       
   289                 # should be ignored. See also:
       
   290                 # http://www.trilithium.com/johan/2005/08/linux-gate/
       
   291                 continue
       
   292             if os.path.exists(lib):
       
   293                 rslt.append(lib)
       
   294             else:
       
   295                 print 'E: cannot find %s in path %s (needed by %s)' % \
       
   296                       (name, lib, pth)
       
   297     return rslt
       
   298 
       
   299 def getImports(pth):
       
   300     """Forwards to either getImports2 or getImports3
       
   301     """
       
   302     if sys.platform[:3] == 'win' or sys.platform == 'cygwin':
       
   303         return getImports2(pth)
       
   304     return getImports3(pth)
       
   305 
       
   306 def getWindowsPath():
       
   307     """Return the path that Windows will search for dlls."""
       
   308     global _bpath
       
   309     if _bpath is None:
       
   310         _bpath = []
       
   311         if iswin:
       
   312             try:
       
   313                 import win32api
       
   314             except ImportError:
       
   315                 print "W: Cannot determine your Windows or System directories"
       
   316                 print "W: Please add them to your PATH if .dlls are not found"
       
   317                 print "W: or install starship.python.net/skippy/win32/Downloads.html"
       
   318             else:
       
   319                 sysdir = win32api.GetSystemDirectory()
       
   320                 sysdir2 = os.path.normpath(os.path.join(sysdir, '..', 'SYSTEM'))
       
   321                 windir = win32api.GetWindowsDirectory()
       
   322                 _bpath = [sysdir, sysdir2, windir]
       
   323         _bpath.extend(string.split(os.environ.get('PATH', ''), os.pathsep))
       
   324     return _bpath
       
   325 
       
   326 if __name__ == "__main__":
       
   327   if len(sys.argv) < 2:
       
   328     print "Usage: python %s BINARYFILE" % sys.argv[0]
       
   329     sys.exit(0)
       
   330   print getImports(sys.argv[1])