python-2.5.2/win32/Lib/modulefinder.py
changeset 0 ae805ac0140d
equal deleted inserted replaced
-1:000000000000 0:ae805ac0140d
       
     1 """Find modules used by a script, using introspection."""
       
     2 
       
     3 # This module should be kept compatible with Python 2.2, see PEP 291.
       
     4 
       
     5 import dis
       
     6 import imp
       
     7 import marshal
       
     8 import os
       
     9 import sys
       
    10 import new
       
    11 
       
    12 if hasattr(sys.__stdout__, "newlines"):
       
    13     READ_MODE = "U"  # universal line endings
       
    14 else:
       
    15     # remain compatible with Python  < 2.3
       
    16     READ_MODE = "r"
       
    17 
       
    18 LOAD_CONST = dis.opname.index('LOAD_CONST')
       
    19 IMPORT_NAME = dis.opname.index('IMPORT_NAME')
       
    20 STORE_NAME = dis.opname.index('STORE_NAME')
       
    21 STORE_GLOBAL = dis.opname.index('STORE_GLOBAL')
       
    22 STORE_OPS = [STORE_NAME, STORE_GLOBAL]
       
    23 
       
     24 # Modulefinder does a good job at simulating Python's import machinery, but it cannot
       
    25 # handle __path__ modifications packages make at runtime.  Therefore there
       
    26 # is a mechanism whereby you can register extra paths in this map for a
       
    27 # package, and it will be honored.
       
    28 
       
     29 # Note this is a mapping to lists of paths.
       
    30 packagePathMap = {}
       
    31 
       
    32 # A Public interface
       
    33 def AddPackagePath(packagename, path):
       
    34     paths = packagePathMap.get(packagename, [])
       
    35     paths.append(path)
       
    36     packagePathMap[packagename] = paths
       
    37 
       
    38 replacePackageMap = {}
       
    39 
       
    40 # This ReplacePackage mechanism allows modulefinder to work around the
       
    41 # way the _xmlplus package injects itself under the name "xml" into
       
    42 # sys.modules at runtime by calling ReplacePackage("_xmlplus", "xml")
       
    43 # before running ModuleFinder.
       
    44 
       
    45 def ReplacePackage(oldname, newname):
       
    46     replacePackageMap[oldname] = newname
       
    47 
       
    48 
       
    49 class Module:
       
    50 
       
    51     def __init__(self, name, file=None, path=None):
       
    52         self.__name__ = name
       
    53         self.__file__ = file
       
    54         self.__path__ = path
       
    55         self.__code__ = None
       
    56         # The set of global names that are assigned to in the module.
       
    57         # This includes those names imported through starimports of
       
    58         # Python modules.
       
    59         self.globalnames = {}
       
    60         # The set of starimports this module did that could not be
       
    61         # resolved, ie. a starimport from a non-Python module.
       
    62         self.starimports = {}
       
    63 
       
    64     def __repr__(self):
       
    65         s = "Module(%r" % (self.__name__,)
       
    66         if self.__file__ is not None:
       
    67             s = s + ", %r" % (self.__file__,)
       
    68         if self.__path__ is not None:
       
    69             s = s + ", %r" % (self.__path__,)
       
    70         s = s + ")"
       
    71         return s
       
    72 
       
    73 class ModuleFinder:
       
    74 
       
    75     def __init__(self, path=None, debug=0, excludes=[], replace_paths=[]):
       
    76         if path is None:
       
    77             path = sys.path
       
    78         self.path = path
       
    79         self.modules = {}
       
    80         self.badmodules = {}
       
    81         self.debug = debug
       
    82         self.indent = 0
       
    83         self.excludes = excludes
       
    84         self.replace_paths = replace_paths
       
    85         self.processed_paths = []   # Used in debugging only
       
    86 
       
    87     def msg(self, level, str, *args):
       
    88         if level <= self.debug:
       
    89             for i in range(self.indent):
       
    90                 print "   ",
       
    91             print str,
       
    92             for arg in args:
       
    93                 print repr(arg),
       
    94             print
       
    95 
       
    96     def msgin(self, *args):
       
    97         level = args[0]
       
    98         if level <= self.debug:
       
    99             self.indent = self.indent + 1
       
   100             self.msg(*args)
       
   101 
       
   102     def msgout(self, *args):
       
   103         level = args[0]
       
   104         if level <= self.debug:
       
   105             self.indent = self.indent - 1
       
   106             self.msg(*args)
       
   107 
       
   108     def run_script(self, pathname):
       
   109         self.msg(2, "run_script", pathname)
       
   110         fp = open(pathname, READ_MODE)
       
   111         stuff = ("", "r", imp.PY_SOURCE)
       
   112         self.load_module('__main__', fp, pathname, stuff)
       
   113 
       
   114     def load_file(self, pathname):
       
   115         dir, name = os.path.split(pathname)
       
   116         name, ext = os.path.splitext(name)
       
   117         fp = open(pathname, READ_MODE)
       
   118         stuff = (ext, "r", imp.PY_SOURCE)
       
   119         self.load_module(name, fp, pathname, stuff)
       
   120 
       
   121     def import_hook(self, name, caller=None, fromlist=None):
       
   122         self.msg(3, "import_hook", name, caller, fromlist)
       
   123         parent = self.determine_parent(caller)
       
   124         q, tail = self.find_head_package(parent, name)
       
   125         m = self.load_tail(q, tail)
       
   126         if not fromlist:
       
   127             return q
       
   128         if m.__path__:
       
   129             self.ensure_fromlist(m, fromlist)
       
   130         return None
       
   131 
       
   132     def determine_parent(self, caller):
       
   133         self.msgin(4, "determine_parent", caller)
       
   134         if not caller:
       
   135             self.msgout(4, "determine_parent -> None")
       
   136             return None
       
   137         pname = caller.__name__
       
   138         if caller.__path__:
       
   139             parent = self.modules[pname]
       
   140             assert caller is parent
       
   141             self.msgout(4, "determine_parent ->", parent)
       
   142             return parent
       
   143         if '.' in pname:
       
   144             i = pname.rfind('.')
       
   145             pname = pname[:i]
       
   146             parent = self.modules[pname]
       
   147             assert parent.__name__ == pname
       
   148             self.msgout(4, "determine_parent ->", parent)
       
   149             return parent
       
   150         self.msgout(4, "determine_parent -> None")
       
   151         return None
       
   152 
       
   153     def find_head_package(self, parent, name):
       
   154         self.msgin(4, "find_head_package", parent, name)
       
   155         if '.' in name:
       
   156             i = name.find('.')
       
   157             head = name[:i]
       
   158             tail = name[i+1:]
       
   159         else:
       
   160             head = name
       
   161             tail = ""
       
   162         if parent:
       
   163             qname = "%s.%s" % (parent.__name__, head)
       
   164         else:
       
   165             qname = head
       
   166         q = self.import_module(head, qname, parent)
       
   167         if q:
       
   168             self.msgout(4, "find_head_package ->", (q, tail))
       
   169             return q, tail
       
   170         if parent:
       
   171             qname = head
       
   172             parent = None
       
   173             q = self.import_module(head, qname, parent)
       
   174             if q:
       
   175                 self.msgout(4, "find_head_package ->", (q, tail))
       
   176                 return q, tail
       
   177         self.msgout(4, "raise ImportError: No module named", qname)
       
   178         raise ImportError, "No module named " + qname
       
   179 
       
   180     def load_tail(self, q, tail):
       
   181         self.msgin(4, "load_tail", q, tail)
       
   182         m = q
       
   183         while tail:
       
   184             i = tail.find('.')
       
   185             if i < 0: i = len(tail)
       
   186             head, tail = tail[:i], tail[i+1:]
       
   187             mname = "%s.%s" % (m.__name__, head)
       
   188             m = self.import_module(head, mname, m)
       
   189             if not m:
       
   190                 self.msgout(4, "raise ImportError: No module named", mname)
       
   191                 raise ImportError, "No module named " + mname
       
   192         self.msgout(4, "load_tail ->", m)
       
   193         return m
       
   194 
       
   195     def ensure_fromlist(self, m, fromlist, recursive=0):
       
   196         self.msg(4, "ensure_fromlist", m, fromlist, recursive)
       
   197         for sub in fromlist:
       
   198             if sub == "*":
       
   199                 if not recursive:
       
   200                     all = self.find_all_submodules(m)
       
   201                     if all:
       
   202                         self.ensure_fromlist(m, all, 1)
       
   203             elif not hasattr(m, sub):
       
   204                 subname = "%s.%s" % (m.__name__, sub)
       
   205                 submod = self.import_module(sub, subname, m)
       
   206                 if not submod:
       
   207                     raise ImportError, "No module named " + subname
       
   208 
       
   209     def find_all_submodules(self, m):
       
   210         if not m.__path__:
       
   211             return
       
   212         modules = {}
       
   213         # 'suffixes' used to be a list hardcoded to [".py", ".pyc", ".pyo"].
       
   214         # But we must also collect Python extension modules - although
       
   215         # we cannot separate normal dlls from Python extensions.
       
   216         suffixes = []
       
   217         for triple in imp.get_suffixes():
       
   218             suffixes.append(triple[0])
       
   219         for dir in m.__path__:
       
   220             try:
       
   221                 names = os.listdir(dir)
       
   222             except os.error:
       
   223                 self.msg(2, "can't list directory", dir)
       
   224                 continue
       
   225             for name in names:
       
   226                 mod = None
       
   227                 for suff in suffixes:
       
   228                     n = len(suff)
       
   229                     if name[-n:] == suff:
       
   230                         mod = name[:-n]
       
   231                         break
       
   232                 if mod and mod != "__init__":
       
   233                     modules[mod] = mod
       
   234         return modules.keys()
       
   235 
       
   236     def import_module(self, partname, fqname, parent):
       
   237         self.msgin(3, "import_module", partname, fqname, parent)
       
   238         try:
       
   239             m = self.modules[fqname]
       
   240         except KeyError:
       
   241             pass
       
   242         else:
       
   243             self.msgout(3, "import_module ->", m)
       
   244             return m
       
   245         if self.badmodules.has_key(fqname):
       
   246             self.msgout(3, "import_module -> None")
       
   247             return None
       
   248         if parent and parent.__path__ is None:
       
   249             self.msgout(3, "import_module -> None")
       
   250             return None
       
   251         try:
       
   252             fp, pathname, stuff = self.find_module(partname,
       
   253                                                    parent and parent.__path__, parent)
       
   254         except ImportError:
       
   255             self.msgout(3, "import_module ->", None)
       
   256             return None
       
   257         try:
       
   258             m = self.load_module(fqname, fp, pathname, stuff)
       
   259         finally:
       
   260             if fp: fp.close()
       
   261         if parent:
       
   262             setattr(parent, partname, m)
       
   263         self.msgout(3, "import_module ->", m)
       
   264         return m
       
   265 
       
   266     def load_module(self, fqname, fp, pathname, (suffix, mode, type)):
       
   267         self.msgin(2, "load_module", fqname, fp and "fp", pathname)
       
   268         if type == imp.PKG_DIRECTORY:
       
   269             m = self.load_package(fqname, pathname)
       
   270             self.msgout(2, "load_module ->", m)
       
   271             return m
       
   272         if type == imp.PY_SOURCE:
       
   273             co = compile(fp.read()+'\n', pathname, 'exec')
       
   274         elif type == imp.PY_COMPILED:
       
   275             if fp.read(4) != imp.get_magic():
       
   276                 self.msgout(2, "raise ImportError: Bad magic number", pathname)
       
   277                 raise ImportError, "Bad magic number in %s" % pathname
       
   278             fp.read(4)
       
   279             co = marshal.load(fp)
       
   280         else:
       
   281             co = None
       
   282         m = self.add_module(fqname)
       
   283         m.__file__ = pathname
       
   284         if co:
       
   285             if self.replace_paths:
       
   286                 co = self.replace_paths_in_code(co)
       
   287             m.__code__ = co
       
   288             self.scan_code(co, m)
       
   289         self.msgout(2, "load_module ->", m)
       
   290         return m
       
   291 
       
   292     def _add_badmodule(self, name, caller):
       
   293         if name not in self.badmodules:
       
   294             self.badmodules[name] = {}
       
   295         self.badmodules[name][caller.__name__] = 1
       
   296 
       
   297     def _safe_import_hook(self, name, caller, fromlist):
       
   298         # wrapper for self.import_hook() that won't raise ImportError
       
   299         if name in self.badmodules:
       
   300             self._add_badmodule(name, caller)
       
   301             return
       
   302         try:
       
   303             self.import_hook(name, caller)
       
   304         except ImportError, msg:
       
   305             self.msg(2, "ImportError:", str(msg))
       
   306             self._add_badmodule(name, caller)
       
   307         else:
       
   308             if fromlist:
       
   309                 for sub in fromlist:
       
   310                     if sub in self.badmodules:
       
   311                         self._add_badmodule(sub, caller)
       
   312                         continue
       
   313                     try:
       
   314                         self.import_hook(name, caller, [sub])
       
   315                     except ImportError, msg:
       
   316                         self.msg(2, "ImportError:", str(msg))
       
   317                         fullname = name + "." + sub
       
   318                         self._add_badmodule(fullname, caller)
       
   319 
       
   320     def scan_code(self, co, m):
       
   321         code = co.co_code
       
   322         n = len(code)
       
   323         i = 0
       
   324         fromlist = None
       
   325         while i < n:
       
   326             c = code[i]
       
   327             i = i+1
       
   328             op = ord(c)
       
   329             if op >= dis.HAVE_ARGUMENT:
       
   330                 oparg = ord(code[i]) + ord(code[i+1])*256
       
   331                 i = i+2
       
   332             if op == LOAD_CONST:
       
   333                 # An IMPORT_NAME is always preceded by a LOAD_CONST, it's
       
   334                 # a tuple of "from" names, or None for a regular import.
       
   335                 # The tuple may contain "*" for "from <mod> import *"
       
   336                 fromlist = co.co_consts[oparg]
       
   337             elif op == IMPORT_NAME:
       
   338                 assert fromlist is None or type(fromlist) is tuple
       
   339                 name = co.co_names[oparg]
       
   340                 have_star = 0
       
   341                 if fromlist is not None:
       
   342                     if "*" in fromlist:
       
   343                         have_star = 1
       
   344                     fromlist = [f for f in fromlist if f != "*"]
       
   345                 self._safe_import_hook(name, m, fromlist)
       
   346                 if have_star:
       
   347                     # We've encountered an "import *". If it is a Python module,
       
   348                     # the code has already been parsed and we can suck out the
       
   349                     # global names.
       
   350                     mm = None
       
   351                     if m.__path__:
       
   352                         # At this point we don't know whether 'name' is a
       
   353                         # submodule of 'm' or a global module. Let's just try
       
   354                         # the full name first.
       
   355                         mm = self.modules.get(m.__name__ + "." + name)
       
   356                     if mm is None:
       
   357                         mm = self.modules.get(name)
       
   358                     if mm is not None:
       
   359                         m.globalnames.update(mm.globalnames)
       
   360                         m.starimports.update(mm.starimports)
       
   361                         if mm.__code__ is None:
       
   362                             m.starimports[name] = 1
       
   363                     else:
       
   364                         m.starimports[name] = 1
       
   365             elif op in STORE_OPS:
       
   366                 # keep track of all global names that are assigned to
       
   367                 name = co.co_names[oparg]
       
   368                 m.globalnames[name] = 1
       
   369         for c in co.co_consts:
       
   370             if isinstance(c, type(co)):
       
   371                 self.scan_code(c, m)
       
   372 
       
   373     def load_package(self, fqname, pathname):
       
   374         self.msgin(2, "load_package", fqname, pathname)
       
   375         newname = replacePackageMap.get(fqname)
       
   376         if newname:
       
   377             fqname = newname
       
   378         m = self.add_module(fqname)
       
   379         m.__file__ = pathname
       
   380         m.__path__ = [pathname]
       
   381 
       
   382         # As per comment at top of file, simulate runtime __path__ additions.
       
   383         m.__path__ = m.__path__ + packagePathMap.get(fqname, [])
       
   384 
       
   385         fp, buf, stuff = self.find_module("__init__", m.__path__)
       
   386         self.load_module(fqname, fp, buf, stuff)
       
   387         self.msgout(2, "load_package ->", m)
       
   388         return m
       
   389 
       
   390     def add_module(self, fqname):
       
   391         if self.modules.has_key(fqname):
       
   392             return self.modules[fqname]
       
   393         self.modules[fqname] = m = Module(fqname)
       
   394         return m
       
   395 
       
   396     def find_module(self, name, path, parent=None):
       
   397         if parent is not None:
       
   398             # assert path is not None
       
   399             fullname = parent.__name__+'.'+name
       
   400         else:
       
   401             fullname = name
       
   402         if fullname in self.excludes:
       
   403             self.msgout(3, "find_module -> Excluded", fullname)
       
   404             raise ImportError, name
       
   405 
       
   406         if path is None:
       
   407             if name in sys.builtin_module_names:
       
   408                 return (None, None, ("", "", imp.C_BUILTIN))
       
   409 
       
   410             path = self.path
       
   411         return imp.find_module(name, path)
       
   412 
       
   413     def report(self):
       
   414         """Print a report to stdout, listing the found modules with their
       
   415         paths, as well as modules that are missing, or seem to be missing.
       
   416         """
       
   417         print
       
   418         print "  %-25s %s" % ("Name", "File")
       
   419         print "  %-25s %s" % ("----", "----")
       
   420         # Print modules found
       
   421         keys = self.modules.keys()
       
   422         keys.sort()
       
   423         for key in keys:
       
   424             m = self.modules[key]
       
   425             if m.__path__:
       
   426                 print "P",
       
   427             else:
       
   428                 print "m",
       
   429             print "%-25s" % key, m.__file__ or ""
       
   430 
       
   431         # Print missing modules
       
   432         missing, maybe = self.any_missing_maybe()
       
   433         if missing:
       
   434             print
       
   435             print "Missing modules:"
       
   436             for name in missing:
       
   437                 mods = self.badmodules[name].keys()
       
   438                 mods.sort()
       
   439                 print "?", name, "imported from", ', '.join(mods)
       
   440         # Print modules that may be missing, but then again, maybe not...
       
   441         if maybe:
       
   442             print
       
   443             print "Submodules thay appear to be missing, but could also be",
       
   444             print "global names in the parent package:"
       
   445             for name in maybe:
       
   446                 mods = self.badmodules[name].keys()
       
   447                 mods.sort()
       
   448                 print "?", name, "imported from", ', '.join(mods)
       
   449 
       
   450     def any_missing(self):
       
   451         """Return a list of modules that appear to be missing. Use
       
   452         any_missing_maybe() if you want to know which modules are
       
   453         certain to be missing, and which *may* be missing.
       
   454         """
       
   455         missing, maybe = self.any_missing_maybe()
       
   456         return missing + maybe
       
   457 
       
   458     def any_missing_maybe(self):
       
   459         """Return two lists, one with modules that are certainly missing
       
   460         and one with modules that *may* be missing. The latter names could
       
   461         either be submodules *or* just global names in the package.
       
   462 
       
   463         The reason it can't always be determined is that it's impossible to
       
   464         tell which names are imported when "from module import *" is done
       
   465         with an extension module, short of actually importing it.
       
   466         """
       
   467         missing = []
       
   468         maybe = []
       
   469         for name in self.badmodules:
       
   470             if name in self.excludes:
       
   471                 continue
       
   472             i = name.rfind(".")
       
   473             if i < 0:
       
   474                 missing.append(name)
       
   475                 continue
       
   476             subname = name[i+1:]
       
   477             pkgname = name[:i]
       
   478             pkg = self.modules.get(pkgname)
       
   479             if pkg is not None:
       
   480                 if pkgname in self.badmodules[name]:
       
   481                     # The package tried to import this module itself and
       
   482                     # failed. It's definitely missing.
       
   483                     missing.append(name)
       
   484                 elif subname in pkg.globalnames:
       
   485                     # It's a global in the package: definitely not missing.
       
   486                     pass
       
   487                 elif pkg.starimports:
       
   488                     # It could be missing, but the package did an "import *"
       
   489                     # from a non-Python module, so we simply can't be sure.
       
   490                     maybe.append(name)
       
   491                 else:
       
   492                     # It's not a global in the package, the package didn't
       
   493                     # do funny star imports, it's very likely to be missing.
       
   494                     # The symbol could be inserted into the package from the
       
   495                     # outside, but since that's not good style we simply list
       
   496                     # it missing.
       
   497                     missing.append(name)
       
   498             else:
       
   499                 missing.append(name)
       
   500         missing.sort()
       
   501         maybe.sort()
       
   502         return missing, maybe
       
   503 
       
   504     def replace_paths_in_code(self, co):
       
   505         new_filename = original_filename = os.path.normpath(co.co_filename)
       
   506         for f, r in self.replace_paths:
       
   507             if original_filename.startswith(f):
       
   508                 new_filename = r + original_filename[len(f):]
       
   509                 break
       
   510 
       
   511         if self.debug and original_filename not in self.processed_paths:
       
   512             if new_filename != original_filename:
       
   513                 self.msgout(2, "co_filename %r changed to %r" \
       
   514                                     % (original_filename,new_filename,))
       
   515             else:
       
   516                 self.msgout(2, "co_filename %r remains unchanged" \
       
   517                                     % (original_filename,))
       
   518             self.processed_paths.append(original_filename)
       
   519 
       
   520         consts = list(co.co_consts)
       
   521         for i in range(len(consts)):
       
   522             if isinstance(consts[i], type(co)):
       
   523                 consts[i] = self.replace_paths_in_code(consts[i])
       
   524 
       
   525         return new.code(co.co_argcount, co.co_nlocals, co.co_stacksize,
       
   526                          co.co_flags, co.co_code, tuple(consts), co.co_names,
       
   527                          co.co_varnames, new_filename, co.co_name,
       
   528                          co.co_firstlineno, co.co_lnotab,
       
   529                          co.co_freevars, co.co_cellvars)
       
   530 
       
   531 
       
   532 def test():
       
   533     # Parse command line
       
   534     import getopt
       
   535     try:
       
   536         opts, args = getopt.getopt(sys.argv[1:], "dmp:qx:")
       
   537     except getopt.error, msg:
       
   538         print msg
       
   539         return
       
   540 
       
   541     # Process options
       
   542     debug = 1
       
   543     domods = 0
       
   544     addpath = []
       
   545     exclude = []
       
   546     for o, a in opts:
       
   547         if o == '-d':
       
   548             debug = debug + 1
       
   549         if o == '-m':
       
   550             domods = 1
       
   551         if o == '-p':
       
   552             addpath = addpath + a.split(os.pathsep)
       
   553         if o == '-q':
       
   554             debug = 0
       
   555         if o == '-x':
       
   556             exclude.append(a)
       
   557 
       
   558     # Provide default arguments
       
   559     if not args:
       
   560         script = "hello.py"
       
   561     else:
       
   562         script = args[0]
       
   563 
       
   564     # Set the path based on sys.path and the script directory
       
   565     path = sys.path[:]
       
   566     path[0] = os.path.dirname(script)
       
   567     path = addpath + path
       
   568     if debug > 1:
       
   569         print "path:"
       
   570         for item in path:
       
   571             print "   ", repr(item)
       
   572 
       
   573     # Create the module finder and turn its crank
       
   574     mf = ModuleFinder(path, debug, exclude)
       
   575     for arg in args[1:]:
       
   576         if arg == '-m':
       
   577             domods = 1
       
   578             continue
       
   579         if domods:
       
   580             if arg[-2:] == '.*':
       
   581                 mf.import_hook(arg[:-2], None, ["*"])
       
   582             else:
       
   583                 mf.import_hook(arg)
       
   584         else:
       
   585             mf.load_file(arg)
       
   586     mf.run_script(script)
       
   587     mf.report()
       
   588     return mf  # for -i debugging
       
   589 
       
   590 
       
   591 if __name__ == '__main__':
       
   592     try:
       
   593         mf = test()
       
   594     except KeyboardInterrupt:
       
   595         print "\n[interrupt]"