|
1 """Find modules used by a script, using introspection.""" |
|
2 |
|
3 # This module should be kept compatible with Python 2.2, see PEP 291. |
|
4 |
|
5 import dis |
|
6 import imp |
|
7 import marshal |
|
8 import os |
|
9 import sys |
|
10 import new |
|
11 |
|
# Mode used to open Python source files for reading.
READ_MODE = "r"  # remain compatible with Python < 2.3
if hasattr(sys.__stdout__, "newlines"):
    READ_MODE = "U"  # universal line endings

# Numeric opcodes that scan_code() looks for in compiled bytecode.
LOAD_CONST, IMPORT_NAME, STORE_NAME, STORE_GLOBAL = map(
    dis.opname.index,
    ('LOAD_CONST', 'IMPORT_NAME', 'STORE_NAME', 'STORE_GLOBAL'))
STORE_OPS = [STORE_NAME, STORE_GLOBAL]
|
23 |
|
# ModuleFinder does a good job at simulating Python's import mechanism,
# but it cannot handle __path__ modifications packages make at runtime.
# Therefore there is a mechanism whereby you can register extra paths in
# this map for a package, and it will be honored.

# Note this is a mapping from package names to lists of paths.
|
packagePathMap = {}


# A public interface
def AddPackagePath(packagename, path):
    """Register an extra search path for the package named packagename.

    The path is appended to that package's list in packagePathMap; the
    list is created on first use.
    """
    packagePathMap.setdefault(packagename, []).append(path)
|
37 |
|
replacePackageMap = {}


# The ReplacePackage mechanism lets modulefinder cope with the way the
# _xmlplus package injects itself under the name "xml" into sys.modules
# at runtime: call ReplacePackage("_xmlplus", "xml") before running
# ModuleFinder.

def ReplacePackage(oldname, newname):
    """Record that package oldname should be registered as newname.

    The mapping is stored in replacePackageMap; a later call with the
    same oldname overwrites the earlier entry.
    """
    replacePackageMap[oldname] = newname
|
47 |
|
48 |
|
class Module:
    """Lightweight record describing one module located by the finder."""

    def __init__(self, name, file=None, path=None):
        """Store the module's name plus its optional file and path."""
        self.__name__ = name
        self.__file__ = file
        self.__path__ = path
        self.__code__ = None
        # Global names assigned to in the module (dict used as a set).
        # Names pulled in through starimports of Python modules are
        # included as well.
        self.globalnames = {}
        # Starimports done by this module that could not be resolved,
        # i.e. a starimport from a non-Python module (dict used as a set).
        self.starimports = {}

    def __repr__(self):
        """Return 'Module(name[, file][, path])', omitting None fields."""
        shown = [repr(self.__name__)]
        for optional in (self.__file__, self.__path__):
            if optional is not None:
                shown.append(repr(optional))
        return "Module(%s)" % ", ".join(shown)
|
72 |
|
73 class ModuleFinder: |
|
74 |
|
75 def __init__(self, path=None, debug=0, excludes=[], replace_paths=[]): |
|
76 if path is None: |
|
77 path = sys.path |
|
78 self.path = path |
|
79 self.modules = {} |
|
80 self.badmodules = {} |
|
81 self.debug = debug |
|
82 self.indent = 0 |
|
83 self.excludes = excludes |
|
84 self.replace_paths = replace_paths |
|
85 self.processed_paths = [] # Used in debugging only |
|
86 |
|
87 def msg(self, level, str, *args): |
|
88 if level <= self.debug: |
|
89 for i in range(self.indent): |
|
90 print " ", |
|
91 print str, |
|
92 for arg in args: |
|
93 print repr(arg), |
|
94 print |
|
95 |
|
96 def msgin(self, *args): |
|
97 level = args[0] |
|
98 if level <= self.debug: |
|
99 self.indent = self.indent + 1 |
|
100 self.msg(*args) |
|
101 |
|
102 def msgout(self, *args): |
|
103 level = args[0] |
|
104 if level <= self.debug: |
|
105 self.indent = self.indent - 1 |
|
106 self.msg(*args) |
|
107 |
|
108 def run_script(self, pathname): |
|
109 self.msg(2, "run_script", pathname) |
|
110 fp = open(pathname, READ_MODE) |
|
111 stuff = ("", "r", imp.PY_SOURCE) |
|
112 self.load_module('__main__', fp, pathname, stuff) |
|
113 |
|
114 def load_file(self, pathname): |
|
115 dir, name = os.path.split(pathname) |
|
116 name, ext = os.path.splitext(name) |
|
117 fp = open(pathname, READ_MODE) |
|
118 stuff = (ext, "r", imp.PY_SOURCE) |
|
119 self.load_module(name, fp, pathname, stuff) |
|
120 |
|
121 def import_hook(self, name, caller=None, fromlist=None): |
|
122 self.msg(3, "import_hook", name, caller, fromlist) |
|
123 parent = self.determine_parent(caller) |
|
124 q, tail = self.find_head_package(parent, name) |
|
125 m = self.load_tail(q, tail) |
|
126 if not fromlist: |
|
127 return q |
|
128 if m.__path__: |
|
129 self.ensure_fromlist(m, fromlist) |
|
130 return None |
|
131 |
|
132 def determine_parent(self, caller): |
|
133 self.msgin(4, "determine_parent", caller) |
|
134 if not caller: |
|
135 self.msgout(4, "determine_parent -> None") |
|
136 return None |
|
137 pname = caller.__name__ |
|
138 if caller.__path__: |
|
139 parent = self.modules[pname] |
|
140 assert caller is parent |
|
141 self.msgout(4, "determine_parent ->", parent) |
|
142 return parent |
|
143 if '.' in pname: |
|
144 i = pname.rfind('.') |
|
145 pname = pname[:i] |
|
146 parent = self.modules[pname] |
|
147 assert parent.__name__ == pname |
|
148 self.msgout(4, "determine_parent ->", parent) |
|
149 return parent |
|
150 self.msgout(4, "determine_parent -> None") |
|
151 return None |
|
152 |
|
153 def find_head_package(self, parent, name): |
|
154 self.msgin(4, "find_head_package", parent, name) |
|
155 if '.' in name: |
|
156 i = name.find('.') |
|
157 head = name[:i] |
|
158 tail = name[i+1:] |
|
159 else: |
|
160 head = name |
|
161 tail = "" |
|
162 if parent: |
|
163 qname = "%s.%s" % (parent.__name__, head) |
|
164 else: |
|
165 qname = head |
|
166 q = self.import_module(head, qname, parent) |
|
167 if q: |
|
168 self.msgout(4, "find_head_package ->", (q, tail)) |
|
169 return q, tail |
|
170 if parent: |
|
171 qname = head |
|
172 parent = None |
|
173 q = self.import_module(head, qname, parent) |
|
174 if q: |
|
175 self.msgout(4, "find_head_package ->", (q, tail)) |
|
176 return q, tail |
|
177 self.msgout(4, "raise ImportError: No module named", qname) |
|
178 raise ImportError, "No module named " + qname |
|
179 |
|
180 def load_tail(self, q, tail): |
|
181 self.msgin(4, "load_tail", q, tail) |
|
182 m = q |
|
183 while tail: |
|
184 i = tail.find('.') |
|
185 if i < 0: i = len(tail) |
|
186 head, tail = tail[:i], tail[i+1:] |
|
187 mname = "%s.%s" % (m.__name__, head) |
|
188 m = self.import_module(head, mname, m) |
|
189 if not m: |
|
190 self.msgout(4, "raise ImportError: No module named", mname) |
|
191 raise ImportError, "No module named " + mname |
|
192 self.msgout(4, "load_tail ->", m) |
|
193 return m |
|
194 |
|
195 def ensure_fromlist(self, m, fromlist, recursive=0): |
|
196 self.msg(4, "ensure_fromlist", m, fromlist, recursive) |
|
197 for sub in fromlist: |
|
198 if sub == "*": |
|
199 if not recursive: |
|
200 all = self.find_all_submodules(m) |
|
201 if all: |
|
202 self.ensure_fromlist(m, all, 1) |
|
203 elif not hasattr(m, sub): |
|
204 subname = "%s.%s" % (m.__name__, sub) |
|
205 submod = self.import_module(sub, subname, m) |
|
206 if not submod: |
|
207 raise ImportError, "No module named " + subname |
|
208 |
|
209 def find_all_submodules(self, m): |
|
210 if not m.__path__: |
|
211 return |
|
212 modules = {} |
|
213 # 'suffixes' used to be a list hardcoded to [".py", ".pyc", ".pyo"]. |
|
214 # But we must also collect Python extension modules - although |
|
215 # we cannot separate normal dlls from Python extensions. |
|
216 suffixes = [] |
|
217 for triple in imp.get_suffixes(): |
|
218 suffixes.append(triple[0]) |
|
219 for dir in m.__path__: |
|
220 try: |
|
221 names = os.listdir(dir) |
|
222 except os.error: |
|
223 self.msg(2, "can't list directory", dir) |
|
224 continue |
|
225 for name in names: |
|
226 mod = None |
|
227 for suff in suffixes: |
|
228 n = len(suff) |
|
229 if name[-n:] == suff: |
|
230 mod = name[:-n] |
|
231 break |
|
232 if mod and mod != "__init__": |
|
233 modules[mod] = mod |
|
234 return modules.keys() |
|
235 |
|
236 def import_module(self, partname, fqname, parent): |
|
237 self.msgin(3, "import_module", partname, fqname, parent) |
|
238 try: |
|
239 m = self.modules[fqname] |
|
240 except KeyError: |
|
241 pass |
|
242 else: |
|
243 self.msgout(3, "import_module ->", m) |
|
244 return m |
|
245 if self.badmodules.has_key(fqname): |
|
246 self.msgout(3, "import_module -> None") |
|
247 return None |
|
248 if parent and parent.__path__ is None: |
|
249 self.msgout(3, "import_module -> None") |
|
250 return None |
|
251 try: |
|
252 fp, pathname, stuff = self.find_module(partname, |
|
253 parent and parent.__path__, parent) |
|
254 except ImportError: |
|
255 self.msgout(3, "import_module ->", None) |
|
256 return None |
|
257 try: |
|
258 m = self.load_module(fqname, fp, pathname, stuff) |
|
259 finally: |
|
260 if fp: fp.close() |
|
261 if parent: |
|
262 setattr(parent, partname, m) |
|
263 self.msgout(3, "import_module ->", m) |
|
264 return m |
|
265 |
|
266 def load_module(self, fqname, fp, pathname, (suffix, mode, type)): |
|
267 self.msgin(2, "load_module", fqname, fp and "fp", pathname) |
|
268 if type == imp.PKG_DIRECTORY: |
|
269 m = self.load_package(fqname, pathname) |
|
270 self.msgout(2, "load_module ->", m) |
|
271 return m |
|
272 if type == imp.PY_SOURCE: |
|
273 co = compile(fp.read()+'\n', pathname, 'exec') |
|
274 elif type == imp.PY_COMPILED: |
|
275 if fp.read(4) != imp.get_magic(): |
|
276 self.msgout(2, "raise ImportError: Bad magic number", pathname) |
|
277 raise ImportError, "Bad magic number in %s" % pathname |
|
278 fp.read(4) |
|
279 co = marshal.load(fp) |
|
280 else: |
|
281 co = None |
|
282 m = self.add_module(fqname) |
|
283 m.__file__ = pathname |
|
284 if co: |
|
285 if self.replace_paths: |
|
286 co = self.replace_paths_in_code(co) |
|
287 m.__code__ = co |
|
288 self.scan_code(co, m) |
|
289 self.msgout(2, "load_module ->", m) |
|
290 return m |
|
291 |
|
292 def _add_badmodule(self, name, caller): |
|
293 if name not in self.badmodules: |
|
294 self.badmodules[name] = {} |
|
295 self.badmodules[name][caller.__name__] = 1 |
|
296 |
|
297 def _safe_import_hook(self, name, caller, fromlist): |
|
298 # wrapper for self.import_hook() that won't raise ImportError |
|
299 if name in self.badmodules: |
|
300 self._add_badmodule(name, caller) |
|
301 return |
|
302 try: |
|
303 self.import_hook(name, caller) |
|
304 except ImportError, msg: |
|
305 self.msg(2, "ImportError:", str(msg)) |
|
306 self._add_badmodule(name, caller) |
|
307 else: |
|
308 if fromlist: |
|
309 for sub in fromlist: |
|
310 if sub in self.badmodules: |
|
311 self._add_badmodule(sub, caller) |
|
312 continue |
|
313 try: |
|
314 self.import_hook(name, caller, [sub]) |
|
315 except ImportError, msg: |
|
316 self.msg(2, "ImportError:", str(msg)) |
|
317 fullname = name + "." + sub |
|
318 self._add_badmodule(fullname, caller) |
|
319 |
|
320 def scan_code(self, co, m): |
|
321 code = co.co_code |
|
322 n = len(code) |
|
323 i = 0 |
|
324 fromlist = None |
|
325 while i < n: |
|
326 c = code[i] |
|
327 i = i+1 |
|
328 op = ord(c) |
|
329 if op >= dis.HAVE_ARGUMENT: |
|
330 oparg = ord(code[i]) + ord(code[i+1])*256 |
|
331 i = i+2 |
|
332 if op == LOAD_CONST: |
|
333 # An IMPORT_NAME is always preceded by a LOAD_CONST, it's |
|
334 # a tuple of "from" names, or None for a regular import. |
|
335 # The tuple may contain "*" for "from <mod> import *" |
|
336 fromlist = co.co_consts[oparg] |
|
337 elif op == IMPORT_NAME: |
|
338 assert fromlist is None or type(fromlist) is tuple |
|
339 name = co.co_names[oparg] |
|
340 have_star = 0 |
|
341 if fromlist is not None: |
|
342 if "*" in fromlist: |
|
343 have_star = 1 |
|
344 fromlist = [f for f in fromlist if f != "*"] |
|
345 self._safe_import_hook(name, m, fromlist) |
|
346 if have_star: |
|
347 # We've encountered an "import *". If it is a Python module, |
|
348 # the code has already been parsed and we can suck out the |
|
349 # global names. |
|
350 mm = None |
|
351 if m.__path__: |
|
352 # At this point we don't know whether 'name' is a |
|
353 # submodule of 'm' or a global module. Let's just try |
|
354 # the full name first. |
|
355 mm = self.modules.get(m.__name__ + "." + name) |
|
356 if mm is None: |
|
357 mm = self.modules.get(name) |
|
358 if mm is not None: |
|
359 m.globalnames.update(mm.globalnames) |
|
360 m.starimports.update(mm.starimports) |
|
361 if mm.__code__ is None: |
|
362 m.starimports[name] = 1 |
|
363 else: |
|
364 m.starimports[name] = 1 |
|
365 elif op in STORE_OPS: |
|
366 # keep track of all global names that are assigned to |
|
367 name = co.co_names[oparg] |
|
368 m.globalnames[name] = 1 |
|
369 for c in co.co_consts: |
|
370 if isinstance(c, type(co)): |
|
371 self.scan_code(c, m) |
|
372 |
|
373 def load_package(self, fqname, pathname): |
|
374 self.msgin(2, "load_package", fqname, pathname) |
|
375 newname = replacePackageMap.get(fqname) |
|
376 if newname: |
|
377 fqname = newname |
|
378 m = self.add_module(fqname) |
|
379 m.__file__ = pathname |
|
380 m.__path__ = [pathname] |
|
381 |
|
382 # As per comment at top of file, simulate runtime __path__ additions. |
|
383 m.__path__ = m.__path__ + packagePathMap.get(fqname, []) |
|
384 |
|
385 fp, buf, stuff = self.find_module("__init__", m.__path__) |
|
386 self.load_module(fqname, fp, buf, stuff) |
|
387 self.msgout(2, "load_package ->", m) |
|
388 return m |
|
389 |
|
390 def add_module(self, fqname): |
|
391 if self.modules.has_key(fqname): |
|
392 return self.modules[fqname] |
|
393 self.modules[fqname] = m = Module(fqname) |
|
394 return m |
|
395 |
|
396 def find_module(self, name, path, parent=None): |
|
397 if parent is not None: |
|
398 # assert path is not None |
|
399 fullname = parent.__name__+'.'+name |
|
400 else: |
|
401 fullname = name |
|
402 if fullname in self.excludes: |
|
403 self.msgout(3, "find_module -> Excluded", fullname) |
|
404 raise ImportError, name |
|
405 |
|
406 if path is None: |
|
407 if name in sys.builtin_module_names: |
|
408 return (None, None, ("", "", imp.C_BUILTIN)) |
|
409 |
|
410 path = self.path |
|
411 return imp.find_module(name, path) |
|
412 |
|
413 def report(self): |
|
414 """Print a report to stdout, listing the found modules with their |
|
415 paths, as well as modules that are missing, or seem to be missing. |
|
416 """ |
|
417 print |
|
418 print " %-25s %s" % ("Name", "File") |
|
419 print " %-25s %s" % ("----", "----") |
|
420 # Print modules found |
|
421 keys = self.modules.keys() |
|
422 keys.sort() |
|
423 for key in keys: |
|
424 m = self.modules[key] |
|
425 if m.__path__: |
|
426 print "P", |
|
427 else: |
|
428 print "m", |
|
429 print "%-25s" % key, m.__file__ or "" |
|
430 |
|
431 # Print missing modules |
|
432 missing, maybe = self.any_missing_maybe() |
|
433 if missing: |
|
434 print |
|
435 print "Missing modules:" |
|
436 for name in missing: |
|
437 mods = self.badmodules[name].keys() |
|
438 mods.sort() |
|
439 print "?", name, "imported from", ', '.join(mods) |
|
440 # Print modules that may be missing, but then again, maybe not... |
|
441 if maybe: |
|
442 print |
|
443 print "Submodules thay appear to be missing, but could also be", |
|
444 print "global names in the parent package:" |
|
445 for name in maybe: |
|
446 mods = self.badmodules[name].keys() |
|
447 mods.sort() |
|
448 print "?", name, "imported from", ', '.join(mods) |
|
449 |
|
450 def any_missing(self): |
|
451 """Return a list of modules that appear to be missing. Use |
|
452 any_missing_maybe() if you want to know which modules are |
|
453 certain to be missing, and which *may* be missing. |
|
454 """ |
|
455 missing, maybe = self.any_missing_maybe() |
|
456 return missing + maybe |
|
457 |
|
458 def any_missing_maybe(self): |
|
459 """Return two lists, one with modules that are certainly missing |
|
460 and one with modules that *may* be missing. The latter names could |
|
461 either be submodules *or* just global names in the package. |
|
462 |
|
463 The reason it can't always be determined is that it's impossible to |
|
464 tell which names are imported when "from module import *" is done |
|
465 with an extension module, short of actually importing it. |
|
466 """ |
|
467 missing = [] |
|
468 maybe = [] |
|
469 for name in self.badmodules: |
|
470 if name in self.excludes: |
|
471 continue |
|
472 i = name.rfind(".") |
|
473 if i < 0: |
|
474 missing.append(name) |
|
475 continue |
|
476 subname = name[i+1:] |
|
477 pkgname = name[:i] |
|
478 pkg = self.modules.get(pkgname) |
|
479 if pkg is not None: |
|
480 if pkgname in self.badmodules[name]: |
|
481 # The package tried to import this module itself and |
|
482 # failed. It's definitely missing. |
|
483 missing.append(name) |
|
484 elif subname in pkg.globalnames: |
|
485 # It's a global in the package: definitely not missing. |
|
486 pass |
|
487 elif pkg.starimports: |
|
488 # It could be missing, but the package did an "import *" |
|
489 # from a non-Python module, so we simply can't be sure. |
|
490 maybe.append(name) |
|
491 else: |
|
492 # It's not a global in the package, the package didn't |
|
493 # do funny star imports, it's very likely to be missing. |
|
494 # The symbol could be inserted into the package from the |
|
495 # outside, but since that's not good style we simply list |
|
496 # it missing. |
|
497 missing.append(name) |
|
498 else: |
|
499 missing.append(name) |
|
500 missing.sort() |
|
501 maybe.sort() |
|
502 return missing, maybe |
|
503 |
|
504 def replace_paths_in_code(self, co): |
|
505 new_filename = original_filename = os.path.normpath(co.co_filename) |
|
506 for f, r in self.replace_paths: |
|
507 if original_filename.startswith(f): |
|
508 new_filename = r + original_filename[len(f):] |
|
509 break |
|
510 |
|
511 if self.debug and original_filename not in self.processed_paths: |
|
512 if new_filename != original_filename: |
|
513 self.msgout(2, "co_filename %r changed to %r" \ |
|
514 % (original_filename,new_filename,)) |
|
515 else: |
|
516 self.msgout(2, "co_filename %r remains unchanged" \ |
|
517 % (original_filename,)) |
|
518 self.processed_paths.append(original_filename) |
|
519 |
|
520 consts = list(co.co_consts) |
|
521 for i in range(len(consts)): |
|
522 if isinstance(consts[i], type(co)): |
|
523 consts[i] = self.replace_paths_in_code(consts[i]) |
|
524 |
|
525 return new.code(co.co_argcount, co.co_nlocals, co.co_stacksize, |
|
526 co.co_flags, co.co_code, tuple(consts), co.co_names, |
|
527 co.co_varnames, new_filename, co.co_name, |
|
528 co.co_firstlineno, co.co_lnotab, |
|
529 co.co_freevars, co.co_cellvars) |
|
530 |
|
531 |
|
532 def test(): |
|
533 # Parse command line |
|
534 import getopt |
|
535 try: |
|
536 opts, args = getopt.getopt(sys.argv[1:], "dmp:qx:") |
|
537 except getopt.error, msg: |
|
538 print msg |
|
539 return |
|
540 |
|
541 # Process options |
|
542 debug = 1 |
|
543 domods = 0 |
|
544 addpath = [] |
|
545 exclude = [] |
|
546 for o, a in opts: |
|
547 if o == '-d': |
|
548 debug = debug + 1 |
|
549 if o == '-m': |
|
550 domods = 1 |
|
551 if o == '-p': |
|
552 addpath = addpath + a.split(os.pathsep) |
|
553 if o == '-q': |
|
554 debug = 0 |
|
555 if o == '-x': |
|
556 exclude.append(a) |
|
557 |
|
558 # Provide default arguments |
|
559 if not args: |
|
560 script = "hello.py" |
|
561 else: |
|
562 script = args[0] |
|
563 |
|
564 # Set the path based on sys.path and the script directory |
|
565 path = sys.path[:] |
|
566 path[0] = os.path.dirname(script) |
|
567 path = addpath + path |
|
568 if debug > 1: |
|
569 print "path:" |
|
570 for item in path: |
|
571 print " ", repr(item) |
|
572 |
|
573 # Create the module finder and turn its crank |
|
574 mf = ModuleFinder(path, debug, exclude) |
|
575 for arg in args[1:]: |
|
576 if arg == '-m': |
|
577 domods = 1 |
|
578 continue |
|
579 if domods: |
|
580 if arg[-2:] == '.*': |
|
581 mf.import_hook(arg[:-2], None, ["*"]) |
|
582 else: |
|
583 mf.import_hook(arg) |
|
584 else: |
|
585 mf.load_file(arg) |
|
586 mf.run_script(script) |
|
587 mf.report() |
|
588 return mf # for -i debugging |
|
589 |
|
590 |
|
591 if __name__ == '__main__': |
|
592 try: |
|
593 mf = test() |
|
594 except KeyboardInterrupt: |
|
595 print "\n[interrupt]" |