|
1 #!/usr/bin/env python |
|
2 |
|
3 """\ |
|
4 List python source files. |
|
5 |
|
6 There are three functions to check whether a file is a Python source, listed |
|
7 here with increasing complexity: |
|
8 |
|
9 - has_python_ext() checks whether a file name ends in '.py[w]'. |
|
10 - looks_like_python() checks whether the file is not binary and either has |
|
11 the '.py[w]' extension or the first line contains the word 'python'. |
|
12 - can_be_compiled() checks whether the file can be compiled by compile(). |
|
13 |
|
14 The file also must be of appropriate size - not bigger than a megabyte. |
|
15 |
|
16 walk_python_files() recursively lists all Python files under the given directories. |
|
17 """ |
|
18 __author__ = "Oleg Broytmann, Georg Brandl" |
|
19 |
|
20 __all__ = ["has_python_ext", "looks_like_python", "can_be_compiled", "walk_python_files"] |
|
21 |
|
22 |
|
23 import os, re |
|
24 |
|
# Matches any control character other than tab, LF, VT, FF and CR; a hit in
# the text read from a file is taken to mean that the file is binary.
binary_re = re.compile('[\x00-\x08\x0E-\x1F\x7F]')

# When true, print_debug() prints its diagnostics; when false it stays silent.
debug = False
|
28 |
|
def print_debug(msg):
    """Print msg, but only while the module-level ``debug`` flag is true."""
    if not debug:
        return
    # A single parenthesised argument prints identically under Python 2.
    print(msg)
|
31 |
|
32 |
|
def _open(fullpath):
    """Open fullpath for reading, or return None if the file is to be skipped.

    None is returned when the file cannot be stat()ed, when it is larger than
    one megabyte, or when open() itself fails; each of these cases is reported
    through print_debug().  Otherwise the open file object is returned and
    the caller is expected to close it.
    """
    try:
        size = os.stat(fullpath).st_size
    except OSError as err:  # Permission denied - ignore the file
        print_debug("%s: permission denied: %s" % (fullpath, err))
        return None

    if size > 1024*1024:  # too big
        print_debug("%s: the file is too big: %d bytes" % (fullpath, size))
        return None

    try:
        # 'U' requests universal-newline translation of the text that is read.
        # NOTE(review): this flag no longer exists in Python 3.11+; revisit the
        # mode if the module is ever ported.
        return open(fullpath, 'rU')
    except IOError as err:  # Access denied, or a special file - ignore it
        print_debug("%s: access denied: %s" % (fullpath, err))
        return None
|
49 |
|
def has_python_ext(fullpath):
    """Return True if fullpath ends in '.py' or '.pyw', False otherwise."""
    python_suffixes = (".py", ".pyw")
    return fullpath.endswith(python_suffixes)
|
52 |
|
def looks_like_python(fullpath):
    """Cheaply guess whether fullpath is a Python source file.

    Only the first line of the file is read.  The result is False when the
    file cannot be opened by _open() or when that first line contains a
    character matched by binary_re.  Otherwise the result is True if the name
    has a Python extension or the first line contains the substring 'python',
    and False if neither holds.
    """
    infile = _open(fullpath)
    if infile is None:
        return False

    # Close the file even if reading the first line fails.
    try:
        line = infile.readline()
    finally:
        infile.close()

    if binary_re.search(line):
        # file appears to be binary
        print_debug("%s: appears to be binary" % fullpath)
        return False

    # A known extension is enough; otherwise a first line that mentions
    # "python" marks a disguised Python script (e.g. CGI).
    return has_python_ext(fullpath) or "python" in line
|
73 |
|
def can_be_compiled(fullpath):
    """Return True if the contents of fullpath are accepted by compile().

    The whole file is read and compiled in "exec" mode; nothing is executed.
    The result is False when the file cannot be opened by _open() or when
    compile() raises any exception, which is reported through print_debug().
    """
    infile = _open(fullpath)
    if infile is None:
        return False

    # Close the file even if reading it fails.
    try:
        code = infile.read()
    finally:
        infile.close()

    try:
        compile(code, fullpath, "exec")
    except Exception as err:
        # Deliberately broad: whatever compile() raises, the file simply does
        # not count as Python source.
        print_debug("%s: cannot compile: %s" % (fullpath, err))
        return False

    return True
|
89 |
|
90 |
|
def walk_python_files(paths, is_python=looks_like_python, exclude_dirs=None):
    """\
    Generate the Python source files found at or beneath the given paths.

    paths: an iterable of file and/or directory names to examine.
    is_python: a predicate called with a file name; the name is yielded when
               the predicate returns a true value.
    exclude_dirs: directory base names that are not descended into during the
                  walk; None (the default) excludes nothing.

    An entry that is a regular file is tested directly, a directory is walked
    with os.walk(), and anything else is only reported through print_debug().
    """
    excluded = exclude_dirs
    if excluded is None:
        excluded = []

    for path in paths:
        print_debug("testing: %s" % path)

        if os.path.isfile(path):
            if is_python(path):
                yield path
            continue

        if not os.path.isdir(path):
            print_debug(" unknown type")
            continue

        print_debug(" it is a directory")
        for root, subdirs, names in os.walk(path):
            # Removing an entry from the list in place keeps os.walk() from
            # descending into that directory.
            for unwanted in excluded:
                if unwanted in subdirs:
                    subdirs.remove(unwanted)
            for name in names:
                candidate = os.path.join(root, name)
                print_debug("testing: %s" % candidate)
                if is_python(candidate):
                    yield candidate
|
122 |
|
123 |
|
if __name__ == "__main__":
    # Two simple examples/tests: list the current directory first with the
    # default check, then with the compile-based check.
    for fullpath in walk_python_files(['.']):
        print(fullpath)
    print("----------")
    for fullpath in walk_python_files(['.'], is_python=can_be_compiled):
        print(fullpath)