|
1 #! /usr/bin/env python |
|
2 |
|
3 # Released to the public domain, by Tim Peters, 03 October 2000. |
|
4 |
|
5 """reindent [-d][-r][-v] [ path ... ] |
|
6 |
|
7 -d (--dryrun) Dry run. Analyze, but don't make any changes to, files. |
|
8 -r (--recurse) Recurse. Search for all .py files in subdirectories too. |
|
9 -n (--nobackup) No backup. Does not make a ".bak" file before reindenting. |
|
10 -v (--verbose) Verbose. Print informative msgs; else no output. |
|
11 -h (--help) Help. Print this usage information and exit. |
|
12 |
|
13 Change Python (.py) files to use 4-space indents and no hard tab characters. |
|
14 Also trim excess spaces and tabs from ends of lines, and remove empty lines |
|
15 at the end of files. Also ensure the last line ends with a newline. |
|
16 |
|
17 If no paths are given on the command line, reindent operates as a filter, |
|
18 reading a single source file from standard input and writing the transformed |
|
19 source to standard output. In this case, the -d, -r and -v flags are |
|
20 ignored. |
|
21 |
|
22 You can pass one or more file and/or directory paths. When a directory |
|
23 path, all .py files within the directory will be examined, and, if the -r |
|
24 option is given, likewise recursively for subdirectories. |
|
25 |
|
26 If output is not to standard output, reindent overwrites files in place, |
|
27 renaming the originals with a .bak extension. If it finds nothing to |
|
28 change, the file is left alone. If reindent does change a file, the changed |
|
29 file is a fixed-point for future runs (i.e., running reindent on the |
|
30 resulting .py file won't change it again). |
|
31 |
|
32 The hard part of reindenting is figuring out what to do with comment |
|
33 lines. So long as the input files get a clean bill of health from |
|
34 tabnanny.py, reindent should do a good job. |
|
35 |
|
36 The backup file is a copy of the one that is being reindented. The ".bak" |
|
37 file is generated with shutil.copyfile(), but some corner cases regarding |
|
38 user/group and permissions could leave the backup file more readable than |
|
39 you'd prefer. You can always use the --nobackup option to prevent this. |
|
40 """ |
|
41 |
|
42 __version__ = "1" |
|
43 |
|
44 import tokenize |
|
45 import os, shutil |
|
46 import sys |
|
47 |
|
# Option state shared by main() and check().  main() updates these from the
# command-line flags; check() only reads them.
verbose = 0        # incremented once per -v/--verbose; non-zero enables progress messages
recurse = 0        # incremented once per -r/--recurse; non-zero makes check() descend into subdirectories
dryrun = 0         # incremented once per -d/--dryrun; non-zero means files are analyzed but never rewritten
makebackup = True  # set to False by -n/--nobackup; when true a ".bak" copy is made before rewriting
|
52 |
|
def usage(msg=None):
    """Write the usage text (the module docstring) to standard error.

    msg -- optional diagnostic, e.g. the exception raised by getopt.  When
           it is not None it is written on its own line before the usage
           text.
    """
    # sys.stderr.write() replaces the Python-2-only ``print >> file`` form,
    # which fails under Python 3.  "%s" stringifies exception objects (and a
    # missing docstring) exactly as the print statement did, and the explicit
    # "\n" reproduces print's trailing newline.
    if msg is not None:
        sys.stderr.write("%s\n" % (msg,))
    sys.stderr.write("%s\n" % (__doc__,))
|
57 |
|
def errprint(*args):
    """Write the arguments to standard error on one line.

    Each argument is converted with str(); the pieces are separated by a
    single space and the line is terminated with a newline.  With no
    arguments only the newline is written.
    """
    sys.stderr.write(" ".join([str(piece) for piece in args]))
    sys.stderr.write("\n")
|
64 |
|
def main():
    """Command-line entry point.

    Parses sys.argv[1:] and records the flags in the module-level option
    globals.  An unknown option prints the error followed by the usage text
    and returns; -h/--help prints the usage text and returns.

    With no path arguments the script acts as a filter: the source is read
    from standard input, reindented, and written to standard output.
    Otherwise every path argument is handed to check().
    """
    import getopt
    global verbose, recurse, dryrun, makebackup
    try:
        opts, args = getopt.getopt(
            sys.argv[1:], "drnvh",
            ["dryrun", "recurse", "nobackup", "verbose", "help"])
    # ``except E as name`` is accepted by Python 2.6+ and Python 3; the old
    # ``except E, name`` spelling is a SyntaxError on Python 3.
    except getopt.error as msg:
        usage(msg)
        return
    for o, a in opts:
        if o in ('-d', '--dryrun'):
            dryrun += 1
        elif o in ('-r', '--recurse'):
            recurse += 1
        elif o in ('-n', '--nobackup'):
            makebackup = False
        elif o in ('-v', '--verbose'):
            verbose += 1
        elif o in ('-h', '--help'):
            usage()
            return
    if not args:
        # Filter mode: stdin -> stdout, nothing is touched on disk.
        r = Reindenter(sys.stdin)
        r.run()
        r.write(sys.stdout)
        return
    for arg in args:
        check(arg)
|
93 |
|
def check(file):
    """Reindent one path, which may be a directory or a single file.

    Directory (symbolic links to directories are not treated as such):
    every entry whose name ends in ".py" (case-insensitively) is checked,
    and when the global ``recurse`` is set so is every subdirectory that is
    not a symlink and whose name does not start with ".".  This branch
    returns None.

    Anything else is opened as a source file.  If reindenting changes it
    and the global ``dryrun`` is not set, the file is rewritten in place,
    after copying the original to file + ".bak" when the global
    ``makebackup`` is set.

    Returns True if the file needed changes, False if it did not, and None
    if it could not be opened (the I/O error is reported on stderr).
    Progress messages go to stdout when the global ``verbose`` is set.
    """
    # Explicit writes instead of print statements so the function runs on
    # both Python 2 and Python 3 with the same output.
    say = sys.stdout.write

    if os.path.isdir(file) and not os.path.islink(file):
        if verbose:
            say("listing directory %s\n" % (file,))
        for name in os.listdir(file):
            fullname = os.path.join(file, name)
            if ((recurse and os.path.isdir(fullname) and
                 not os.path.islink(fullname) and
                 not name.startswith("."))
                    or name.lower().endswith(".py")):
                check(fullname)
        return

    if verbose:
        # No newline yet: the verdict (" changed." / " unchanged.") is
        # appended to this same line further down.
        say("checking %s ..." % (file,))
    try:
        f = open(file)
    except IOError as msg:
        errprint("%s: I/O Error: %s" % (file, str(msg)))
        return

    # The Reindenter constructor reads the whole file, so the handle can be
    # released right after it -- and is released even if reading fails.
    with f:
        r = Reindenter(f)

    if not r.run():
        if verbose:
            say(" unchanged.\n")
        return False

    if verbose:
        say(" changed.\n")
        if dryrun:
            say("But this is a dry run, so leaving it alone.\n")
    if not dryrun:
        if makebackup:
            bak = file + ".bak"
            shutil.copyfile(file, bak)
            if verbose:
                say("backed up %s to %s\n" % (file, bak))
        # ``with`` guarantees the rewritten file is flushed and closed even
        # if writing raises part-way through.
        with open(file, "w") as f:
            r.write(f)
        if verbose:
            say("wrote new %s\n" % (file,))
    return True
|
139 |
|
140 def _rstrip(line, JUNK='\n \t'): |
|
141 """Return line stripped of trailing spaces, tabs, newlines. |
|
142 |
|
143 Note that line.rstrip() instead also strips sundry control characters, |
|
144 but at least one known Emacs user expects to keep junk like that, not |
|
145 mentioning Barry by name or anything <wink>. |
|
146 """ |
|
147 |
|
148 i = len(line) |
|
149 while i > 0 and line[i-1] in JUNK: |
|
150 i -= 1 |
|
151 return line[:i] |
|
152 |
|
class Reindenter:
    """Reindent one Python source text to 4 spaces per indent level.

    The constructor reads every line from a file-like object.  run()
    tokenizes the text, computes the transformed lines into self.after and
    reports whether they differ from the input; write() emits self.after.
    run() must be called before write().
    """

    def __init__(self, f):
        """Read all lines from f (any object with readlines())."""
        self.find_stmt = 1  # next token begins a fresh stmt?
        self.level = 0      # current indent level

        # Raw file lines.
        self.raw = f.readlines()

        # File lines, rstripped & tab-expanded.  Dummy at start is so
        # that we can use tokenize's 1-based line numbering easily.
        # Note that a line is all-blank iff it's "\n".
        self.lines = [_rstrip(line).expandtabs() + "\n"
                      for line in self.raw]
        self.lines.insert(0, None)
        self.index = 1  # index into self.lines of next line

        # List of (lineno, indentlevel) pairs, one for each stmt and
        # comment line.  indentlevel is -1 for comment lines, as a
        # signal that tokenize doesn't know what to do about them;
        # indeed, they're our headache!
        self.stats = []

    def run(self):
        """Compute the reindented program into self.after.

        Returns True if the result differs from the raw input lines,
        False otherwise.
        """
        # generate_tokens() exists on both Python 2 and Python 3, whereas
        # the callback form tokenize.tokenize(readline, tokeneater) is
        # Python 2 only.  Each token is a 5-tuple matching tokeneater's
        # positional parameters.
        for tok in tokenize.generate_tokens(self.getline):
            self.tokeneater(*tok)
        # Remove trailing empty lines.
        lines = self.lines
        while lines and lines[-1] == "\n":
            lines.pop()
        # Sentinel.
        stats = self.stats
        stats.append((len(lines), 0))
        # Map count of leading spaces to # we want.
        have2want = {}
        # Program after transformation.
        after = self.after = []
        # Copy over initial empty lines -- there's nothing to do until
        # we see a line with *something* on it.
        i = stats[0][0]
        after.extend(lines[1:i])
        for i in range(len(stats) - 1):
            thisstmt, thislevel = stats[i]
            nextstmt = stats[i + 1][0]
            have = getlspace(lines[thisstmt])
            want = thislevel * 4
            if want < 0:
                # A comment line.
                if have:
                    # An indented comment line.  If we saw the same
                    # indentation before, reuse what it most recently
                    # mapped to.
                    want = have2want.get(have, -1)
                    if want < 0:
                        # Then it probably belongs to the next real stmt.
                        # Only the first real stmt that follows is
                        # consulted, hence the unconditional break.
                        for j in range(i + 1, len(stats) - 1):
                            jline, jlevel = stats[j]
                            if jlevel >= 0:
                                if have == getlspace(lines[jline]):
                                    want = jlevel * 4
                                break
                    if want < 0:
                        # Maybe it's a hanging comment, in which case we
                        # should shift it like its base line (the nearest
                        # preceding real stmt) got shifted.
                        for j in range(i - 1, -1, -1):
                            jline, jlevel = stats[j]
                            if jlevel >= 0:
                                want = (have + getlspace(after[jline - 1]) -
                                        getlspace(lines[jline]))
                                break
                    if want < 0:
                        # Still no luck -- leave it alone.
                        want = have
                else:
                    want = 0
            assert want >= 0
            have2want[have] = want
            diff = want - have
            if diff == 0 or have == 0:
                after.extend(lines[thisstmt:nextstmt])
            else:
                for line in lines[thisstmt:nextstmt]:
                    if diff > 0:
                        if line == "\n":
                            # Keep blank lines blank.
                            after.append(line)
                        else:
                            after.append(" " * diff + line)
                    else:
                        # Never remove more than the line's own leading
                        # spaces.
                        remove = min(getlspace(line), -diff)
                        after.append(line[remove:])
        return self.raw != self.after

    def write(self, f):
        """Write the transformed lines computed by run() to f."""
        f.writelines(self.after)

    # Line-getter for tokenize.
    def getline(self):
        """Return the next normalized line, or "" once they are used up."""
        if self.index >= len(self.lines):
            line = ""
        else:
            line = self.lines[self.index]
            self.index += 1
        return line

    # Line-eater for tokenize.
    def tokeneater(self, type, token, slinecol, end, line,
                   INDENT=tokenize.INDENT,
                   DEDENT=tokenize.DEDENT,
                   NEWLINE=tokenize.NEWLINE,
                   COMMENT=tokenize.COMMENT,
                   NL=tokenize.NL):
        """Consume one token and update level, find_stmt and stats.

        The positional arguments are the five fields of a tokenize token:
        type, string, (start row, start column), end position, and the
        physical line.  Only type, the start row and line are used.
        """
        # Tuple parameters such as ``(sline, scol)`` are not valid in
        # Python 3, so the start position is taken as one argument.
        sline = slinecol[0]

        if type == NEWLINE:
            # A program statement, or ENDMARKER, will eventually follow,
            # after some (possibly empty) run of tokens of the form
            #     (NL | COMMENT)* (INDENT | DEDENT+)?
            self.find_stmt = 1

        elif type == INDENT:
            self.find_stmt = 1
            self.level += 1

        elif type == DEDENT:
            self.find_stmt = 1
            self.level -= 1

        elif type == COMMENT:
            if self.find_stmt:
                self.stats.append((sline, -1))
                # but we're still looking for a new stmt, so leave
                # find_stmt alone

        elif type == NL:
            pass

        elif self.find_stmt:
            # This is the first "real token" following a NEWLINE, so it
            # must be the first token of the next program statement, or an
            # ENDMARKER.
            self.find_stmt = 0
            if line:   # not endmarker
                self.stats.append((sline, self.level))
|
295 |
|
296 # Count number of leading blanks. |
|
def getlspace(line):
    """Return the number of space characters at the start of line.

    Only literal spaces are counted; a tab or any other character ends
    the run.
    """
    return len(line) - len(line.lstrip(" "))
|
302 |
|
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()