|
1 #! /usr/bin/env python |
|
2 |
|
3 # Released to the public domain, by Tim Peters, 03 October 2000. |
|
4 |
|
5 """reindent [-d][-r][-v] [ path ... ] |
|
6 |
|
7 -d (--dryrun) Dry run. Analyze, but don't make any changes to, files. |
|
8 -r (--recurse) Recurse. Search for all .py files in subdirectories too. |
|
9 -v (--verbose) Verbose. Print informative msgs; else no output. |
|
10 -h (--help) Help. Print this usage information and exit. |
|
11 |
|
12 Change Python (.py) files to use 4-space indents and no hard tab characters. |
|
13 Also trim excess spaces and tabs from ends of lines, and remove empty lines |
|
14 at the end of files. Also ensure the last line ends with a newline. |
|
15 |
|
16 If no paths are given on the command line, reindent operates as a filter, |
|
17 reading a single source file from standard input and writing the transformed |
|
18 source to standard output. In this case, the -d, -r and -v flags are |
|
19 ignored. |
|
20 |
|
21 You can pass one or more file and/or directory paths. When a directory |
|
22 path, all .py files within the directory will be examined, and, if the -r |
|
23 option is given, likewise recursively for subdirectories. |
|
24 |
|
25 If output is not to standard output, reindent overwrites files in place, |
|
26 renaming the originals with a .bak extension. If it finds nothing to |
|
27 change, the file is left alone. If reindent does change a file, the changed |
|
28 file is a fixed-point for future runs (i.e., running reindent on the |
|
29 resulting .py file won't change it again). |
|
30 |
|
31 The hard part of reindenting is figuring out what to do with comment |
|
32 lines. So long as the input files get a clean bill of health from |
|
33 tabnanny.py, reindent should do a good job. |
|
34 """ |
|
35 |
|
36 __version__ = "1" |
|
37 |
|
38 import tokenize |
|
39 import os |
|
40 import sys |
|
41 |
|
# Command-line flags, all off by default.  main() bumps each one once per
# occurrence of its option; check() only tests them for truth.
#   verbose -- print progress messages while working
#   recurse -- descend into subdirectories when given a directory
#   dryrun  -- analyze files but never rewrite them
verbose = recurse = dryrun = 0
|
45 |
|
def usage(msg=None):
    """Write an optional message, then the module usage text, to stderr.

    msg -- text (or any object str() accepts, such as a getopt error)
           shown on its own line before the usage text; skipped when None.
    """
    # sys.stderr.write() replaces the Python-2-only "print >> sys.stderr"
    # chevron form, which is a runtime error on Python 3.  The output is
    # the same: each item followed by a newline.
    if msg is not None:
        sys.stderr.write("%s\n" % (msg,))
    sys.stderr.write("%s\n" % (__doc__,))
|
50 |
|
def errprint(*args):
    """Write the arguments to stderr, space-separated, ending with a newline.

    Each argument is converted with str() first.
    """
    sys.stderr.write(" ".join(map(str, args)))
    sys.stderr.write("\n")
|
57 |
|
def main():
    """Parse the command line and reindent the requested paths.

    Recognized options are -d/--dryrun, -r/--recurse, -v/--verbose and
    -h/--help; the first three increment the matching module-level flag.
    An option error or -h prints the usage text and returns.

    With no path arguments the function acts as a filter: it reads one
    source file from stdin and writes the transformed source to stdout.
    Otherwise every path argument is handed to check().
    """
    import getopt
    global verbose, recurse, dryrun
    try:
        opts, args = getopt.getopt(sys.argv[1:], "drvh",
                                   ["dryrun", "recurse", "verbose", "help"])
    except getopt.error as msg:
        # "except X as e" works on Python 2.6+ and 3; the older
        # "except X, e" spelling is a syntax error on Python 3.
        usage(msg)
        return
    for opt, _value in opts:  # none of the options takes a value
        if opt in ('-d', '--dryrun'):
            dryrun += 1
        elif opt in ('-r', '--recurse'):
            recurse += 1
        elif opt in ('-v', '--verbose'):
            verbose += 1
        elif opt in ('-h', '--help'):
            usage()
            return
    if not args:
        # Filter mode: stdin -> stdout, regardless of the flags above.
        r = Reindenter(sys.stdin)
        r.run()
        r.write(sys.stdout)
        return
    for arg in args:
        check(arg)
|
84 |
|
def check(file):
    """Reindent a single path, walking into it if it is a directory.

    file -- path of a file or directory.

    A (non-symlink) directory is listed and check() is called on every
    entry whose name ends in ".py" (case-insensitively) and, when the
    module-level `recurse` flag is set, on every non-symlink subdirectory.

    Any other path is opened and run through Reindenter.  If the result
    differs from the input and `dryrun` is not set, the original is renamed
    to "<file>.bak" (replacing an existing backup) and the new text is
    written to `file`.  Progress messages go to stdout when `verbose` is
    set; a failure to open the file is reported on stderr and skipped.
    """
    if os.path.isdir(file) and not os.path.islink(file):
        if verbose:
            print("listing directory %s" % (file,))
        names = os.listdir(file)
        for name in names:
            fullname = os.path.join(file, name)
            if ((recurse and os.path.isdir(fullname) and
                 not os.path.islink(fullname))
                    or name.lower().endswith(".py")):
                check(fullname)
        return

    if verbose:
        # No newline yet: the verdict (" changed." / " unchanged.") is
        # appended to this same line below.
        sys.stdout.write("checking %s ..." % (file,))
    try:
        f = open(file)
    except IOError as msg:
        errprint("%s: I/O Error: %s" % (file, str(msg)))
        return

    # Reindenter reads the whole file in its constructor; make sure the
    # handle is released even if that read fails.
    try:
        r = Reindenter(f)
    finally:
        f.close()
    if r.run():
        if verbose:
            sys.stdout.write(" changed.\n")
            if dryrun:
                print("But this is a dry run, so leaving it alone.")
        if not dryrun:
            bak = file + ".bak"
            if os.path.exists(bak):
                os.remove(bak)
            os.rename(file, bak)
            if verbose:
                print("renamed %s to %s" % (file, bak))
            f = open(file, "w")
            try:
                r.write(f)
            finally:
                f.close()
            if verbose:
                print("wrote new %s" % (file,))
    else:
        if verbose:
            sys.stdout.write(" unchanged.\n")
|
128 |
|
def _rstrip(line, JUNK='\n \t'):
    """Return line with trailing characters found in JUNK removed.

    By default that is spaces, tabs and newlines only.  An argument-less
    line.rstrip() would also eat other trailing whitespace such as form
    feeds, which some editors' users want preserved, so the set of
    characters to strip is passed explicitly.
    """
    return line.rstrip(JUNK)
|
141 |
|
class Reindenter:
    """Rewrite one Python source file to use 4-space indentation levels.

    Usage: construct from an open file object, call run() to compute the
    transformed program, then write() to emit it.
    """

    def __init__(self, f):
        """Read every line from file object f and set up tokenizer state."""
        self.find_stmt = 1  # next token begins a fresh stmt?
        self.level = 0      # current indent level

        # Raw file lines.
        self.raw = f.readlines()

        # File lines, rstripped & tab-expanded.  Dummy at start is so
        # that we can use tokenize's 1-based line numbering easily.
        # Note that a line is all-blank iff it's "\n".
        self.lines = [_rstrip(line).expandtabs() + "\n"
                      for line in self.raw]
        self.lines.insert(0, None)
        self.index = 1  # index into self.lines of next line

        # List of (lineno, indentlevel) pairs, one for each stmt and
        # comment line.  indentlevel is -1 for comment lines, as a
        # signal that tokenize doesn't know what to do about them;
        # indeed, they're our headache!
        self.stats = []

    def run(self):
        """Compute the reindented program into self.after.

        Returns true if the result differs from the raw input lines, i.e.
        if the file would be changed.
        """
        # generate_tokens() is available on both Python 2 and Python 3;
        # the callback form tokenize.tokenize(readline, eater) exists only
        # on Python 2.
        for tok in tokenize.generate_tokens(self.getline):
            self.tokeneater(*tok)
        # Remove trailing empty lines.
        lines = self.lines
        while lines and lines[-1] == "\n":
            lines.pop()
        # Sentinel.
        stats = self.stats
        stats.append((len(lines), 0))
        # Map count of leading spaces to # we want.
        have2want = {}
        # Program after transformation.
        after = self.after = []
        # Copy over initial empty lines -- there's nothing to do until
        # we see a line with *something* on it.
        i = stats[0][0]
        after.extend(lines[1:i])
        for i in range(len(stats) - 1):
            thisstmt, thislevel = stats[i]
            nextstmt = stats[i + 1][0]
            have = getlspace(lines[thisstmt])
            want = thislevel * 4
            if want < 0:
                # A comment line.
                if have:
                    # An indented comment line.  If we saw the same
                    # indentation before, reuse what it most recently
                    # mapped to.
                    want = have2want.get(have, -1)
                    if want < 0:
                        # Then it probably belongs to the next real stmt.
                        for j in range(i + 1, len(stats) - 1):
                            jline, jlevel = stats[j]
                            if jlevel >= 0:
                                if have == getlspace(lines[jline]):
                                    want = jlevel * 4
                                # Only the first real stmt is consulted.
                                break
                    if want < 0:           # Maybe it's a hanging
                                           # comment like this one,
                        # in which case we should shift it like its base
                        # line got shifted.
                        for j in range(i - 1, -1, -1):
                            jline, jlevel = stats[j]
                            if jlevel >= 0:
                                want = have + getlspace(after[jline - 1]) - \
                                       getlspace(lines[jline])
                                break
                    if want < 0:
                        # Still no luck -- leave it alone.
                        want = have
                else:
                    want = 0
            assert want >= 0
            have2want[have] = want
            diff = want - have
            if diff == 0 or have == 0:
                after.extend(lines[thisstmt:nextstmt])
            else:
                # Shift the stmt and all lines up to the next stmt by
                # the same amount.
                for line in lines[thisstmt:nextstmt]:
                    if diff > 0:
                        if line == "\n":
                            # Keep all-blank lines blank.
                            after.append(line)
                        else:
                            after.append(" " * diff + line)
                    else:
                        # Never remove more than the leading blanks.
                        remove = min(getlspace(line), -diff)
                        after.append(line[remove:])
        return self.raw != self.after

    def write(self, f):
        """Write the transformed program (computed by run()) to f."""
        f.writelines(self.after)

    # Line-getter for tokenize.
    def getline(self):
        """Return the next normalized line, or "" once they are used up."""
        if self.index >= len(self.lines):
            line = ""
        else:
            line = self.lines[self.index]
            self.index += 1
        return line

    # Line-eater for tokenize.
    def tokeneater(self, type, token, start, end, line,
                   INDENT=tokenize.INDENT,
                   DEDENT=tokenize.DEDENT,
                   NEWLINE=tokenize.NEWLINE,
                   COMMENT=tokenize.COMMENT,
                   NL=tokenize.NL):
        """Consume one token, tracking indent level and statement starts.

        The first five arguments are the fields of a tokenize token:
        token type, token text, (row, col) start, (row, col) end and the
        physical line.  The keyword arguments only cache tokenize
        constants.  Appends (lineno, level) to self.stats for the first
        token of each statement and (lineno, -1) for each comment line
        seen while looking for a statement.
        """
        # Unpacked here rather than in the signature: tuple parameters
        # are a syntax error on Python 3.
        sline, _scol = start

        if type == NEWLINE:
            # A program statement, or ENDMARKER, will eventually follow,
            # after some (possibly empty) run of tokens of the form
            #     (NL | COMMENT)* (INDENT | DEDENT+)?
            self.find_stmt = 1

        elif type == INDENT:
            self.find_stmt = 1
            self.level += 1

        elif type == DEDENT:
            self.find_stmt = 1
            self.level -= 1

        elif type == COMMENT:
            if self.find_stmt:
                self.stats.append((sline, -1))
                # but we're still looking for a new stmt, so leave
                # find_stmt alone

        elif type == NL:
            pass

        elif self.find_stmt:
            # This is the first "real token" following a NEWLINE, so it
            # must be the first token of the next program statement, or an
            # ENDMARKER.
            self.find_stmt = 0
            if line:   # not endmarker
                self.stats.append((sline, self.level))
|
284 |
|
def getlspace(line):
    """Return the number of leading space characters in line.

    Only " " counts; a tab or any other character ends the run.
    """
    return len(line) - len(line.lstrip(" "))
|
291 |
|
# Run main() only when executed as a script, not when imported.
if __name__ == '__main__':
    main()