|
1 #! /usr/bin/env python |
|
2 |
|
3 # This file contains a class and a main program that perform three |
|
4 # related (though complementary) formatting operations on Python |
|
5 # programs. When called as "pindent -c", it takes a valid Python |
|
6 # program as input and outputs a version augmented with block-closing |
|
7 # comments. When called as "pindent -d", it assumes its input is a |
|
8 # Python program with block-closing comments and outputs a commentless |
|
9 # version. When called as "pindent -r" it assumes its input is a |
|
10 # Python program with block-closing comments but with its indentation |
|
11 # messed up, and outputs a properly indented version. |
|
12 |
|
13 # A "block-closing comment" is a comment of the form '# end <keyword>' |
|
14 # where <keyword> is the keyword that opened the block. If the |
|
15 # opening keyword is 'def' or 'class', the function or class name may |
|
16 # be repeated in the block-closing comment as well. Here is an |
|
17 # example of a program fully augmented with block-closing comments: |
|
18 |
|
19 # def foobar(a, b): |
|
20 # if a == b: |
|
21 # a = a+1 |
|
22 # elif a < b: |
|
23 # b = b-1 |
|
24 # if b > a: a = a-1 |
|
25 # # end if |
|
26 # else: |
|
27 # print 'oops!' |
|
28 # # end if |
|
29 # # end def foobar |
|
30 |
|
31 # Note that only the last part of an if...elif...else... block needs a |
|
32 # block-closing comment; the same is true for other compound |
|
33 # statements (e.g. try...except). Also note that "short-form" blocks |
|
34 # like the second 'if' in the example must be closed as well; |
|
35 # otherwise the 'else' in the example would be ambiguous (remember |
|
36 # that indentation is not significant when interpreting block-closing |
|
37 # comments). |
|
38 |
|
39 # The operations are idempotent (i.e. applied to their own output |
|
40 # they yield an identical result). Running first "pindent -c" and |
|
41 # then "pindent -r" on a valid Python program produces a program that |
|
42 # is semantically identical to the input (though its indentation may |
|
43 # be different). Running "pindent -d" on that output produces a |
|
44 # program that only differs from the original in indentation. |
|
45 |
|
46 # Other options: |
|
47 # -s stepsize: set the indentation step size (default 8) |
|
48 # -t tabsize : set the number of spaces a tab character is worth (default 8) |
|
49 # -e : expand TABs into spaces |
|
50 # file ... : input file(s) (default standard input) |
|
51 # The results always go to standard output |
|
52 |
|
53 # Caveats: |
|
54 # - comments ending in a backslash will be mistaken for continued lines |
|
55 # - continuations using backslash are always left unchanged |
|
56 # - continuations inside parentheses are not extra indented by -r |
|
57 # but must be indented for -c to work correctly (this breaks |
|
58 # idempotency!) |
|
59 # - continued lines inside triple-quoted strings are totally garbled |
|
60 |
|
61 # Secret feature: |
|
62 # - On input, a block may also be closed with an "end statement" -- |
|
63 # this is a block-closing comment without the '#' sign. |
|
64 |
|
65 # Possible improvements: |
|
66 # - check syntax based on transitions in 'next' table |
|
67 # - better error reporting |
|
68 # - better error recovery |
|
69 # - check identifier after class/def |
|
70 |
|
71 # The following wishes need a more complete tokenization of the source: |
|
72 # - Don't get fooled by comments ending in backslash |
|
73 # - reindent continuation lines indicated by backslash |
|
74 # - handle continuation lines inside parentheses/braces/brackets |
|
75 # - handle triple quoted strings spanning lines |
|
76 # - realign comments |
|
77 # - optionally do much more thorough reformatting, a la C indent |
|
78 |
|
# Defaults
STEPSIZE = 8    # indentation step size in columns (overridden by -s)
TABSIZE = 8     # number of columns one TAB character is worth (overridden by -t)
EXPANDTABS = 0  # false: TABs are not expanded into spaces on output (-e sets it to 1)
|
83 |
|
84 import re |
|
85 import sys |
|
86 |
|
# Keyword transition table: for each block keyword, the keywords that may
# follow it at the same nesting level.  The code visible in this file only
# tests whether a keyword is a key of this table; the values are not
# consulted.  Note that the entries for 'else', 'finally', 'def' and 'class'
# are the bare string 'end', not a 1-tuple.
next = {
    'if':      ('elif', 'else', 'end'),
    'elif':    ('elif', 'else', 'end'),
    'while':   ('else', 'end'),
    'for':     ('else', 'end'),
    'try':     ('except', 'finally'),
    'except':  ('except', 'else', 'end'),
    'else':    'end',
    'finally': 'end',
    'def':     'end',
    'class':   'end',
    'end':     (),
}

# Keywords that open a new block.
start = ('if', 'while', 'for', 'try', 'def', 'class')
|
95 |
|
class PythonIndenter:
    """Add, delete, or re-indent by '# end <keyword>' block-closing comments.

    An instance reads lines from `fpi` and writes the transformed program
    to `fpo`.  One of `reformat()`, `delete()` or `complete()` performs the
    actual work; each consumes the input until EOF.
    """

    def __init__(self, fpi = sys.stdin, fpo = sys.stdout,
                 indentsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
        """Set up the input/output streams and formatting parameters.

        fpi        -- object with a readline() method (input program)
        fpo        -- object with a write() method (output program)
        indentsize -- columns per nesting level used by putline()
        tabsize    -- columns one TAB is worth
        expandtabs -- if true, TABs are expanded to spaces on output
        """
        self.fpi = fpi
        self.fpo = fpo
        self.indentsize = indentsize
        self.tabsize = tabsize
        self.lineno = 0
        self.expandtabs = expandtabs
        self._write = fpo.write
        # A line starting with a lowercase word, optionally followed by an
        # identifier (the name after 'def'/'class'), then a non-word char.
        self.kwprog = re.compile(
                r'^\s*(?P<kw>[a-z]+)'
                r'(\s+(?P<id>[a-zA-Z_]\w*))?'
                r'[^\w]')
        # A block-closing comment; the '#' is optional ("end statement").
        self.endprog = re.compile(
                r'^\s*#?\s*end\s+(?P<kw>[a-z]+)'
                r'(\s+(?P<id>[a-zA-Z_]\w*))?'
                r'[^\w]')
        # Leading blanks and TABs of a line (always matches).
        self.wsprog = re.compile(r'^[ \t]*')
    # end def __init__

    def write(self, line):
        """Write `line` to the output, expanding TABs if so configured."""
        if self.expandtabs:
            self._write(line.expandtabs(self.tabsize))
        else:
            self._write(line)
        # end if
    # end def write

    def readline(self):
        """Return the next physical input line ('' at EOF), counting lines."""
        line = self.fpi.readline()
        if line: self.lineno = self.lineno + 1
        # end if
        return line
    # end def readline

    def error(self, fmt, *args):
        """Report an error on stderr and embed a '### ... ###' marker in the output."""
        if args: fmt = fmt % args
        # end if
        sys.stderr.write('Error at line %d: %s\n' % (self.lineno, fmt))
        self.write('### %s ###\n' % fmt)
    # end def error

    def getline(self):
        """Return the next logical line: backslash-continued lines are joined.

        Returns '' at EOF.  A line ending in backslash-newline is glued to
        the following physical line(s), newlines included.
        """
        line = self.readline()
        while line[-2:] == '\\\n':
            line2 = self.readline()
            if not line2: break
            # end if
            line = line + line2
        # end while
        return line
    # end def getline

    def putline(self, line, indent = None):
        """Write `line`, optionally replacing its leading whitespace.

        With indent None the line is written unchanged.  Otherwise the
        existing leading blanks/TABs are stripped and replaced by
        indent*indentsize columns, expressed as TABs plus spaces.
        """
        if indent is None:
            self.write(line)
            return
        # end if
        tabs, spaces = divmod(indent*self.indentsize, self.tabsize)
        i = 0
        m = self.wsprog.match(line)
        if m: i = m.end()
        # end if
        self.write('\t'*tabs + ' '*spaces + line[i:])
    # end def putline

    def reformat(self):
        """Re-indent the input using its block-closing comments.

        The nesting depth is tracked on a stack of
        (opening keyword, most recent keyword) pairs; every line is
        written at the depth current when it is read.
        """
        stack = []
        while 1:
            line = self.getline()
            if not line: break      # EOF
            # end if
            m = self.endprog.match(line)
            if m:
                kw2 = m.group('kw')
                if not stack:
                    self.error('unexpected end')
                elif stack[-1][0] != kw2:
                    self.error('unmatched end')
                # end if
                # Slice deletion is a no-op on an empty stack.
                del stack[-1:]
                self.putline(line, len(stack))
                continue
            # end if
            m = self.kwprog.match(line)
            if m:
                kw = m.group('kw')
                if kw in start:
                    self.putline(line, len(stack))
                    stack.append((kw, kw))
                    continue
                # end if
                # 'in' instead of dict.has_key(): works on Python 2 and 3.
                if kw in next and stack:
                    # Continuation keyword (elif/else/except/...): written
                    # at the level of the statement that opened the block.
                    self.putline(line, len(stack)-1)
                    kwa, kwb = stack[-1]
                    stack[-1] = kwa, kw
                    continue
                # end if
            # end if
            self.putline(line, len(stack))
        # end while
        if stack:
            self.error('unterminated keywords')
            for kwa, kwb in stack:
                self.write('\t%s\n' % kwa)
            # end for
        # end if
    # end def reformat

    def delete(self):
        """Copy the input to the output, dropping block-closing comments.

        Counts block openers and end markers and warns on stderr when the
        two counts differ.
        """
        begin_counter = 0
        end_counter = 0
        while 1:
            line = self.getline()
            if not line: break      # EOF
            # end if
            m = self.endprog.match(line)
            if m:
                end_counter = end_counter + 1
                continue
            # end if
            m = self.kwprog.match(line)
            if m:
                kw = m.group('kw')
                if kw in start:
                    begin_counter = begin_counter + 1
                # end if
            # end if
            self.putline(line)
        # end while
        if begin_counter - end_counter < 0:
            sys.stderr.write('Warning: input contained more end tags than expected\n')
        elif begin_counter - end_counter > 0:
            sys.stderr.write('Warning: input contained less end tags than expected\n')
        # end if
    # end def delete

    def complete(self):
        """Copy the input to the output, adding missing block-closing comments.

        The block structure is derived from the indentation of the input.
        `current` is the indentation column of the innermost open level,
        `firstkw`/`lastkw` the opening and most recent keyword at that
        level, `topid` the def/class name; outer levels are saved on
        `stack`.
        """
        # putline() is called with a column count, so one level == one column.
        self.indentsize = 1
        stack = []
        todo = []
        thisid = ''
        current, firstkw, lastkw, topid = 0, '', '', ''
        while 1:
            line = self.getline()
            i = 0
            m = self.wsprog.match(line)
            if m: i = m.end()
            # end if
            m = self.endprog.match(line)
            if m:
                thiskw = 'end'
                endkw = m.group('kw')
                thisid = m.group('id')
            else:
                m = self.kwprog.match(line)
                if m:
                    thiskw = m.group('kw')
                    # 'in' instead of dict.has_key(): works on Python 2 and 3.
                    if thiskw not in next:
                        thiskw = ''
                    # end if
                    if thiskw in ('def', 'class'):
                        thisid = m.group('id')
                    else:
                        thisid = ''
                    # end if
                elif line[i:i+1] in ('\n', '#'):
                    # Blank and comment-only lines are held back so that a
                    # generated end comment is emitted before them.
                    todo.append(line)
                    continue
                else:
                    thiskw = ''
                # end if
            # end if
            indent = len(line[:i].expandtabs(self.tabsize))
            # Dedent (or EOF, where line is '' and indent is 0): close every
            # level that is indented deeper than this line.
            while indent < current:
                if firstkw:
                    if topid:
                        s = '# end %s %s\n' % (
                                firstkw, topid)
                    else:
                        s = '# end %s\n' % firstkw
                    # end if
                    self.putline(s, current)
                    firstkw = lastkw = ''
                # end if
                current, firstkw, lastkw, topid = stack[-1]
                del stack[-1]
            # end while
            if indent == current and firstkw:
                if thiskw == 'end':
                    # The input already closes the block; just validate it.
                    if endkw != firstkw:
                        self.error('mismatched end')
                    # end if
                    firstkw = lastkw = ''
                elif not thiskw or thiskw in start:
                    # A plain statement or a new block at the same level
                    # ends the open compound statement.
                    if topid:
                        s = '# end %s %s\n' % (
                                firstkw, topid)
                    else:
                        s = '# end %s\n' % firstkw
                    # end if
                    self.putline(s, current)
                    firstkw = lastkw = topid = ''
                # end if
            # end if
            if indent > current:
                stack.append((current, firstkw, lastkw, topid))
                if thiskw and thiskw not in start:
                    # error
                    thiskw = ''
                # end if
                current, firstkw, lastkw, topid = \
                         indent, thiskw, thiskw, thisid
            # end if
            if thiskw:
                if thiskw in start:
                    firstkw = lastkw = thiskw
                    topid = thisid
                else:
                    lastkw = thiskw
                # end if
            # end if
            for l in todo: self.write(l)
            # end for
            todo = []
            if not line: break
            # end if
            self.write(line)
        # end while
    # end def complete

# end class PythonIndenter
|
331 |
|
332 # Simplified user interface |
|
333 # - xxx_filter(input, output): read and write file objects |
|
334 # - xxx_string(s): take and return string object |
|
335 # - xxx_file(filename): process file in place, return true iff changed |
|
336 |
|
def complete_filter(input = sys.stdin, output = sys.stdout,
                    stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
    """Read a program from `input` and write it to `output` with
    block-closing comments added."""
    PythonIndenter(input, output, stepsize, tabsize, expandtabs).complete()
# end def complete_filter
|
342 |
|
def delete_filter(input = sys.stdin, output = sys.stdout,
                  stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
    """Read a program from `input` and write it to `output` with
    block-closing comments removed."""
    PythonIndenter(input, output, stepsize, tabsize, expandtabs).delete()
# end def delete_filter
|
348 |
|
def reformat_filter(input = sys.stdin, output = sys.stdout,
                    stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
    """Read a program with block-closing comments from `input` and write
    it to `output` re-indented according to those comments."""
    PythonIndenter(input, output, stepsize, tabsize, expandtabs).reformat()
# end def reformat_filter
|
354 |
|
class StringReader:
    """Minimal read-only, file-like view of an in-memory string."""
    def __init__(self, buf):
        """Start reading `buf` from its first character."""
        self.buf = buf
        self.pos = 0
        self.len = len(self.buf)
    # end def __init__
    def read(self, n = 0):
        """Return up to `n` characters; n <= 0 means everything that is left."""
        remaining = self.len - self.pos
        if n <= 0 or n > remaining:
            n = remaining
        # end if
        begin = self.pos
        self.pos = begin + n
        return self.buf[begin:self.pos]
    # end def read
    def readline(self):
        """Return the next line including its newline, or '' when exhausted."""
        newline_at = self.buf.find('\n', self.pos)
        # When no newline is left find() gives -1, the count below is <= 0,
        # and read() then returns the remainder of the buffer.
        count = newline_at + 1 - self.pos
        return self.read(count)
    # end def readline
    def readlines(self):
        """Return all remaining lines as a list."""
        lines = []
        while 1:
            line = self.readline()
            if not line: break
            # end if
            lines.append(line)
        # end while
        return lines
    # end def readlines
    # seek/tell etc. are left as an exercise for the reader
# end class StringReader
|
386 |
|
class StringWriter:
    """Minimal write-only, file-like object that accumulates a string."""
    def __init__(self):
        """Start with an empty buffer."""
        self.buf = ''
    # end def __init__
    def write(self, s):
        """Append `s` to the buffer."""
        self.buf += s
    # end def write
    def getvalue(self):
        """Return everything written so far."""
        return self.buf
    # end def getvalue
# end class StringWriter
|
398 |
|
def complete_string(source, stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
    """Return the program text `source` with block-closing comments added."""
    reader = StringReader(source)
    sink = StringWriter()
    PythonIndenter(reader, sink, stepsize, tabsize, expandtabs).complete()
    return sink.getvalue()
# end def complete_string
|
406 |
|
def delete_string(source, stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
    """Return the program text `source` with block-closing comments removed."""
    reader = StringReader(source)
    sink = StringWriter()
    PythonIndenter(reader, sink, stepsize, tabsize, expandtabs).delete()
    return sink.getvalue()
# end def delete_string
|
414 |
|
def reformat_string(source, stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
    """Return the program text `source` re-indented according to its
    block-closing comments."""
    reader = StringReader(source)
    sink = StringWriter()
    PythonIndenter(reader, sink, stepsize, tabsize, expandtabs).reformat()
    return sink.getvalue()
# end def reformat_string
|
422 |
|
def complete_file(filename, stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
    """Add block-closing comments to the file `filename`, in place.

    Returns 0 if the file already was complete (it is then left
    untouched), 1 if it was rewritten.  Before rewriting, the original
    is renamed to `filename` + '~'; a failure of that rename is ignored,
    in which case the original is overwritten directly.
    """
    # Close the handles explicitly instead of relying on garbage collection.
    f = open(filename, 'r')
    try:
        source = f.read()
    finally:
        f.close()
    # end try
    result = complete_string(source, stepsize, tabsize, expandtabs)
    if source == result: return 0
    # end if
    import os
    try: os.rename(filename, filename + '~')
    except os.error: pass
    # end try
    f = open(filename, 'w')
    try:
        f.write(result)
    finally:
        f.close()
    # end try
    return 1
# end def complete_file
|
437 |
|
def delete_file(filename, stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
    """Remove block-closing comments from the file `filename`, in place.

    Returns 0 if nothing changed (the file is then left untouched), 1 if
    it was rewritten.  Before rewriting, the original is renamed to
    `filename` + '~'; a failure of that rename is ignored, in which case
    the original is overwritten directly.
    """
    # Close the handles explicitly instead of relying on garbage collection.
    f = open(filename, 'r')
    try:
        source = f.read()
    finally:
        f.close()
    # end try
    result = delete_string(source, stepsize, tabsize, expandtabs)
    if source == result: return 0
    # end if
    import os
    try: os.rename(filename, filename + '~')
    except os.error: pass
    # end try
    f = open(filename, 'w')
    try:
        f.write(result)
    finally:
        f.close()
    # end try
    return 1
# end def delete_file
|
452 |
|
def reformat_file(filename, stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
    """Re-indent the file `filename` in place, using its block-closing comments.

    Returns 0 if nothing changed (the file is then left untouched), 1 if
    it was rewritten.  Before rewriting, the original is renamed to
    `filename` + '~'; a failure of that rename is ignored, in which case
    the original is overwritten directly.
    """
    # Close the handles explicitly instead of relying on garbage collection.
    f = open(filename, 'r')
    try:
        source = f.read()
    finally:
        f.close()
    # end try
    result = reformat_string(source, stepsize, tabsize, expandtabs)
    if source == result: return 0
    # end if
    import os
    try: os.rename(filename, filename + '~')
    except os.error: pass
    # end try
    f = open(filename, 'w')
    try:
        f.write(result)
    finally:
        f.close()
    # end try
    return 1
# end def reformat_file
|
467 |
|
468 # Test program when called as a script |
|
469 |
|
# Help text printed on command-line errors.  The %(STEPSIZE)d and
# %(TABSIZE)d placeholders are filled in from the module-level defaults
# through vars().
usage = """
usage: pindent (-c|-d|-r) [-s stepsize] [-t tabsize] [-e] [file] ...
-c : complete a correctly indented program (add #end directives)
-d : delete #end directives
-r : reformat a completed program (use #end directives)
-s stepsize: indentation step (default %(STEPSIZE)d)
-t tabsize : the worth in spaces of a tab (default %(TABSIZE)d)
-e : expand TABs into spaces (default OFF)
[file] ... : files are changed in place, with backups in file~
If no files are specified or a single - is given,
the program acts as a filter (reads stdin, writes stdout).
""" % vars()
|
482 |
|
def error_both(op1, op2):
    """Complain on stderr that two actions were given, print the usage
    text, and exit with status 2.

    op1 -- the option just seen, e.g. '-c'
    op2 -- the name of the action chosen earlier; only its first letter
           is shown, prefixed with '-'
    """
    pieces = ['Error: You can not specify both ', op1, ' and -', op2[0], ' at the same time\n']
    sys.stderr.write(''.join(pieces))
    sys.stderr.write(usage)
    sys.exit(2)
# end def error_both
|
488 |
|
def test():
    """Command-line entry point: parse sys.argv and run the chosen action.

    Exactly one of -c, -d, -r selects the action.  With no file arguments,
    or the single argument '-', the action runs as a filter from stdin to
    stdout; otherwise each named file is processed in place.  Usage
    errors are reported on stderr and exit with status 2.
    """
    import getopt
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'cdrs:t:e')
    # 'except ... as' is accepted by Python 2.6+ and Python 3; the older
    # comma form is a syntax error on Python 3.
    except getopt.error as msg:
        sys.stderr.write('Error: %s\n' % msg)
        sys.stderr.write(usage)
        sys.exit(2)
    # end try
    action = None
    stepsize = STEPSIZE
    tabsize = TABSIZE
    expandtabs = EXPANDTABS
    for o, a in opts:
        if o == '-c':
            if action: error_both(o, action)
            # end if
            action = 'complete'
        elif o == '-d':
            if action: error_both(o, action)
            # end if
            action = 'delete'
        elif o == '-r':
            if action: error_both(o, action)
            # end if
            action = 'reformat'
        elif o == '-s':
            stepsize = int(a)
        elif o == '-t':
            tabsize = int(a)
        elif o == '-e':
            expandtabs = 1
        # end if
    # end for
    if not action:
        sys.stderr.write(
                'You must specify -c(omplete), -d(elete) or -r(eformat)\n')
        sys.stderr.write(usage)
        sys.exit(2)
    # end if
    # Look the worker function up by name in the module namespace; a plain
    # dictionary lookup replaces the former eval() of the same name.
    if not args or args == ['-']:
        action = globals()[action + '_filter']
        action(sys.stdin, sys.stdout, stepsize, tabsize, expandtabs)
    else:
        action = globals()[action + '_file']
        for filename in args:
            action(filename, stepsize, tabsize, expandtabs)
        # end for
    # end if
# end def test
|
539 |
|
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    test()
# end if