python-2.5.2/win32/Tools/Scripts/reindent.py
changeset 0 ae805ac0140d
equal deleted inserted replaced
-1:000000000000 0:ae805ac0140d
       
     1 #! /usr/bin/env python
       
     2 
       
     3 # Released to the public domain, by Tim Peters, 03 October 2000.
       
     4 
       
     5 """reindent [-d][-r][-v] [ path ... ]
       
     6 
       
     7 -d (--dryrun)  Dry run.  Analyze, but don't make any changes to, files.
       
     8 -r (--recurse) Recurse.  Search for all .py files in subdirectories too.
       
     9 -v (--verbose) Verbose.  Print informative msgs; else no output.
       
    10 -h (--help)    Help.     Print this usage information and exit.
       
    11 
       
    12 Change Python (.py) files to use 4-space indents and no hard tab characters.
       
    13 Also trim excess spaces and tabs from ends of lines, and remove empty lines
       
    14 at the end of files.  Also ensure the last line ends with a newline.
       
    15 
       
    16 If no paths are given on the command line, reindent operates as a filter,
       
    17 reading a single source file from standard input and writing the transformed
       
    18 source to standard output.  In this case, the -d, -r and -v flags are
       
    19 ignored.
       
    20 
       
    21 You can pass one or more file and/or directory paths.  When a directory
       
    22 path, all .py files within the directory will be examined, and, if the -r
       
    23 option is given, likewise recursively for subdirectories.
       
    24 
       
    25 If output is not to standard output, reindent overwrites files in place,
       
    26 renaming the originals with a .bak extension.  If it finds nothing to
       
    27 change, the file is left alone.  If reindent does change a file, the changed
       
    28 file is a fixed-point for future runs (i.e., running reindent on the
       
    29 resulting .py file won't change it again).
       
    30 
       
    31 The hard part of reindenting is figuring out what to do with comment
       
    32 lines.  So long as the input files get a clean bill of health from
       
    33 tabnanny.py, reindent should do a good job.
       
    34 """
       
    35 
       
    36 __version__ = "1"
       
    37 
       
    38 import tokenize
       
    39 import os
       
    40 import sys
       
    41 
       
    42 verbose = 0
       
    43 recurse = 0
       
    44 dryrun  = 0
       
    45 
       
    46 def usage(msg=None):
       
    47     if msg is not None:
       
    48         print >> sys.stderr, msg
       
    49     print >> sys.stderr, __doc__
       
    50 
       
    51 def errprint(*args):
       
    52     sep = ""
       
    53     for arg in args:
       
    54         sys.stderr.write(sep + str(arg))
       
    55         sep = " "
       
    56     sys.stderr.write("\n")
       
    57 
       
    58 def main():
       
    59     import getopt
       
    60     global verbose, recurse, dryrun
       
    61     try:
       
    62         opts, args = getopt.getopt(sys.argv[1:], "drvh",
       
    63                                    ["dryrun", "recurse", "verbose", "help"])
       
    64     except getopt.error, msg:
       
    65         usage(msg)
       
    66         return
       
    67     for o, a in opts:
       
    68         if o in ('-d', '--dryrun'):
       
    69             dryrun += 1
       
    70         elif o in ('-r', '--recurse'):
       
    71             recurse += 1
       
    72         elif o in ('-v', '--verbose'):
       
    73             verbose += 1
       
    74         elif o in ('-h', '--help'):
       
    75             usage()
       
    76             return
       
    77     if not args:
       
    78         r = Reindenter(sys.stdin)
       
    79         r.run()
       
    80         r.write(sys.stdout)
       
    81         return
       
    82     for arg in args:
       
    83         check(arg)
       
    84 
       
    85 def check(file):
       
    86     if os.path.isdir(file) and not os.path.islink(file):
       
    87         if verbose:
       
    88             print "listing directory", file
       
    89         names = os.listdir(file)
       
    90         for name in names:
       
    91             fullname = os.path.join(file, name)
       
    92             if ((recurse and os.path.isdir(fullname) and
       
    93                  not os.path.islink(fullname))
       
    94                 or name.lower().endswith(".py")):
       
    95                 check(fullname)
       
    96         return
       
    97 
       
    98     if verbose:
       
    99         print "checking", file, "...",
       
   100     try:
       
   101         f = open(file)
       
   102     except IOError, msg:
       
   103         errprint("%s: I/O Error: %s" % (file, str(msg)))
       
   104         return
       
   105 
       
   106     r = Reindenter(f)
       
   107     f.close()
       
   108     if r.run():
       
   109         if verbose:
       
   110             print "changed."
       
   111             if dryrun:
       
   112                 print "But this is a dry run, so leaving it alone."
       
   113         if not dryrun:
       
   114             bak = file + ".bak"
       
   115             if os.path.exists(bak):
       
   116                 os.remove(bak)
       
   117             os.rename(file, bak)
       
   118             if verbose:
       
   119                 print "renamed", file, "to", bak
       
   120             f = open(file, "w")
       
   121             r.write(f)
       
   122             f.close()
       
   123             if verbose:
       
   124                 print "wrote new", file
       
   125     else:
       
   126         if verbose:
       
   127             print "unchanged."
       
   128 
       
   129 def _rstrip(line, JUNK='\n \t'):
       
   130     """Return line stripped of trailing spaces, tabs, newlines.
       
   131 
       
   132     Note that line.rstrip() instead also strips sundry control characters,
       
   133     but at least one known Emacs user expects to keep junk like that, not
       
   134     mentioning Barry by name or anything <wink>.
       
   135     """
       
   136 
       
   137     i = len(line)
       
   138     while i > 0 and line[i-1] in JUNK:
       
   139         i -= 1
       
   140     return line[:i]
       
   141 
       
   142 class Reindenter:
       
   143 
       
   144     def __init__(self, f):
       
   145         self.find_stmt = 1  # next token begins a fresh stmt?
       
   146         self.level = 0      # current indent level
       
   147 
       
   148         # Raw file lines.
       
   149         self.raw = f.readlines()
       
   150 
       
   151         # File lines, rstripped & tab-expanded.  Dummy at start is so
       
   152         # that we can use tokenize's 1-based line numbering easily.
       
   153         # Note that a line is all-blank iff it's "\n".
       
   154         self.lines = [_rstrip(line).expandtabs() + "\n"
       
   155                       for line in self.raw]
       
   156         self.lines.insert(0, None)
       
   157         self.index = 1  # index into self.lines of next line
       
   158 
       
   159         # List of (lineno, indentlevel) pairs, one for each stmt and
       
   160         # comment line.  indentlevel is -1 for comment lines, as a
       
   161         # signal that tokenize doesn't know what to do about them;
       
   162         # indeed, they're our headache!
       
   163         self.stats = []
       
   164 
       
   165     def run(self):
       
   166         tokenize.tokenize(self.getline, self.tokeneater)
       
   167         # Remove trailing empty lines.
       
   168         lines = self.lines
       
   169         while lines and lines[-1] == "\n":
       
   170             lines.pop()
       
   171         # Sentinel.
       
   172         stats = self.stats
       
   173         stats.append((len(lines), 0))
       
   174         # Map count of leading spaces to # we want.
       
   175         have2want = {}
       
   176         # Program after transformation.
       
   177         after = self.after = []
       
   178         # Copy over initial empty lines -- there's nothing to do until
       
   179         # we see a line with *something* on it.
       
   180         i = stats[0][0]
       
   181         after.extend(lines[1:i])
       
   182         for i in range(len(stats)-1):
       
   183             thisstmt, thislevel = stats[i]
       
   184             nextstmt = stats[i+1][0]
       
   185             have = getlspace(lines[thisstmt])
       
   186             want = thislevel * 4
       
   187             if want < 0:
       
   188                 # A comment line.
       
   189                 if have:
       
   190                     # An indented comment line.  If we saw the same
       
   191                     # indentation before, reuse what it most recently
       
   192                     # mapped to.
       
   193                     want = have2want.get(have, -1)
       
   194                     if want < 0:
       
   195                         # Then it probably belongs to the next real stmt.
       
   196                         for j in xrange(i+1, len(stats)-1):
       
   197                             jline, jlevel = stats[j]
       
   198                             if jlevel >= 0:
       
   199                                 if have == getlspace(lines[jline]):
       
   200                                     want = jlevel * 4
       
   201                                 break
       
   202                     if want < 0:           # Maybe it's a hanging
       
   203                                            # comment like this one,
       
   204                         # in which case we should shift it like its base
       
   205                         # line got shifted.
       
   206                         for j in xrange(i-1, -1, -1):
       
   207                             jline, jlevel = stats[j]
       
   208                             if jlevel >= 0:
       
   209                                 want = have + getlspace(after[jline-1]) - \
       
   210                                        getlspace(lines[jline])
       
   211                                 break
       
   212                     if want < 0:
       
   213                         # Still no luck -- leave it alone.
       
   214                         want = have
       
   215                 else:
       
   216                     want = 0
       
   217             assert want >= 0
       
   218             have2want[have] = want
       
   219             diff = want - have
       
   220             if diff == 0 or have == 0:
       
   221                 after.extend(lines[thisstmt:nextstmt])
       
   222             else:
       
   223                 for line in lines[thisstmt:nextstmt]:
       
   224                     if diff > 0:
       
   225                         if line == "\n":
       
   226                             after.append(line)
       
   227                         else:
       
   228                             after.append(" " * diff + line)
       
   229                     else:
       
   230                         remove = min(getlspace(line), -diff)
       
   231                         after.append(line[remove:])
       
   232         return self.raw != self.after
       
   233 
       
   234     def write(self, f):
       
   235         f.writelines(self.after)
       
   236 
       
   237     # Line-getter for tokenize.
       
   238     def getline(self):
       
   239         if self.index >= len(self.lines):
       
   240             line = ""
       
   241         else:
       
   242             line = self.lines[self.index]
       
   243             self.index += 1
       
   244         return line
       
   245 
       
   246     # Line-eater for tokenize.
       
   247     def tokeneater(self, type, token, (sline, scol), end, line,
       
   248                    INDENT=tokenize.INDENT,
       
   249                    DEDENT=tokenize.DEDENT,
       
   250                    NEWLINE=tokenize.NEWLINE,
       
   251                    COMMENT=tokenize.COMMENT,
       
   252                    NL=tokenize.NL):
       
   253 
       
   254         if type == NEWLINE:
       
   255             # A program statement, or ENDMARKER, will eventually follow,
       
   256             # after some (possibly empty) run of tokens of the form
       
   257             #     (NL | COMMENT)* (INDENT | DEDENT+)?
       
   258             self.find_stmt = 1
       
   259 
       
   260         elif type == INDENT:
       
   261             self.find_stmt = 1
       
   262             self.level += 1
       
   263 
       
   264         elif type == DEDENT:
       
   265             self.find_stmt = 1
       
   266             self.level -= 1
       
   267 
       
   268         elif type == COMMENT:
       
   269             if self.find_stmt:
       
   270                 self.stats.append((sline, -1))
       
   271                 # but we're still looking for a new stmt, so leave
       
   272                 # find_stmt alone
       
   273 
       
   274         elif type == NL:
       
   275             pass
       
   276 
       
   277         elif self.find_stmt:
       
   278             # This is the first "real token" following a NEWLINE, so it
       
   279             # must be the first token of the next program statement, or an
       
   280             # ENDMARKER.
       
   281             self.find_stmt = 0
       
   282             if line:   # not endmarker
       
   283                 self.stats.append((sline, self.level))
       
   284 
       
   285 # Count number of leading blanks.
       
   286 def getlspace(line):
       
   287     i, n = 0, len(line)
       
   288     while i < n and line[i] == " ":
       
   289         i += 1
       
   290     return i
       
   291 
       
   292 if __name__ == '__main__':
       
   293     main()