python-2.5.2/win32/Lib/tabnanny.py
changeset 0 ae805ac0140d
equal deleted inserted replaced
-1:000000000000 0:ae805ac0140d
       
     1 #! /usr/bin/env python
       
     2 
       
     3 """The Tab Nanny despises ambiguous indentation.  She knows no mercy.
       
     4 
       
     5 tabnanny -- Detection of ambiguous indentation
       
     6 
       
     7 For the time being this module is intended to be called as a script.
       
     8 However it is possible to import it into an IDE and use the function
       
     9 check() described below.
       
    10 
       
    11 Warning: The API provided by this module is likely to change in future
       
    12 releases; such changes may not be backward compatible.
       
    13 """
       
    14 
       
    15 # Released to the public domain, by Tim Peters, 15 April 1998.
       
    16 
       
    17 # XXX Note: this is now a standard library module.
       
    18 # XXX The API needs to undergo changes however; the current code is too
       
    19 # XXX script-like.  This will be addressed later.
       
    20 
       
    21 __version__ = "6"
       
    22 
       
    23 import os
       
    24 import sys
       
    25 import getopt
       
    26 import tokenize
       
    27 if not hasattr(tokenize, 'NL'):
       
    28     raise ValueError("tokenize.NL doesn't exist -- tokenize module too old")
       
    29 
       
    30 __all__ = ["check", "NannyNag", "process_tokens"]
       
    31 
       
    32 verbose = 0
       
    33 filename_only = 0
       
    34 
       
    35 def errprint(*args):
       
    36     sep = ""
       
    37     for arg in args:
       
    38         sys.stderr.write(sep + str(arg))
       
    39         sep = " "
       
    40     sys.stderr.write("\n")
       
    41 
       
    42 def main():
       
    43     global verbose, filename_only
       
    44     try:
       
    45         opts, args = getopt.getopt(sys.argv[1:], "qv")
       
    46     except getopt.error, msg:
       
    47         errprint(msg)
       
    48         return
       
    49     for o, a in opts:
       
    50         if o == '-q':
       
    51             filename_only = filename_only + 1
       
    52         if o == '-v':
       
    53             verbose = verbose + 1
       
    54     if not args:
       
    55         errprint("Usage:", sys.argv[0], "[-v] file_or_directory ...")
       
    56         return
       
    57     for arg in args:
       
    58         check(arg)
       
    59 
       
    60 class NannyNag(Exception):
       
    61     """
       
    62     Raised by tokeneater() if detecting an ambiguous indent.
       
    63     Captured and handled in check().
       
    64     """
       
    65     def __init__(self, lineno, msg, line):
       
    66         self.lineno, self.msg, self.line = lineno, msg, line
       
    67     def get_lineno(self):
       
    68         return self.lineno
       
    69     def get_msg(self):
       
    70         return self.msg
       
    71     def get_line(self):
       
    72         return self.line
       
    73 
       
    74 def check(file):
       
    75     """check(file_or_dir)
       
    76 
       
    77     If file_or_dir is a directory and not a symbolic link, then recursively
       
    78     descend the directory tree named by file_or_dir, checking all .py files
       
    79     along the way. If file_or_dir is an ordinary Python source file, it is
       
    80     checked for whitespace related problems. The diagnostic messages are
       
    81     written to standard output using the print statement.
       
    82     """
       
    83 
       
    84     if os.path.isdir(file) and not os.path.islink(file):
       
    85         if verbose:
       
    86             print "%r: listing directory" % (file,)
       
    87         names = os.listdir(file)
       
    88         for name in names:
       
    89             fullname = os.path.join(file, name)
       
    90             if (os.path.isdir(fullname) and
       
    91                 not os.path.islink(fullname) or
       
    92                 os.path.normcase(name[-3:]) == ".py"):
       
    93                 check(fullname)
       
    94         return
       
    95 
       
    96     try:
       
    97         f = open(file)
       
    98     except IOError, msg:
       
    99         errprint("%r: I/O Error: %s" % (file, msg))
       
   100         return
       
   101 
       
   102     if verbose > 1:
       
   103         print "checking %r ..." % file
       
   104 
       
   105     try:
       
   106         process_tokens(tokenize.generate_tokens(f.readline))
       
   107 
       
   108     except tokenize.TokenError, msg:
       
   109         errprint("%r: Token Error: %s" % (file, msg))
       
   110         return
       
   111 
       
   112     except IndentationError, msg:
       
   113         errprint("%r: Indentation Error: %s" % (file, msg))
       
   114         return
       
   115 
       
   116     except NannyNag, nag:
       
   117         badline = nag.get_lineno()
       
   118         line = nag.get_line()
       
   119         if verbose:
       
   120             print "%r: *** Line %d: trouble in tab city! ***" % (file, badline)
       
   121             print "offending line: %r" % (line,)
       
   122             print nag.get_msg()
       
   123         else:
       
   124             if ' ' in file: file = '"' + file + '"'
       
   125             if filename_only: print file
       
   126             else: print file, badline, repr(line)
       
   127         return
       
   128 
       
   129     if verbose:
       
   130         print "%r: Clean bill of health." % (file,)
       
   131 
       
   132 class Whitespace:
       
   133     # the characters used for space and tab
       
   134     S, T = ' \t'
       
   135 
       
   136     # members:
       
   137     #   raw
       
   138     #       the original string
       
   139     #   n
       
   140     #       the number of leading whitespace characters in raw
       
   141     #   nt
       
   142     #       the number of tabs in raw[:n]
       
   143     #   norm
       
   144     #       the normal form as a pair (count, trailing), where:
       
   145     #       count
       
   146     #           a tuple such that raw[:n] contains count[i]
       
   147     #           instances of S * i + T
       
   148     #       trailing
       
   149     #           the number of trailing spaces in raw[:n]
       
   150     #       It's A Theorem that m.indent_level(t) ==
       
   151     #       n.indent_level(t) for all t >= 1 iff m.norm == n.norm.
       
   152     #   is_simple
       
   153     #       true iff raw[:n] is of the form (T*)(S*)
       
   154 
       
   155     def __init__(self, ws):
       
   156         self.raw  = ws
       
   157         S, T = Whitespace.S, Whitespace.T
       
   158         count = []
       
   159         b = n = nt = 0
       
   160         for ch in self.raw:
       
   161             if ch == S:
       
   162                 n = n + 1
       
   163                 b = b + 1
       
   164             elif ch == T:
       
   165                 n = n + 1
       
   166                 nt = nt + 1
       
   167                 if b >= len(count):
       
   168                     count = count + [0] * (b - len(count) + 1)
       
   169                 count[b] = count[b] + 1
       
   170                 b = 0
       
   171             else:
       
   172                 break
       
   173         self.n    = n
       
   174         self.nt   = nt
       
   175         self.norm = tuple(count), b
       
   176         self.is_simple = len(count) <= 1
       
   177 
       
   178     # return length of longest contiguous run of spaces (whether or not
       
   179     # preceding a tab)
       
   180     def longest_run_of_spaces(self):
       
   181         count, trailing = self.norm
       
   182         return max(len(count)-1, trailing)
       
   183 
       
   184     def indent_level(self, tabsize):
       
   185         # count, il = self.norm
       
   186         # for i in range(len(count)):
       
   187         #    if count[i]:
       
   188         #        il = il + (i/tabsize + 1)*tabsize * count[i]
       
   189         # return il
       
   190 
       
   191         # quicker:
       
   192         # il = trailing + sum (i/ts + 1)*ts*count[i] =
       
   193         # trailing + ts * sum (i/ts + 1)*count[i] =
       
   194         # trailing + ts * sum i/ts*count[i] + count[i] =
       
   195         # trailing + ts * [(sum i/ts*count[i]) + (sum count[i])] =
       
   196         # trailing + ts * [(sum i/ts*count[i]) + num_tabs]
       
   197         # and note that i/ts*count[i] is 0 when i < ts
       
   198 
       
   199         count, trailing = self.norm
       
   200         il = 0
       
   201         for i in range(tabsize, len(count)):
       
   202             il = il + i/tabsize * count[i]
       
   203         return trailing + tabsize * (il + self.nt)
       
   204 
       
   205     # return true iff self.indent_level(t) == other.indent_level(t)
       
   206     # for all t >= 1
       
   207     def equal(self, other):
       
   208         return self.norm == other.norm
       
   209 
       
   210     # return a list of tuples (ts, i1, i2) such that
       
   211     # i1 == self.indent_level(ts) != other.indent_level(ts) == i2.
       
   212     # Intended to be used after not self.equal(other) is known, in which
       
   213     # case it will return at least one witnessing tab size.
       
   214     def not_equal_witness(self, other):
       
   215         n = max(self.longest_run_of_spaces(),
       
   216                 other.longest_run_of_spaces()) + 1
       
   217         a = []
       
   218         for ts in range(1, n+1):
       
   219             if self.indent_level(ts) != other.indent_level(ts):
       
   220                 a.append( (ts,
       
   221                            self.indent_level(ts),
       
   222                            other.indent_level(ts)) )
       
   223         return a
       
   224 
       
   225     # Return True iff self.indent_level(t) < other.indent_level(t)
       
   226     # for all t >= 1.
       
   227     # The algorithm is due to Vincent Broman.
       
   228     # Easy to prove it's correct.
       
   229     # XXXpost that.
       
   230     # Trivial to prove n is sharp (consider T vs ST).
       
   231     # Unknown whether there's a faster general way.  I suspected so at
       
   232     # first, but no longer.
       
   233     # For the special (but common!) case where M and N are both of the
       
   234     # form (T*)(S*), M.less(N) iff M.len() < N.len() and
       
   235     # M.num_tabs() <= N.num_tabs(). Proof is easy but kinda long-winded.
       
   236     # XXXwrite that up.
       
   237     # Note that M is of the form (T*)(S*) iff len(M.norm[0]) <= 1.
       
   238     def less(self, other):
       
   239         if self.n >= other.n:
       
   240             return False
       
   241         if self.is_simple and other.is_simple:
       
   242             return self.nt <= other.nt
       
   243         n = max(self.longest_run_of_spaces(),
       
   244                 other.longest_run_of_spaces()) + 1
       
   245         # the self.n >= other.n test already did it for ts=1
       
   246         for ts in range(2, n+1):
       
   247             if self.indent_level(ts) >= other.indent_level(ts):
       
   248                 return False
       
   249         return True
       
   250 
       
   251     # return a list of tuples (ts, i1, i2) such that
       
   252     # i1 == self.indent_level(ts) >= other.indent_level(ts) == i2.
       
   253     # Intended to be used after not self.less(other) is known, in which
       
   254     # case it will return at least one witnessing tab size.
       
   255     def not_less_witness(self, other):
       
   256         n = max(self.longest_run_of_spaces(),
       
   257                 other.longest_run_of_spaces()) + 1
       
   258         a = []
       
   259         for ts in range(1, n+1):
       
   260             if self.indent_level(ts) >= other.indent_level(ts):
       
   261                 a.append( (ts,
       
   262                            self.indent_level(ts),
       
   263                            other.indent_level(ts)) )
       
   264         return a
       
   265 
       
   266 def format_witnesses(w):
       
   267     firsts = map(lambda tup: str(tup[0]), w)
       
   268     prefix = "at tab size"
       
   269     if len(w) > 1:
       
   270         prefix = prefix + "s"
       
   271     return prefix + " " + ', '.join(firsts)
       
   272 
       
   273 def process_tokens(tokens):
       
   274     INDENT = tokenize.INDENT
       
   275     DEDENT = tokenize.DEDENT
       
   276     NEWLINE = tokenize.NEWLINE
       
   277     JUNK = tokenize.COMMENT, tokenize.NL
       
   278     indents = [Whitespace("")]
       
   279     check_equal = 0
       
   280 
       
   281     for (type, token, start, end, line) in tokens:
       
   282         if type == NEWLINE:
       
   283             # a program statement, or ENDMARKER, will eventually follow,
       
   284             # after some (possibly empty) run of tokens of the form
       
   285             #     (NL | COMMENT)* (INDENT | DEDENT+)?
       
   286             # If an INDENT appears, setting check_equal is wrong, and will
       
   287             # be undone when we see the INDENT.
       
   288             check_equal = 1
       
   289 
       
   290         elif type == INDENT:
       
   291             check_equal = 0
       
   292             thisguy = Whitespace(token)
       
   293             if not indents[-1].less(thisguy):
       
   294                 witness = indents[-1].not_less_witness(thisguy)
       
   295                 msg = "indent not greater e.g. " + format_witnesses(witness)
       
   296                 raise NannyNag(start[0], msg, line)
       
   297             indents.append(thisguy)
       
   298 
       
   299         elif type == DEDENT:
       
   300             # there's nothing we need to check here!  what's important is
       
   301             # that when the run of DEDENTs ends, the indentation of the
       
   302             # program statement (or ENDMARKER) that triggered the run is
       
   303             # equal to what's left at the top of the indents stack
       
   304 
       
   305             # Ouch!  This assert triggers if the last line of the source
       
   306             # is indented *and* lacks a newline -- then DEDENTs pop out
       
   307             # of thin air.
       
   308             # assert check_equal  # else no earlier NEWLINE, or an earlier INDENT
       
   309             check_equal = 1
       
   310 
       
   311             del indents[-1]
       
   312 
       
   313         elif check_equal and type not in JUNK:
       
   314             # this is the first "real token" following a NEWLINE, so it
       
   315             # must be the first token of the next program statement, or an
       
   316             # ENDMARKER; the "line" argument exposes the leading whitespace
       
   317             # for this statement; in the case of ENDMARKER, line is an empty
       
   318             # string, so will properly match the empty string with which the
       
   319             # "indents" stack was seeded
       
   320             check_equal = 0
       
   321             thisguy = Whitespace(line)
       
   322             if not indents[-1].equal(thisguy):
       
   323                 witness = indents[-1].not_equal_witness(thisguy)
       
   324                 msg = "indent not equal e.g. " + format_witnesses(witness)
       
   325                 raise NannyNag(start[0], msg, line)
       
   326 
       
   327 
       
   328 if __name__ == '__main__':
       
   329     main()