python-2.5.2/win32/Tools/Scripts/texcheck.py
changeset 0 ae805ac0140d
equal deleted inserted replaced
-1:000000000000 0:ae805ac0140d
       
     1 """ TeXcheck.py -- rough syntax checking on Python style LaTeX documents.
       
     2 
       
     3    Written by Raymond D. Hettinger <python at rcn.com>
       
     4    Copyright (c) 2003 Python Software Foundation.  All rights reserved.
       
     5 
       
     6 Designed to catch common markup errors including:
       
     7 * Unbalanced or mismatched parentheses, brackets, and braces.
       
     8 * Unbalanced or mismatched \\begin and \\end blocks.
       
     9 * Misspelled or invalid LaTeX commands.
       
    10 * Use of forward slashes instead of backslashes for commands.
       
    11 * Table line size mismatches.
       
    12 
       
    13 Sample command line usage:
       
    14     python texcheck.py -k chapterheading -m lib/librandomtex *.tex
       
    15 
       
    16 Options:
       
    17     -m          Munge parentheses and brackets. [0,n) would normally mismatch.
       
    18     -k keyword: Keyword is a valid LaTeX command. Do not include the backslash.
       
    19     -d:         Delimiter check only (useful for non-LaTeX files).
       
    20     -h:         Help
       
    21     -s lineno:  Start at lineno (useful for skipping complex sections).
       
    22     -v:         Verbose.  Trace the matching of \\begin and \\end blocks.
       
    23 """
       
    24 
       
    25 import re
       
    26 import sys
       
    27 import getopt
       
    28 from itertools import izip, count, islice
       
    29 import glob
       
    30 
       
# Whitespace-separated list of the LaTeX commands that checkit() accepts as
# valid.  checkit() splits this string and stores the pieces in a set, so the
# layout below is free-form and repeated entries (such as \menuselection)
# are harmless.  Commands given with the -k option are added to that set.
cmdstr = r"""
    \section \module \declaremodule \modulesynopsis \moduleauthor
    \sectionauthor \versionadded \code \class \method \begin
    \optional \var \ref \end \subsection \lineiii \hline \label
    \indexii \textrm \ldots \keyword \stindex \index \item \note
    \withsubitem \ttindex \footnote \citetitle \samp \opindex
    \noindent \exception \strong \dfn \ctype \obindex \character
    \indexiii \function \bifuncindex \refmodule \refbimodindex
    \subsubsection \nodename \member \chapter \emph \ASCII \UNIX
    \regexp \program \production \token \productioncont \term
    \grammartoken \lineii \seemodule \file \EOF \documentclass
    \usepackage \title \input \maketitle \ifhtml \fi \url \Cpp
    \tableofcontents \kbd \programopt \envvar \refstmodindex
    \cfunction \constant \NULL \moreargs \cfuncline \cdata
    \textasciicircum \n \ABC \setindexsubitem \versionchanged
    \deprecated \seetext \newcommand \POSIX \pep \warning \rfc
    \verbatiminput \methodline \textgreater \seetitle \lineiv
    \funclineni \ulink \manpage \funcline \dataline \unspecified
    \textbackslash \mimetype \mailheader \seepep \textunderscore
    \longprogramopt \infinity \plusminus \shortversion \version
    \refmodindex \seerfc \makeindex \makemodindex \renewcommand
    \indexname \appendix \protect \indexiv \mbox \textasciitilde
    \platform \seeurl \leftmargin \labelwidth \localmoduletable
    \LaTeX \copyright \memberline \backslash \pi \centerline
    \caption \vspace \textwidth \menuselection \textless
    \makevar \csimplemacro \menuselection \bfcode \sub \release
    \email \kwindex \refexmodindex \filenq \e \menuselection
    \exindex \linev \newsgroup \verbatim \setshortversion
    \author \authoraddress \paragraph \subparagraph \cmemberline
    \textbar \C \seelink
"""
       
    62 
       
    63 def matchclose(c_lineno, c_symbol, openers, pairmap):
       
    64     "Verify that closing delimiter matches most recent opening delimiter"
       
    65     try:
       
    66         o_lineno, o_symbol = openers.pop()
       
    67     except IndexError:
       
    68         print "\nDelimiter mismatch.  On line %d, encountered closing '%s' without corresponding open" % (c_lineno, c_symbol)
       
    69         return
       
    70     if o_symbol in pairmap.get(c_symbol, [c_symbol]): return
       
    71     print "\nOpener '%s' on line %d was not closed before encountering '%s' on line %d" % (o_symbol, o_lineno, c_symbol, c_lineno)
       
    72     return
       
    73 
       
    74 def checkit(source, opts, morecmds=[]):
       
    75     """Check the LaTeX formatting in a sequence of lines.
       
    76 
       
    77     Opts is a mapping of options to option values if any:
       
    78         -m          munge parenthesis and brackets
       
    79         -d          delimiters only checking
       
    80         -v          verbose trace of delimiter matching
       
    81         -s lineno:  linenumber to start scan (default is 1).
       
    82 
       
    83     Morecmds is a sequence of LaTeX commands (without backslashes) that
       
    84     are to be considered valid in the scan.
       
    85     """
       
    86 
       
    87     texcmd = re.compile(r'\\[A-Za-z]+')
       
    88     falsetexcmd = re.compile(r'\/([A-Za-z]+)') # Mismarked with forward slash
       
    89 
       
    90     validcmds = set(cmdstr.split())
       
    91     for cmd in morecmds:
       
    92         validcmds.add('\\' + cmd)
       
    93 
       
    94     if '-m' in opts:
       
    95         pairmap = {']':'[(', ')':'(['}      # Munged openers
       
    96     else:
       
    97         pairmap = {']':'[', ')':'('}        # Normal opener for a given closer
       
    98     openpunct = set('([')                   # Set of valid openers
       
    99 
       
   100     delimiters = re.compile(r'\\(begin|end){([_a-zA-Z]+)}|([()\[\]])')
       
   101     braces = re.compile(r'({)|(})')
       
   102     doubledwords = re.compile(r'(\b[A-za-z]+\b) \b\1\b')
       
   103     spacingmarkup = re.compile(r'\\(ABC|ASCII|C|Cpp|EOF|infinity|NULL|plusminus|POSIX|UNIX)\s')
       
   104 
       
   105     openers = []                            # Stack of pending open delimiters
       
   106     bracestack = []                         # Stack of pending open braces
       
   107 
       
   108     tablestart = re.compile(r'\\begin{(?:long)?table([iv]+)}')
       
   109     tableline = re.compile(r'\\line([iv]+){')
       
   110     tableend = re.compile(r'\\end{(?:long)?table([iv]+)}')
       
   111     tablelevel = ''
       
   112     tablestartline = 0
       
   113 
       
   114     startline = int(opts.get('-s', '1'))
       
   115     lineno = 0
       
   116 
       
   117     for lineno, line in izip(count(startline), islice(source, startline-1, None)):
       
   118         line = line.rstrip()
       
   119 
       
   120         # Check balancing of open/close parenthesis, brackets, and begin/end blocks
       
   121         for begend, name, punct in delimiters.findall(line):
       
   122             if '-v' in opts:
       
   123                 print lineno, '|', begend, name, punct,
       
   124             if begend == 'begin' and '-d' not in opts:
       
   125                 openers.append((lineno, name))
       
   126             elif punct in openpunct:
       
   127                 openers.append((lineno, punct))
       
   128             elif begend == 'end' and '-d' not in opts:
       
   129                 matchclose(lineno, name, openers, pairmap)
       
   130             elif punct in pairmap:
       
   131                 matchclose(lineno, punct, openers, pairmap)
       
   132             if '-v' in opts:
       
   133                 print '   --> ', openers
       
   134 
       
   135         # Balance opening and closing braces
       
   136         for open, close in braces.findall(line):
       
   137             if open == '{':
       
   138                 bracestack.append(lineno)
       
   139             if close == '}':
       
   140                 try:
       
   141                     bracestack.pop()
       
   142                 except IndexError:
       
   143                     print r'Warning, unmatched } on line %s.' % (lineno,)
       
   144 
       
   145         # Optionally, skip LaTeX specific checks
       
   146         if '-d' in opts:
       
   147             continue
       
   148 
       
   149         # Warn whenever forward slashes encountered with a LaTeX command
       
   150         for cmd in falsetexcmd.findall(line):
       
   151             if '822' in line or '.html' in line:
       
   152                 continue    # Ignore false positives for urls and for /rfc822
       
   153             if '\\' + cmd in validcmds:
       
   154                 print 'Warning, forward slash used on line %d with cmd: /%s' % (lineno, cmd)
       
   155 
       
   156         # Check for markup requiring {} for correct spacing
       
   157         for cmd in spacingmarkup.findall(line):
       
   158             print r'Warning, \%s should be written as \%s{} on line %d' % (cmd, cmd, lineno)
       
   159 
       
   160         # Validate commands
       
   161         nc = line.find(r'\newcommand')
       
   162         if nc != -1:
       
   163             start = line.find('{', nc)
       
   164             end = line.find('}', start)
       
   165             validcmds.add(line[start+1:end])
       
   166         for cmd in texcmd.findall(line):
       
   167             if cmd not in validcmds:
       
   168                 print r'Warning, unknown tex cmd on line %d: \%s' % (lineno, cmd)
       
   169 
       
   170         # Check table levels (make sure lineii only inside tableii)
       
   171         m = tablestart.search(line)
       
   172         if m:
       
   173             tablelevel = m.group(1)
       
   174             tablestartline = lineno
       
   175         m = tableline.search(line)
       
   176         if m and m.group(1) != tablelevel:
       
   177             print r'Warning, \line%s on line %d does not match \table%s on line %d' % (m.group(1), lineno, tablelevel, tablestartline)
       
   178         if tableend.search(line):
       
   179             tablelevel = ''
       
   180 
       
   181         # Style guide warnings
       
   182         if 'e.g.' in line or 'i.e.' in line:
       
   183             print r'Style warning, avoid use of i.e or e.g. on line %d' % (lineno,)
       
   184 
       
   185         for dw in doubledwords.findall(line):
       
   186             print r'Doubled word warning.  "%s" on line %d' % (dw, lineno)
       
   187 
       
   188     lastline = lineno
       
   189     for lineno, symbol in openers:
       
   190         print "Unmatched open delimiter '%s' on line %d" % (symbol, lineno)
       
   191     for lineno in bracestack:
       
   192         print "Unmatched { on line %d" % (lineno,)
       
   193     print 'Done checking %d lines.' % (lastline,)
       
   194     return 0
       
   195 
       
   196 def main(args=None):
       
   197     if args is None:
       
   198         args = sys.argv[1:]
       
   199     optitems, arglist = getopt.getopt(args, "k:mdhs:v")
       
   200     opts = dict(optitems)
       
   201     if '-h' in opts or args==[]:
       
   202         print __doc__
       
   203         return 0
       
   204 
       
   205     if len(arglist) < 1:
       
   206         print 'Please specify a file to be checked'
       
   207         return 1
       
   208 
       
   209     for i, filespec in enumerate(arglist):
       
   210         if '*' in filespec or '?' in filespec:
       
   211             arglist[i:i+1] = glob.glob(filespec)
       
   212 
       
   213     morecmds = [v for k,v in optitems if k=='-k']
       
   214     err = []
       
   215 
       
   216     for filename in arglist:
       
   217         print '=' * 30
       
   218         print "Checking", filename
       
   219         try:
       
   220             f = open(filename)
       
   221         except IOError:
       
   222             print 'Cannot open file %s.' % arglist[0]
       
   223             return 2
       
   224 
       
   225         try:
       
   226             err.append(checkit(f, opts, morecmds))
       
   227         finally:
       
   228             f.close()
       
   229 
       
   230     return max(err)
       
   231 
       
   232 if __name__ == '__main__':
       
   233     sys.exit(main())