|
1 """ TeXcheck.py -- rough syntax checking on Python style LaTeX documents. |
|
2 |
|
3 Written by Raymond D. Hettinger <python at rcn.com> |
|
4 Copyright (c) 2003 Python Software Foundation. All rights reserved. |
|
5 |
|
6 Designed to catch common markup errors including: |
|
7 * Unbalanced or mismatched parentheses, brackets, and braces. |
|
8 * Unbalanced or mismatched \\begin and \\end blocks. |
|
9 * Misspelled or invalid LaTeX commands. |
|
10 * Use of forward slashes instead of backslashes for commands. |
|
11 * Table line size mismatches. |
|
12 |
|
13 Sample command line usage: |
|
14 python texcheck.py -k chapterheading -m lib/librandomtex *.tex |
|
15 |
|
16 Options: |
|
17 -m Munge parentheses and brackets. [0,n) would normally mismatch. |
|
18 -k keyword: Keyword is a valid LaTeX command. Do not include the backslash. |
|
19 -d: Delimiter check only (useful for non-LaTeX files). |
|
20 -h: Help |
|
21 -s lineno: Start at lineno (useful for skipping complex sections). |
|
22 -v: Verbose. Trace the matching of \\begin and \\end blocks. |
|
23 """ |
|
24 |
|
25 import re |
|
26 import sys |
|
27 import getopt |
|
28 from itertools import izip, count, islice |
|
29 import glob |
|
30 |
|
# Whitespace-separated list of the LaTeX commands (each with its leading
# backslash) that are accepted as valid.  checkit() splits this string on
# whitespace and builds a set from it, so line breaks and repeated entries
# (e.g. \menuselection) make no difference.  Raw string: the backslashes
# are literal.
cmdstr = r"""
\section \module \declaremodule \modulesynopsis \moduleauthor
\sectionauthor \versionadded \code \class \method \begin
\optional \var \ref \end \subsection \lineiii \hline \label
\indexii \textrm \ldots \keyword \stindex \index \item \note
\withsubitem \ttindex \footnote \citetitle \samp \opindex
\noindent \exception \strong \dfn \ctype \obindex \character
\indexiii \function \bifuncindex \refmodule \refbimodindex
\subsubsection \nodename \member \chapter \emph \ASCII \UNIX
\regexp \program \production \token \productioncont \term
\grammartoken \lineii \seemodule \file \EOF \documentclass
\usepackage \title \input \maketitle \ifhtml \fi \url \Cpp
\tableofcontents \kbd \programopt \envvar \refstmodindex
\cfunction \constant \NULL \moreargs \cfuncline \cdata
\textasciicircum \n \ABC \setindexsubitem \versionchanged
\deprecated \seetext \newcommand \POSIX \pep \warning \rfc
\verbatiminput \methodline \textgreater \seetitle \lineiv
\funclineni \ulink \manpage \funcline \dataline \unspecified
\textbackslash \mimetype \mailheader \seepep \textunderscore
\longprogramopt \infinity \plusminus \shortversion \version
\refmodindex \seerfc \makeindex \makemodindex \renewcommand
\indexname \appendix \protect \indexiv \mbox \textasciitilde
\platform \seeurl \leftmargin \labelwidth \localmoduletable
\LaTeX \copyright \memberline \backslash \pi \centerline
\caption \vspace \textwidth \menuselection \textless
\makevar \csimplemacro \menuselection \bfcode \sub \release
\email \kwindex \refexmodindex \filenq \e \menuselection
\exindex \linev \newsgroup \verbatim \setshortversion
\author \authoraddress \paragraph \subparagraph \cmemberline
\textbar \C \seelink
"""
|
62 |
|
def matchclose(c_lineno, c_symbol, openers, pairmap):
    """Verify that closing delimiter matches most recent opening delimiter.

    Arguments:
        c_lineno    line number on which the closer was found
        c_symbol    the closer: a punctuation character or an environment name
        openers     stack (list) of (lineno, symbol) tuples for the openers
                    still pending; the most recent entry is popped
        pairmap     maps a closing punctuation character to the opener(s)
                    accepted for it; a closer that is not a key must be
                    equal to the popped opener

    A diagnostic is printed when the stack is empty or when the popped
    opener is not acceptable for the closer.  Always returns None.
    """
    # Each message is formatted first and handed to print as one
    # parenthesised argument, which produces the same text whether print is
    # the Python 2 statement or the Python 3 function.
    try:
        o_lineno, o_symbol = openers.pop()
    except IndexError:
        print("\nDelimiter mismatch. On line %d, encountered closing '%s' without corresponding open" % (c_lineno, c_symbol))
        return
    # The fallback [c_symbol] makes closers absent from pairmap match only
    # an identical opener.
    if o_symbol in pairmap.get(c_symbol, [c_symbol]):
        return
    print("\nOpener '%s' on line %d was not closed before encountering '%s' on line %d" % (o_symbol, o_lineno, c_symbol, c_lineno))
    return
|
73 |
|
def checkit(source, opts, morecmds=()):
    """Check the LaTeX formatting in a sequence of lines.

    Source is an iterable of text lines (an open file works).

    Opts is a mapping of options to option values if any:
        -m          munge parentheses and brackets
        -d          delimiters only checking
        -v          verbose trace of delimiter matching
        -s lineno:  linenumber to start scan (default is 1).

    Morecmds is a sequence of LaTeX commands (without backslashes) that
    are to be considered valid in the scan.

    Every problem found is reported with print.  The function always
    returns 0.
    """

    texcmd = re.compile(r'\\[A-Za-z]+')
    falsetexcmd = re.compile(r'\/([A-Za-z]+)') # Mismarked with forward slash

    validcmds = set(cmdstr.split())
    for cmd in morecmds:
        validcmds.add('\\' + cmd)

    if '-m' in opts:
        pairmap = {']':'[(', ')':'(['}      # Munged openers
    else:
        pairmap = {']':'[', ')':'('}        # Normal opener for a given closer
    openpunct = set('([')                   # Set of valid openers

    delimiters = re.compile(r'\\(begin|end){([_a-zA-Z]+)}|([()\[\]])')
    braces = re.compile(r'({)|(})')
    # The character class is A-Za-z; a range written as A-z would also
    # accept the punctuation that sits between 'Z' and 'a' in ASCII.
    doubledwords = re.compile(r'(\b[A-Za-z]+\b) \b\1\b')
    spacingmarkup = re.compile(r'\\(ABC|ASCII|C|Cpp|EOF|infinity|NULL|plusminus|POSIX|UNIX)\s')

    openers = []                            # Stack of pending open delimiters
    bracestack = []                         # Stack of pending open braces

    tablestart = re.compile(r'\\begin{(?:long)?table([iv]+)}')
    tableline = re.compile(r'\\line([iv]+){')
    tableend = re.compile(r'\\end{(?:long)?table([iv]+)}')
    tablelevel = ''
    tablestartline = 0

    verbose = '-v' in opts
    delimsonly = '-d' in opts

    startline = int(opts.get('-s', '1'))
    lineno = 0                              # Stays 0 if no line gets scanned

    # Skip the first startline-1 lines and number the rest from startline.
    for lineno, line in enumerate(islice(source, startline-1, None), startline):
        line = line.rstrip()

        # Check balancing of open/close parentheses, brackets, and begin/end blocks
        for begend, name, punct in delimiters.findall(line):
            if verbose:
                # Written without a newline so that the resulting stack
                # (and any message from matchclose) follows the trace.
                sys.stdout.write('%s | %s %s %s ' % (lineno, begend, name, punct))
            if begend == 'begin' and not delimsonly:
                openers.append((lineno, name))
            elif punct in openpunct:
                openers.append((lineno, punct))
            elif begend == 'end' and not delimsonly:
                matchclose(lineno, name, openers, pairmap)
            elif punct in pairmap:
                matchclose(lineno, punct, openers, pairmap)
            if verbose:
                print(' -->  %s' % (openers,))

        # Balance opening and closing braces
        for opener, closer in braces.findall(line):
            if opener == '{':
                bracestack.append(lineno)
            if closer == '}':
                try:
                    bracestack.pop()
                except IndexError:
                    print(r'Warning, unmatched } on line %s.' % (lineno,))

        # Optionally, skip LaTeX specific checks
        if delimsonly:
            continue

        # Warn whenever forward slashes encountered with a LaTeX command
        # Ignore false positives for urls and for /rfc822
        if not ('822' in line or '.html' in line):
            for cmd in falsetexcmd.findall(line):
                if '\\' + cmd in validcmds:
                    print('Warning, forward slash used on line %d with cmd: /%s' % (lineno, cmd))

        # Check for markup requiring {} for correct spacing
        for cmd in spacingmarkup.findall(line):
            print(r'Warning, \%s should be written as \%s{} on line %d' % (cmd, cmd, lineno))

        # Validate commands
        nc = line.find(r'\newcommand')
        if nc != -1:
            # Register the newly defined command, but only when both braces
            # of its {...} argument are actually present on this line.
            start = line.find('{', nc)
            if start != -1:
                end = line.find('}', start)
                if end != -1:
                    validcmds.add(line[start+1:end])
        for cmd in texcmd.findall(line):
            if cmd not in validcmds:
                # cmd already carries its leading backslash.
                print('Warning, unknown tex cmd on line %d: %s' % (lineno, cmd))

        # Check table levels (make sure lineii only inside tableii)
        m = tablestart.search(line)
        if m:
            tablelevel = m.group(1)
            tablestartline = lineno
        m = tableline.search(line)
        if m and m.group(1) != tablelevel:
            print(r'Warning, \line%s on line %d does not match \table%s on line %d' % (m.group(1), lineno, tablelevel, tablestartline))
        if tableend.search(line):
            tablelevel = ''

        # Style guide warnings
        if 'e.g.' in line or 'i.e.' in line:
            print(r'Style warning, avoid use of i.e or e.g. on line %d' % (lineno,))

        for dw in doubledwords.findall(line):
            print(r'Doubled word warning. "%s" on line %d' % (dw, lineno))

    # Report whatever is still open once the input is exhausted.
    lastline = lineno
    for o_lineno, symbol in openers:
        print("Unmatched open delimiter '%s' on line %d" % (symbol, o_lineno))
    for b_lineno in bracestack:
        print("Unmatched { on line %d" % (b_lineno,))
    print('Done checking %d lines.' % (lastline,))
    return 0
|
195 |
|
def main(args=None):
    """Command-line driver: parse the options and check every named file.

    args is the argument list without the program name; when it is None,
    sys.argv[1:] is used.  File arguments containing '*' or '?' are
    expanded with glob.glob.

    Returns an exit status:
        0   the help text was printed (-h given or no arguments at all)
        1   no file was specified, or the wildcards matched no file
        2   a file could not be opened (checking stops at that file)
    otherwise the largest value returned by checkit() for any file.
    """
    if args is None:
        args = sys.argv[1:]
    optitems, arglist = getopt.getopt(args, "k:mdhs:v")
    opts = dict(optitems)
    if '-h' in opts or not args:
        print(__doc__)
        return 0

    if len(arglist) < 1:
        print('Please specify a file to be checked')
        return 1

    # Expand wildcards into a separate list.  Replacing slices of arglist
    # while iterating over it would skip the argument that follows any
    # pattern matching nothing.
    filenames = []
    for filespec in arglist:
        if '*' in filespec or '?' in filespec:
            filenames.extend(glob.glob(filespec))
        else:
            filenames.append(filespec)

    if not filenames:
        # Every argument was a pattern without matches; max() below would
        # fail on an empty result list.
        print('No files matched the given patterns')
        return 1

    # Only the last value survives in opts, so collect every -k here.
    morecmds = [v for k, v in optitems if k == '-k']
    err = []

    for filename in filenames:
        print('=' * 30)
        print("Checking %s" % (filename,))
        try:
            f = open(filename)
        except IOError:
            # Name the file that actually failed to open.
            print('Cannot open file %s.' % (filename,))
            return 2

        try:
            err.append(checkit(f, opts, morecmds))
        finally:
            f.close()

    return max(err)
|
231 |
|
# When run as a script, hand main()'s status code to the interpreter as the
# process exit status.
if __name__ == '__main__':
    raise SystemExit(main())