diff options
Diffstat (limited to 'lib/python2.7/Tools/scripts/texcheck.py')
-rw-r--r-- | lib/python2.7/Tools/scripts/texcheck.py | 233 |
1 files changed, 233 insertions, 0 deletions
diff --git a/lib/python2.7/Tools/scripts/texcheck.py b/lib/python2.7/Tools/scripts/texcheck.py new file mode 100644 index 0000000..4a56635 --- /dev/null +++ b/lib/python2.7/Tools/scripts/texcheck.py @@ -0,0 +1,233 @@ +""" TeXcheck.py -- rough syntax checking on Python style LaTeX documents. + + Written by Raymond D. Hettinger <python at rcn.com> + Copyright (c) 2003 Python Software Foundation. All rights reserved. + +Designed to catch common markup errors including: +* Unbalanced or mismatched parenthesis, brackets, and braces. +* Unbalanced or mismatched \\begin and \\end blocks. +* Misspelled or invalid LaTeX commands. +* Use of forward slashes instead of backslashes for commands. +* Table line size mismatches. + +Sample command line usage: + python texcheck.py -k chapterheading -m lib/librandomtex *.tex + +Options: + -m Munge parenthesis and brackets. [0,n) would normally mismatch. + -k keyword: Keyword is a valid LaTeX command. Do not include the backslash. + -d: Delimiter check only (useful for non-LaTeX files). + -h: Help + -s lineno: Start at lineno (useful for skipping complex sections). + -v: Verbose. Trace the matching of //begin and //end blocks. +""" + +import re +import sys +import getopt +from itertools import izip, count, islice +import glob + +cmdstr = r""" + \section \module \declaremodule \modulesynopsis \moduleauthor + \sectionauthor \versionadded \code \class \method \begin + \optional \var \ref \end \subsection \lineiii \hline \label + \indexii \textrm \ldots \keyword \stindex \index \item \note + \withsubitem \ttindex \footnote \citetitle \samp \opindex + \noindent \exception \strong \dfn \ctype \obindex \character + \indexiii \function \bifuncindex \refmodule \refbimodindex + \subsubsection \nodename \member \chapter \emph \ASCII \UNIX + \regexp \program \production \token \productioncont \term + \grammartoken \lineii \seemodule \file \EOF \documentclass + \usepackage \title \input \maketitle \ifhtml \fi \url \Cpp + \tableofcontents \kbd \programopt \envvar \refstmodindex + \cfunction \constant \NULL \moreargs \cfuncline \cdata + \textasciicircum \n \ABC \setindexsubitem \versionchanged + \deprecated \seetext \newcommand \POSIX \pep \warning \rfc + \verbatiminput \methodline \textgreater \seetitle \lineiv + \funclineni \ulink \manpage \funcline \dataline \unspecified + \textbackslash \mimetype \mailheader \seepep \textunderscore + \longprogramopt \infinity \plusminus \shortversion \version + \refmodindex \seerfc \makeindex \makemodindex \renewcommand + \indexname \appendix \protect \indexiv \mbox \textasciitilde + \platform \seeurl \leftmargin \labelwidth \localmoduletable + \LaTeX \copyright \memberline \backslash \pi \centerline + \caption \vspace \textwidth \menuselection \textless + \makevar \csimplemacro \menuselection \bfcode \sub \release + \email \kwindex \refexmodindex \filenq \e \menuselection + \exindex \linev \newsgroup \verbatim \setshortversion + \author \authoraddress \paragraph \subparagraph \cmemberline + \textbar \C \seelink +""" + +def matchclose(c_lineno, c_symbol, openers, pairmap): + "Verify that closing delimiter matches most recent opening delimiter" + try: + o_lineno, o_symbol = openers.pop() + except IndexError: + print "\nDelimiter mismatch. On line %d, encountered closing '%s' without corresponding open" % (c_lineno, c_symbol) + return + if o_symbol in pairmap.get(c_symbol, [c_symbol]): return + print "\nOpener '%s' on line %d was not closed before encountering '%s' on line %d" % (o_symbol, o_lineno, c_symbol, c_lineno) + return + +def checkit(source, opts, morecmds=[]): + """Check the LaTeX formatting in a sequence of lines. + + Opts is a mapping of options to option values if any: + -m munge parenthesis and brackets + -d delimiters only checking + -v verbose trace of delimiter matching + -s lineno: linenumber to start scan (default is 1). + + Morecmds is a sequence of LaTeX commands (without backslashes) that + are to be considered valid in the scan. + """ + + texcmd = re.compile(r'\\[A-Za-z]+') + falsetexcmd = re.compile(r'\/([A-Za-z]+)') # Mismarked with forward slash + + validcmds = set(cmdstr.split()) + for cmd in morecmds: + validcmds.add('\\' + cmd) + + if '-m' in opts: + pairmap = {']':'[(', ')':'(['} # Munged openers + else: + pairmap = {']':'[', ')':'('} # Normal opener for a given closer + openpunct = set('([') # Set of valid openers + + delimiters = re.compile(r'\\(begin|end){([_a-zA-Z]+)}|([()\[\]])') + braces = re.compile(r'({)|(})') + doubledwords = re.compile(r'(\b[A-za-z]+\b) \b\1\b') + spacingmarkup = re.compile(r'\\(ABC|ASCII|C|Cpp|EOF|infinity|NULL|plusminus|POSIX|UNIX)\s') + + openers = [] # Stack of pending open delimiters + bracestack = [] # Stack of pending open braces + + tablestart = re.compile(r'\\begin{(?:long)?table([iv]+)}') + tableline = re.compile(r'\\line([iv]+){') + tableend = re.compile(r'\\end{(?:long)?table([iv]+)}') + tablelevel = '' + tablestartline = 0 + + startline = int(opts.get('-s', '1')) + lineno = 0 + + for lineno, line in izip(count(startline), islice(source, startline-1, None)): + line = line.rstrip() + + # Check balancing of open/close parenthesis, brackets, and begin/end blocks + for begend, name, punct in delimiters.findall(line): + if '-v' in opts: + print lineno, '|', begend, name, punct, + if begend == 'begin' and '-d' not in opts: + openers.append((lineno, name)) + elif punct in openpunct: + openers.append((lineno, punct)) + elif begend == 'end' and '-d' not in opts: + matchclose(lineno, name, openers, pairmap) + elif punct in pairmap: + matchclose(lineno, punct, openers, pairmap) + if '-v' in opts: + print ' --> ', openers + + # Balance opening and closing braces + for open, close in braces.findall(line): + if open == '{': + bracestack.append(lineno) + if close == '}': + try: + bracestack.pop() + except IndexError: + print r'Warning, unmatched } on line %s.' % (lineno,) + + # Optionally, skip LaTeX specific checks + if '-d' in opts: + continue + + # Warn whenever forward slashes encountered with a LaTeX command + for cmd in falsetexcmd.findall(line): + if '822' in line or '.html' in line: + continue # Ignore false positives for urls and for /rfc822 + if '\\' + cmd in validcmds: + print 'Warning, forward slash used on line %d with cmd: /%s' % (lineno, cmd) + + # Check for markup requiring {} for correct spacing + for cmd in spacingmarkup.findall(line): + print r'Warning, \%s should be written as \%s{} on line %d' % (cmd, cmd, lineno) + + # Validate commands + nc = line.find(r'\newcommand') + if nc != -1: + start = line.find('{', nc) + end = line.find('}', start) + validcmds.add(line[start+1:end]) + for cmd in texcmd.findall(line): + if cmd not in validcmds: + print r'Warning, unknown tex cmd on line %d: \%s' % (lineno, cmd) + + # Check table levels (make sure lineii only inside tableii) + m = tablestart.search(line) + if m: + tablelevel = m.group(1) + tablestartline = lineno + m = tableline.search(line) + if m and m.group(1) != tablelevel: + print r'Warning, \line%s on line %d does not match \table%s on line %d' % (m.group(1), lineno, tablelevel, tablestartline) + if tableend.search(line): + tablelevel = '' + + # Style guide warnings + if 'e.g.' in line or 'i.e.' in line: + print r'Style warning, avoid use of i.e or e.g. on line %d' % (lineno,) + + for dw in doubledwords.findall(line): + print r'Doubled word warning. "%s" on line %d' % (dw, lineno) + + lastline = lineno + for lineno, symbol in openers: + print "Unmatched open delimiter '%s' on line %d" % (symbol, lineno) + for lineno in bracestack: + print "Unmatched { on line %d" % (lineno,) + print 'Done checking %d lines.' % (lastline,) + return 0 + +def main(args=None): + if args is None: + args = sys.argv[1:] + optitems, arglist = getopt.getopt(args, "k:mdhs:v") + opts = dict(optitems) + if '-h' in opts or args==[]: + print __doc__ + return 0 + + if len(arglist) < 1: + print 'Please specify a file to be checked' + return 1 + + for i, filespec in enumerate(arglist): + if '*' in filespec or '?' in filespec: + arglist[i:i+1] = glob.glob(filespec) + + morecmds = [v for k,v in optitems if k=='-k'] + err = [] + + for filename in arglist: + print '=' * 30 + print "Checking", filename + try: + f = open(filename) + except IOError: + print 'Cannot open file %s.' % arglist[0] + return 2 + + try: + err.append(checkit(f, opts, morecmds)) + finally: + f.close() + + return max(err) + +if __name__ == '__main__': + sys.exit(main()) |