diff options
Diffstat (limited to 'lib/python2.7/lib2to3/refactor.py')
-rw-r--r-- | lib/python2.7/lib2to3/refactor.py | 747 |
1 files changed, 747 insertions, 0 deletions
diff --git a/lib/python2.7/lib2to3/refactor.py b/lib/python2.7/lib2to3/refactor.py new file mode 100644 index 0000000..98386c5 --- /dev/null +++ b/lib/python2.7/lib2to3/refactor.py @@ -0,0 +1,747 @@ +# Copyright 2006 Google, Inc. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. + +"""Refactoring framework. + +Used as a main program, this can refactor any number of files and/or +recursively descend down directories. Imported as a module, this +provides infrastructure to write your own refactoring tool. +""" + +from __future__ import with_statement + +__author__ = "Guido van Rossum <guido@python.org>" + + +# Python imports +import os +import sys +import logging +import operator +import collections +import StringIO +from itertools import chain + +# Local imports +from .pgen2 import driver, tokenize, token +from .fixer_util import find_root +from . import pytree, pygram +from . import btm_utils as bu +from . import btm_matcher as bm + + +def get_all_fix_names(fixer_pkg, remove_prefix=True): + """Return a sorted list of all available fix names in the given package.""" + pkg = __import__(fixer_pkg, [], [], ["*"]) + fixer_dir = os.path.dirname(pkg.__file__) + fix_names = [] + for name in sorted(os.listdir(fixer_dir)): + if name.startswith("fix_") and name.endswith(".py"): + if remove_prefix: + name = name[4:] + fix_names.append(name[:-3]) + return fix_names + + +class _EveryNode(Exception): + pass + + +def _get_head_types(pat): + """ Accepts a pytree Pattern Node and returns a set + of the pattern types which will match first. """ + + if isinstance(pat, (pytree.NodePattern, pytree.LeafPattern)): + # NodePatters must either have no type and no content + # or a type and content -- so they don't get any farther + # Always return leafs + if pat.type is None: + raise _EveryNode + return set([pat.type]) + + if isinstance(pat, pytree.NegatedPattern): + if pat.content: + return _get_head_types(pat.content) + raise _EveryNode # Negated Patterns don't have a type + + if isinstance(pat, pytree.WildcardPattern): + # Recurse on each node in content + r = set() + for p in pat.content: + for x in p: + r.update(_get_head_types(x)) + return r + + raise Exception("Oh no! I don't understand pattern %s" %(pat)) + + +def _get_headnode_dict(fixer_list): + """ Accepts a list of fixers and returns a dictionary + of head node type --> fixer list. """ + head_nodes = collections.defaultdict(list) + every = [] + for fixer in fixer_list: + if fixer.pattern: + try: + heads = _get_head_types(fixer.pattern) + except _EveryNode: + every.append(fixer) + else: + for node_type in heads: + head_nodes[node_type].append(fixer) + else: + if fixer._accept_type is not None: + head_nodes[fixer._accept_type].append(fixer) + else: + every.append(fixer) + for node_type in chain(pygram.python_grammar.symbol2number.itervalues(), + pygram.python_grammar.tokens): + head_nodes[node_type].extend(every) + return dict(head_nodes) + + +def get_fixers_from_package(pkg_name): + """ + Return the fully qualified names for fixers in the package pkg_name. + """ + return [pkg_name + "." + fix_name + for fix_name in get_all_fix_names(pkg_name, False)] + +def _identity(obj): + return obj + +if sys.version_info < (3, 0): + import codecs + _open_with_encoding = codecs.open + # codecs.open doesn't translate newlines sadly. + def _from_system_newlines(input): + return input.replace(u"\r\n", u"\n") + def _to_system_newlines(input): + if os.linesep != "\n": + return input.replace(u"\n", os.linesep) + else: + return input +else: + _open_with_encoding = open + _from_system_newlines = _identity + _to_system_newlines = _identity + + +def _detect_future_features(source): + have_docstring = False + gen = tokenize.generate_tokens(StringIO.StringIO(source).readline) + def advance(): + tok = gen.next() + return tok[0], tok[1] + ignore = frozenset((token.NEWLINE, tokenize.NL, token.COMMENT)) + features = set() + try: + while True: + tp, value = advance() + if tp in ignore: + continue + elif tp == token.STRING: + if have_docstring: + break + have_docstring = True + elif tp == token.NAME and value == u"from": + tp, value = advance() + if tp != token.NAME or value != u"__future__": + break + tp, value = advance() + if tp != token.NAME or value != u"import": + break + tp, value = advance() + if tp == token.OP and value == u"(": + tp, value = advance() + while tp == token.NAME: + features.add(value) + tp, value = advance() + if tp != token.OP or value != u",": + break + tp, value = advance() + else: + break + except StopIteration: + pass + return frozenset(features) + + +class FixerError(Exception): + """A fixer could not be loaded.""" + + +class RefactoringTool(object): + + _default_options = {"print_function" : False, + "write_unchanged_files" : False} + + CLASS_PREFIX = "Fix" # The prefix for fixer classes + FILE_PREFIX = "fix_" # The prefix for modules with a fixer within + + def __init__(self, fixer_names, options=None, explicit=None): + """Initializer. + + Args: + fixer_names: a list of fixers to import + options: a dict with configuration. + explicit: a list of fixers to run even if they are explicit. + """ + self.fixers = fixer_names + self.explicit = explicit or [] + self.options = self._default_options.copy() + if options is not None: + self.options.update(options) + if self.options["print_function"]: + self.grammar = pygram.python_grammar_no_print_statement + else: + self.grammar = pygram.python_grammar + # When this is True, the refactor*() methods will call write_file() for + # files processed even if they were not changed during refactoring. If + # and only if the refactor method's write parameter was True. + self.write_unchanged_files = self.options.get("write_unchanged_files") + self.errors = [] + self.logger = logging.getLogger("RefactoringTool") + self.fixer_log = [] + self.wrote = False + self.driver = driver.Driver(self.grammar, + convert=pytree.convert, + logger=self.logger) + self.pre_order, self.post_order = self.get_fixers() + + + self.files = [] # List of files that were or should be modified + + self.BM = bm.BottomMatcher() + self.bmi_pre_order = [] # Bottom Matcher incompatible fixers + self.bmi_post_order = [] + + for fixer in chain(self.post_order, self.pre_order): + if fixer.BM_compatible: + self.BM.add_fixer(fixer) + # remove fixers that will be handled by the bottom-up + # matcher + elif fixer in self.pre_order: + self.bmi_pre_order.append(fixer) + elif fixer in self.post_order: + self.bmi_post_order.append(fixer) + + self.bmi_pre_order_heads = _get_headnode_dict(self.bmi_pre_order) + self.bmi_post_order_heads = _get_headnode_dict(self.bmi_post_order) + + + + def get_fixers(self): + """Inspects the options to load the requested patterns and handlers. + + Returns: + (pre_order, post_order), where pre_order is the list of fixers that + want a pre-order AST traversal, and post_order is the list that want + post-order traversal. + """ + pre_order_fixers = [] + post_order_fixers = [] + for fix_mod_path in self.fixers: + mod = __import__(fix_mod_path, {}, {}, ["*"]) + fix_name = fix_mod_path.rsplit(".", 1)[-1] + if fix_name.startswith(self.FILE_PREFIX): + fix_name = fix_name[len(self.FILE_PREFIX):] + parts = fix_name.split("_") + class_name = self.CLASS_PREFIX + "".join([p.title() for p in parts]) + try: + fix_class = getattr(mod, class_name) + except AttributeError: + raise FixerError("Can't find %s.%s" % (fix_name, class_name)) + fixer = fix_class(self.options, self.fixer_log) + if fixer.explicit and self.explicit is not True and \ + fix_mod_path not in self.explicit: + self.log_message("Skipping optional fixer: %s", fix_name) + continue + + self.log_debug("Adding transformation: %s", fix_name) + if fixer.order == "pre": + pre_order_fixers.append(fixer) + elif fixer.order == "post": + post_order_fixers.append(fixer) + else: + raise FixerError("Illegal fixer order: %r" % fixer.order) + + key_func = operator.attrgetter("run_order") + pre_order_fixers.sort(key=key_func) + post_order_fixers.sort(key=key_func) + return (pre_order_fixers, post_order_fixers) + + def log_error(self, msg, *args, **kwds): + """Called when an error occurs.""" + raise + + def log_message(self, msg, *args): + """Hook to log a message.""" + if args: + msg = msg % args + self.logger.info(msg) + + def log_debug(self, msg, *args): + if args: + msg = msg % args + self.logger.debug(msg) + + def print_output(self, old_text, new_text, filename, equal): + """Called with the old version, new version, and filename of a + refactored file.""" + pass + + def refactor(self, items, write=False, doctests_only=False): + """Refactor a list of files and directories.""" + + for dir_or_file in items: + if os.path.isdir(dir_or_file): + self.refactor_dir(dir_or_file, write, doctests_only) + else: + self.refactor_file(dir_or_file, write, doctests_only) + + def refactor_dir(self, dir_name, write=False, doctests_only=False): + """Descends down a directory and refactor every Python file found. + + Python files are assumed to have a .py extension. + + Files and subdirectories starting with '.' are skipped. + """ + py_ext = os.extsep + "py" + for dirpath, dirnames, filenames in os.walk(dir_name): + self.log_debug("Descending into %s", dirpath) + dirnames.sort() + filenames.sort() + for name in filenames: + if (not name.startswith(".") and + os.path.splitext(name)[1] == py_ext): + fullname = os.path.join(dirpath, name) + self.refactor_file(fullname, write, doctests_only) + # Modify dirnames in-place to remove subdirs with leading dots + dirnames[:] = [dn for dn in dirnames if not dn.startswith(".")] + + def _read_python_source(self, filename): + """ + Do our best to decode a Python source file correctly. + """ + try: + f = open(filename, "rb") + except IOError as err: + self.log_error("Can't open %s: %s", filename, err) + return None, None + try: + encoding = tokenize.detect_encoding(f.readline)[0] + finally: + f.close() + with _open_with_encoding(filename, "r", encoding=encoding) as f: + return _from_system_newlines(f.read()), encoding + + def refactor_file(self, filename, write=False, doctests_only=False): + """Refactors a file.""" + input, encoding = self._read_python_source(filename) + if input is None: + # Reading the file failed. + return + input += u"\n" # Silence certain parse errors + if doctests_only: + self.log_debug("Refactoring doctests in %s", filename) + output = self.refactor_docstring(input, filename) + if self.write_unchanged_files or output != input: + self.processed_file(output, filename, input, write, encoding) + else: + self.log_debug("No doctest changes in %s", filename) + else: + tree = self.refactor_string(input, filename) + if self.write_unchanged_files or (tree and tree.was_changed): + # The [:-1] is to take off the \n we added earlier + self.processed_file(unicode(tree)[:-1], filename, + write=write, encoding=encoding) + else: + self.log_debug("No changes in %s", filename) + + def refactor_string(self, data, name): + """Refactor a given input string. + + Args: + data: a string holding the code to be refactored. + name: a human-readable name for use in error/log messages. + + Returns: + An AST corresponding to the refactored input stream; None if + there were errors during the parse. + """ + features = _detect_future_features(data) + if "print_function" in features: + self.driver.grammar = pygram.python_grammar_no_print_statement + try: + tree = self.driver.parse_string(data) + except Exception as err: + self.log_error("Can't parse %s: %s: %s", + name, err.__class__.__name__, err) + return + finally: + self.driver.grammar = self.grammar + tree.future_features = features + self.log_debug("Refactoring %s", name) + self.refactor_tree(tree, name) + return tree + + def refactor_stdin(self, doctests_only=False): + input = sys.stdin.read() + if doctests_only: + self.log_debug("Refactoring doctests in stdin") + output = self.refactor_docstring(input, "<stdin>") + if self.write_unchanged_files or output != input: + self.processed_file(output, "<stdin>", input) + else: + self.log_debug("No doctest changes in stdin") + else: + tree = self.refactor_string(input, "<stdin>") + if self.write_unchanged_files or (tree and tree.was_changed): + self.processed_file(unicode(tree), "<stdin>", input) + else: + self.log_debug("No changes in stdin") + + def refactor_tree(self, tree, name): + """Refactors a parse tree (modifying the tree in place). + + For compatible patterns the bottom matcher module is + used. Otherwise the tree is traversed node-to-node for + matches. + + Args: + tree: a pytree.Node instance representing the root of the tree + to be refactored. + name: a human-readable name for this tree. + + Returns: + True if the tree was modified, False otherwise. + """ + + for fixer in chain(self.pre_order, self.post_order): + fixer.start_tree(tree, name) + + #use traditional matching for the incompatible fixers + self.traverse_by(self.bmi_pre_order_heads, tree.pre_order()) + self.traverse_by(self.bmi_post_order_heads, tree.post_order()) + + # obtain a set of candidate nodes + match_set = self.BM.run(tree.leaves()) + + while any(match_set.values()): + for fixer in self.BM.fixers: + if fixer in match_set and match_set[fixer]: + #sort by depth; apply fixers from bottom(of the AST) to top + match_set[fixer].sort(key=pytree.Base.depth, reverse=True) + + if fixer.keep_line_order: + #some fixers(eg fix_imports) must be applied + #with the original file's line order + match_set[fixer].sort(key=pytree.Base.get_lineno) + + for node in list(match_set[fixer]): + if node in match_set[fixer]: + match_set[fixer].remove(node) + + try: + find_root(node) + except ValueError: + # this node has been cut off from a + # previous transformation ; skip + continue + + if node.fixers_applied and fixer in node.fixers_applied: + # do not apply the same fixer again + continue + + results = fixer.match(node) + + if results: + new = fixer.transform(node, results) + if new is not None: + node.replace(new) + #new.fixers_applied.append(fixer) + for node in new.post_order(): + # do not apply the fixer again to + # this or any subnode + if not node.fixers_applied: + node.fixers_applied = [] + node.fixers_applied.append(fixer) + + # update the original match set for + # the added code + new_matches = self.BM.run(new.leaves()) + for fxr in new_matches: + if not fxr in match_set: + match_set[fxr]=[] + + match_set[fxr].extend(new_matches[fxr]) + + for fixer in chain(self.pre_order, self.post_order): + fixer.finish_tree(tree, name) + return tree.was_changed + + def traverse_by(self, fixers, traversal): + """Traverse an AST, applying a set of fixers to each node. + + This is a helper method for refactor_tree(). + + Args: + fixers: a list of fixer instances. + traversal: a generator that yields AST nodes. + + Returns: + None + """ + if not fixers: + return + for node in traversal: + for fixer in fixers[node.type]: + results = fixer.match(node) + if results: + new = fixer.transform(node, results) + if new is not None: + node.replace(new) + node = new + + def processed_file(self, new_text, filename, old_text=None, write=False, + encoding=None): + """ + Called when a file has been refactored and there may be changes. + """ + self.files.append(filename) + if old_text is None: + old_text = self._read_python_source(filename)[0] + if old_text is None: + return + equal = old_text == new_text + self.print_output(old_text, new_text, filename, equal) + if equal: + self.log_debug("No changes to %s", filename) + if not self.write_unchanged_files: + return + if write: + self.write_file(new_text, filename, old_text, encoding) + else: + self.log_debug("Not writing changes to %s", filename) + + def write_file(self, new_text, filename, old_text, encoding=None): + """Writes a string to a file. + + It first shows a unified diff between the old text and the new text, and + then rewrites the file; the latter is only done if the write option is + set. + """ + try: + f = _open_with_encoding(filename, "w", encoding=encoding) + except os.error as err: + self.log_error("Can't create %s: %s", filename, err) + return + try: + f.write(_to_system_newlines(new_text)) + except os.error as err: + self.log_error("Can't write %s: %s", filename, err) + finally: + f.close() + self.log_debug("Wrote changes to %s", filename) + self.wrote = True + + PS1 = ">>> " + PS2 = "... " + + def refactor_docstring(self, input, filename): + """Refactors a docstring, looking for doctests. + + This returns a modified version of the input string. It looks + for doctests, which start with a ">>>" prompt, and may be + continued with "..." prompts, as long as the "..." is indented + the same as the ">>>". + + (Unfortunately we can't use the doctest module's parser, + since, like most parsers, it is not geared towards preserving + the original source.) + """ + result = [] + block = None + block_lineno = None + indent = None + lineno = 0 + for line in input.splitlines(True): + lineno += 1 + if line.lstrip().startswith(self.PS1): + if block is not None: + result.extend(self.refactor_doctest(block, block_lineno, + indent, filename)) + block_lineno = lineno + block = [line] + i = line.find(self.PS1) + indent = line[:i] + elif (indent is not None and + (line.startswith(indent + self.PS2) or + line == indent + self.PS2.rstrip() + u"\n")): + block.append(line) + else: + if block is not None: + result.extend(self.refactor_doctest(block, block_lineno, + indent, filename)) + block = None + indent = None + result.append(line) + if block is not None: + result.extend(self.refactor_doctest(block, block_lineno, + indent, filename)) + return u"".join(result) + + def refactor_doctest(self, block, lineno, indent, filename): + """Refactors one doctest. + + A doctest is given as a block of lines, the first of which starts + with ">>>" (possibly indented), while the remaining lines start + with "..." (identically indented). + + """ + try: + tree = self.parse_block(block, lineno, indent) + except Exception as err: + if self.logger.isEnabledFor(logging.DEBUG): + for line in block: + self.log_debug("Source: %s", line.rstrip(u"\n")) + self.log_error("Can't parse docstring in %s line %s: %s: %s", + filename, lineno, err.__class__.__name__, err) + return block + if self.refactor_tree(tree, filename): + new = unicode(tree).splitlines(True) + # Undo the adjustment of the line numbers in wrap_toks() below. + clipped, new = new[:lineno-1], new[lineno-1:] + assert clipped == [u"\n"] * (lineno-1), clipped + if not new[-1].endswith(u"\n"): + new[-1] += u"\n" + block = [indent + self.PS1 + new.pop(0)] + if new: + block += [indent + self.PS2 + line for line in new] + return block + + def summarize(self): + if self.wrote: + were = "were" + else: + were = "need to be" + if not self.files: + self.log_message("No files %s modified.", were) + else: + self.log_message("Files that %s modified:", were) + for file in self.files: + self.log_message(file) + if self.fixer_log: + self.log_message("Warnings/messages while refactoring:") + for message in self.fixer_log: + self.log_message(message) + if self.errors: + if len(self.errors) == 1: + self.log_message("There was 1 error:") + else: + self.log_message("There were %d errors:", len(self.errors)) + for msg, args, kwds in self.errors: + self.log_message(msg, *args, **kwds) + + def parse_block(self, block, lineno, indent): + """Parses a block into a tree. + + This is necessary to get correct line number / offset information + in the parser diagnostics and embedded into the parse tree. + """ + tree = self.driver.parse_tokens(self.wrap_toks(block, lineno, indent)) + tree.future_features = frozenset() + return tree + + def wrap_toks(self, block, lineno, indent): + """Wraps a tokenize stream to systematically modify start/end.""" + tokens = tokenize.generate_tokens(self.gen_lines(block, indent).next) + for type, value, (line0, col0), (line1, col1), line_text in tokens: + line0 += lineno - 1 + line1 += lineno - 1 + # Don't bother updating the columns; this is too complicated + # since line_text would also have to be updated and it would + # still break for tokens spanning lines. Let the user guess + # that the column numbers for doctests are relative to the + # end of the prompt string (PS1 or PS2). + yield type, value, (line0, col0), (line1, col1), line_text + + + def gen_lines(self, block, indent): + """Generates lines as expected by tokenize from a list of lines. + + This strips the first len(indent + self.PS1) characters off each line. + """ + prefix1 = indent + self.PS1 + prefix2 = indent + self.PS2 + prefix = prefix1 + for line in block: + if line.startswith(prefix): + yield line[len(prefix):] + elif line == prefix.rstrip() + u"\n": + yield u"\n" + else: + raise AssertionError("line=%r, prefix=%r" % (line, prefix)) + prefix = prefix2 + while True: + yield "" + + +class MultiprocessingUnsupported(Exception): + pass + + +class MultiprocessRefactoringTool(RefactoringTool): + + def __init__(self, *args, **kwargs): + super(MultiprocessRefactoringTool, self).__init__(*args, **kwargs) + self.queue = None + self.output_lock = None + + def refactor(self, items, write=False, doctests_only=False, + num_processes=1): + if num_processes == 1: + return super(MultiprocessRefactoringTool, self).refactor( + items, write, doctests_only) + try: + import multiprocessing + except ImportError: + raise MultiprocessingUnsupported + if self.queue is not None: + raise RuntimeError("already doing multiple processes") + self.queue = multiprocessing.JoinableQueue() + self.output_lock = multiprocessing.Lock() + processes = [multiprocessing.Process(target=self._child) + for i in xrange(num_processes)] + try: + for p in processes: + p.start() + super(MultiprocessRefactoringTool, self).refactor(items, write, + doctests_only) + finally: + self.queue.join() + for i in xrange(num_processes): + self.queue.put(None) + for p in processes: + if p.is_alive(): + p.join() + self.queue = None + + def _child(self): + task = self.queue.get() + while task is not None: + args, kwargs = task + try: + super(MultiprocessRefactoringTool, self).refactor_file( + *args, **kwargs) + finally: + self.queue.task_done() + task = self.queue.get() + + def refactor_file(self, *args, **kwargs): + if self.queue is not None: + self.queue.put((args, kwargs)) + else: + return super(MultiprocessRefactoringTool, self).refactor_file( + *args, **kwargs) |