diff options
Diffstat (limited to 'docs/sphinx/hieroglyph/hieroglyph.py')
-rw-r--r-- | docs/sphinx/hieroglyph/hieroglyph.py | 404 |
1 files changed, 404 insertions, 0 deletions
diff --git a/docs/sphinx/hieroglyph/hieroglyph.py b/docs/sphinx/hieroglyph/hieroglyph.py new file mode 100644 index 000000000..0056d9ab8 --- /dev/null +++ b/docs/sphinx/hieroglyph/hieroglyph.py @@ -0,0 +1,404 @@ +from __future__ import print_function + +import re + +from errors import HieroglyphError +from nodes import (Node, Raises, Except, Note, Warning, Returns, Arg, + ensure_terminal_blank) + +__author__ = 'Robert Smallshire' + +def parse_hieroglyph_text(lines): + '''Parse text in hieroglyph format and return a reStructuredText equivalent + + Args: + lines: A sequence of strings representing the lines of a single + docstring as read from the source by Sphinx. This string should be + in a format that can be parsed by hieroglyph. + + Returns: + A list of lines containing the transformed docstring as + reStructuredText as produced by hieroglyph. + + Raises: + RuntimeError: If the docstring cannot be parsed. + ''' + indent_lines = unindent(lines) + indent_lines = pad_blank_lines(indent_lines) + indent_lines = first_paragraph_indent(indent_lines) + indent_paragraphs = gather_lines(indent_lines) + parse_tree = group_paragraphs(indent_paragraphs) + syntax_tree = extract_structure(parse_tree) + result = syntax_tree.render_rst() + ensure_terminal_blank(result) + return result + + +def unindent(lines): + '''Convert an iterable of indented lines into a sequence of tuples. + + The first element of each tuple is the indent in number of characters, and + the second element is the unindented string. + + Args: + lines: A sequence of strings representing the lines of text in a docstring. + + Returns: + A list of tuples where each tuple corresponds to one line of the input + list. Each tuple has two entries - the first is an integer giving the + size of the indent in characters, the second is the unindented text. + ''' + unindented_lines = [] + for line in lines: + unindented_line = line.lstrip() + indent = len(line) - len(unindented_line) + unindented_lines.append((indent, unindented_line)) + return unindented_lines + + +def pad_blank_lines(indent_texts): + '''Give blank (empty) lines the same indent level as the preceding line. + + Args: + indent_texts: An iterable of tuples each containing an integer in the + first element and a string in the second element. + + Returns: + A list of tuples each containing an integer in the first element and a + string in the second element. + ''' + current_indent = 0 + result = [] + for indent, text in indent_texts: + if len(text) > 0: + current_indent = indent + result.append((current_indent, text)) + return result + + +def extract_structure(parse_tree): + '''Create an Abstract Syntax Tree representing the semantics of a parse tree. + + Args: + parse_tree: TODO + + Returns: + A Node with is the result of an Abstract Syntax Tree representing the + docstring. + + Raises: + HieroglyphError: In the event that the parse tree cannot be understood. + ''' + return convert_node(parse_tree) + + +def convert_node(node): + if node.indent == 0 and len(node.lines) == 0: + return convert_children(node) + if node.lines[0].startswith('Args:'): + return convert_args(node) + if node.lines[0].startswith('Returns:'): + return convert_returns(node) + if node.lines[0].startswith('Raises:'): + return convert_raises(node) + if node.lines[0].startswith('Note:'): + return convert_note(node) + if node.lines[0].startswith('Warning:'): + return convert_warning(node) + result = convert_children(node) + result.lines = node.lines + result.indent = node.indent + return result + + +def convert_children(node): + converted_children = [convert_node(child) for child in node.children] + result = Node() + result.children = converted_children + return result + + +ARG_REGEX = re.compile(r'(\*{0,2}\w+)(\s+\((\w+)\))?\s*:\s*(.*)') + +def append_child_to_args_group_node(child, group_node, indent): + arg = None + non_empty_lines = (line for line in child.lines if line) + for line in non_empty_lines: + m = ARG_REGEX.match(line) + if m is None: + raise HieroglyphError("Invalid hieroglyph argument syntax: {0}".format(line)) + param_name = m.group(1) + param_type = m.group(3) + param_text = m.group(4) + + arg = Arg(indent, child.indent, param_name) + group_node.children.append(arg) + arg.type = param_type + + if param_text is not None: + arg.children.append(Node(indent, [param_text], arg)) + if arg is not None: + last_child = arg.children[-1] if len(arg.children) != 0 else arg + for grandchild in child.children: + last_child.children.append(grandchild) + + +def convert_args(node): + assert node.lines[0].startswith('Args:') + group_node = Node() + for child in node.children: + append_child_to_args_group_node(child, group_node, node.indent) + return group_node + + +def convert_returns(node): + assert node.lines[0].startswith('Returns:') + returns = Returns(node.indent) + returns.line = node.lines[0][8:].strip() + returns.children = node.children + return returns + + +def convert_note(node): + assert node.lines[0].startswith('Note:') + note = Note(node.indent) + note.line = node.lines[0][5:].strip() + note.children = node.children + return note + + +def convert_warning(node): + assert node.lines[0].startswith('Warning:') + warning = Warning(node.indent) + warning.line = node.lines[0][8:].strip() + warning.children = node.children + return warning + + +def convert_raises(node): + assert node.lines[0].startswith('Raises:') + group_node = Raises(node.indent) + for child in node.children: + append_child_to_raise_node(child, group_node) + return group_node + + +RAISE_REGEX = re.compile(r'(\w+)\s*:\s*(.*)') + +def extract_exception_type_and_text(line): + m = RAISE_REGEX.match(line) + if m is None: + raise HieroglyphError("Invalid hieroglyph exception syntax: {0}".format(line)) + return (m.group(2), m.group(1)) + + +def append_child_to_raise_node(child, group_node): + exception = None + non_empty_lines = (line for line in child.lines if line) + for line in non_empty_lines: + exception_text, exception_type = extract_exception_type_and_text(line) + + exception = Except(child.indent, exception_type) + group_node.children.append(exception) # TODO: Could use parent here. + + if exception_text is not None: + exception.children.append( Node(child.indent, + [exception_text], exception)) + if exception is not None: + last_child = exception.children[-1] if len(exception.children) != 0 else exception + for grandchild in child.children: + last_child.children.append(grandchild) + + +def group_paragraphs(indent_paragraphs): + ''' + Group paragraphs so that more indented paragraphs become children of less + indented paragraphs. + ''' + # The tree consists of tuples of the form (indent, [children]) where the + # children may be strings or other tuples + + root = Node(0, [], None) + current_node = root + + previous_indent = -1 + for indent, lines in indent_paragraphs: + if indent > previous_indent: + current_node = create_child_node(current_node, indent, lines) + elif indent == previous_indent: + current_node = create_sibling_node(current_node, indent, lines) + elif indent < previous_indent: + current_node = create_uncle_node(current_node, indent, lines) + previous_indent = indent + return root + + +def create_sibling_node(current_node, indent, lines): + sibling = Node(indent, lines, current_node.parent) + current_node.parent.add_child(sibling) + current_node = sibling + return current_node + + +def create_child_node(current_node, indent, lines): + child = Node(indent, lines, current_node) + current_node.add_child(child) + current_node = child + return current_node + + +def create_uncle_node(current_node, indent, lines): + ancestor = current_node + while ancestor.indent >= indent: + if ancestor.parent is None: + break + ancestor = ancestor.parent + uncle = Node(indent, lines, ancestor) + ancestor.add_child(uncle) + current_node = uncle + return current_node + + +def gather_lines(indent_lines): + '''Split the list of (int, str) tuples into a list of (int, [str]) tuples + to group the lines into paragraphs of consistent indent. + ''' + return remove_empty_paragraphs(split_separated_lines(gather_lines_by_indent(indent_lines))) + +def gather_lines_by_indent(indent_lines): + result = [] + previous_indent = -1 + for indent, line in indent_lines: + if indent != previous_indent: + paragraph = (indent, []) + result.append(paragraph) + else: + paragraph = result[-1] + paragraph[1].append(line) + previous_indent = indent + return result + +def split_separated_lines(indent_paragraphs): + result = [] + for indent, paragraph in indent_paragraphs: + result.append((indent, [])) + + if len(paragraph) > 0: + result[-1][1].append(paragraph[0]) + + if len(paragraph) > 2: + for line in paragraph[1: -1]: + result[-1][1].append(line) + if len(line) == 0: + result.append((indent, [])) + + if len(paragraph) > 1: + result[-1][1].append(paragraph[-1]) + + return result + +def remove_empty_paragraphs(indent_paragraphs): + return [(indent, paragraph) for indent, paragraph in indent_paragraphs if len(paragraph)] + +def first_paragraph_indent(indent_texts): + '''Fix the indentation on the first paragraph. + + This occurs because the first line of a multi-line docstring following the + opening quote usually has no indent. + + Args: + indent_texts: The lines of the docstring as an iterable over 2-tuples + each containing an integer indent level as the first element and + the text as the second element. + + Return: + A list of 2-tuples, each containing an integer indent level as the + first element and the text as the second element. + ''' + opening_indent = determine_opening_indent(indent_texts) + + result = [] + input = iter(indent_texts) + for indent, text in input: + if indent == 0: + result.append((opening_indent, text)) + else: + result.append((indent, text)) + break + + for indent, text in input: + result.append((indent, text)) + + return result + + +def determine_opening_indent(indent_texts): + '''Determine the opening indent level for a docstring. + + The opening indent level is the indent level is the first non-zero indent + level of a non-empty line in the docstring. + + Args: + indent_texts: The lines of the docstring as an iterable over 2-tuples + each containing an integer indent level as the first element and + the text as the second element. + + Returns: + The opening indent level as an integer. + ''' + num_lines = len(indent_texts) + + if num_lines < 1: + return 0 + + assert num_lines >= 1 + + first_line_indent = indent_texts[0][0] + + if num_lines == 1: + return first_line_indent + + assert num_lines >= 2 + + second_line_indent = indent_texts[1][0] + second_line_text = indent_texts[1][1] + + if len(second_line_text) == 0: + return first_line_indent + + return second_line_indent + + + +def rewrite_autodoc(app, what, name, obj, options, lines): + '''Convert lines from Hieroglyph to Sphinx format. + + The function to be called by the Sphinx autodoc extension when autodoc + has read and processed a docstring. This function modified its + ``lines`` argument *in place* replacing Hieroglyph syntax input into + Sphinx reStructuredText output. + + Args: + apps: The Sphinx application object. + + what: The type of object which the docstring belongs to. One of + 'module', 'class', 'exception', 'function', 'method', 'attribute' + + name: The fully qualified name of the object. + + obj: The object itself. + + options: The options given to the directive. An object with attributes + ``inherited_members``, ``undoc_members``, ``show_inheritance`` and + ``noindex`` that are ``True`` if the flag option of the same name + was given to the auto directive. + + lines: The lines of the docstring. Will be modified *in place*. + ''' + lines[:] = parse_hieroglyph_text(lines) + + +def setup(app): + app.connect('autodoc-process-docstring', rewrite_autodoc) + + |