1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
|
from __future__ import print_function
import re
from errors import HieroglyphError
from nodes import (Node, Raises, Except, Note, Warning, Returns, Arg,
ensure_terminal_blank)
__author__ = 'Robert Smallshire'
def parse_hieroglyph_text(lines):
    '''Translate a docstring written in hieroglyph format into reStructuredText.

    The input flows through a fixed pipeline: each line is split into an
    (indent, text) pair, blank lines inherit the indent of the line before
    them, the leading zero-indent lines are re-indented, lines are gathered
    into paragraphs, paragraphs are nested by indent into a tree, and the
    tree is converted into section nodes which are finally rendered.

    Args:
        lines: A sequence of strings holding the lines of a single docstring.

    Returns:
        The result of calling ``render_rst()`` on the converted tree, after
        it has been handed to ``ensure_terminal_blank``.

    Raises:
        HieroglyphError: If an entry of an ``Args:`` or ``Raises:`` section
            does not have the expected ``name: text`` form.
    '''
    indent_texts = first_paragraph_indent(pad_blank_lines(unindent(lines)))
    paragraph_tree = group_paragraphs(gather_lines(indent_texts))
    rendered = extract_structure(paragraph_tree).render_rst()
    ensure_terminal_blank(rendered)
    return rendered
def unindent(lines):
    '''Split every line into its leading-whitespace width and remaining text.

    Args:
        lines: An iterable of strings, one per docstring line.

    Returns:
        A list with one ``(indent, text)`` tuple per input line, where
        ``text`` is the line with leading whitespace removed and ``indent``
        is the number of characters that were removed.
    '''
    # The inner generator strips each line exactly once and keeps the input
    # single-pass, so one-shot iterables are handled as well as lists.
    return [(len(raw) - len(text), text)
            for raw, text in ((line, line.lstrip()) for line in lines)]
def pad_blank_lines(indent_texts):
    '''Make empty lines inherit the indent of the last non-empty line.

    Empty lines that appear before any non-empty line receive an indent of
    zero.

    Args:
        indent_texts: An iterable of ``(indent, text)`` tuples.

    Returns:
        A list of ``(indent, text)`` tuples of the same length as the input;
        the text is unchanged and only the indent of empty lines is replaced.
    '''
    padded = []
    carried_indent = 0
    for indent, text in indent_texts:
        # Only a line with content is allowed to move the carried indent.
        carried_indent = indent if len(text) > 0 else carried_indent
        padded.append((carried_indent, text))
    return padded
def extract_structure(parse_tree):
    '''Convert a paragraph tree into a tree of semantic docstring nodes.

    Args:
        parse_tree: The root node of the paragraph tree, as returned by
            ``group_paragraphs``.

    Returns:
        The node produced by ``convert_node`` for the root of the tree.

    Raises:
        HieroglyphError: In the event that the parse tree cannot be
            understood.
    '''
    syntax_tree = convert_node(parse_tree)
    return syntax_tree
def convert_node(node):
    '''Convert one paragraph node, dispatching on its first line.

    A node with zero indent and no lines is treated as a bare container and
    only its children are converted. A node whose first line starts with one
    of the section headings (``Args:``, ``Returns:``, ``Raises:``, ``Note:``,
    ``Warning:``) is handed to the matching section converter. Any other node
    has its children converted and keeps its own lines and indent.

    Args:
        node: A paragraph node exposing ``indent``, ``lines`` and
            ``children``.

    Returns:
        The converted node.
    '''
    is_bare_container = node.indent == 0 and len(node.lines) == 0
    if is_bare_container:
        return convert_children(node)

    # The headings are mutually exclusive prefixes, so at most one matches.
    section_converters = (
        ('Args:', convert_args),
        ('Returns:', convert_returns),
        ('Raises:', convert_raises),
        ('Note:', convert_note),
        ('Warning:', convert_warning),
    )
    for heading, converter in section_converters:
        if node.lines[0].startswith(heading):
            return converter(node)

    plain = convert_children(node)
    plain.lines = node.lines
    plain.indent = node.indent
    return plain
def convert_children(node):
    '''Convert every child of a node and collect the results in a new Node.

    Args:
        node: A node whose ``children`` are each passed to ``convert_node``.

    Returns:
        A default-constructed ``Node`` whose ``children`` attribute is the
        list of converted children, in their original order.
    '''
    converted = list(map(convert_node, node.children))
    container = Node()
    container.children = converted
    return container
# Matches one entry of an ``Args:`` section, either ``name: text`` or
# ``name (type): text``.
#   group 1 - the parameter name, optionally prefixed by ``*`` or ``**``
#   group 3 - the optional type written between parentheses; any text free of
#             nested parentheses is accepted, so ``list of str``,
#             ``dict[str, int]`` and ``os.PathLike`` are valid types
#   group 4 - the description following the colon (may be empty)
ARG_REGEX = re.compile(r'(\*{0,2}\w+)(\s+\(([^()]+)\))?\s*:\s*(.*)')


def append_child_to_args_group_node(child, group_node, indent):
    '''Turn one paragraph of an ``Args:`` section into Arg nodes.

    Every non-empty line of ``child`` must describe one argument. An ``Arg``
    node is appended to ``group_node.children`` for each such line, carrying
    the parsed type and a text node holding the description. The children of
    ``child`` (its more deeply indented paragraphs) are then attached beneath
    the last argument that was created.

    Args:
        child: A paragraph node exposing ``lines``, ``indent`` and
            ``children``.
        group_node: The node whose ``children`` list receives the new Arg
            nodes.
        indent: The indent passed to each ``Arg`` and to its description
            node.

    Raises:
        HieroglyphError: If a non-empty line does not match ``ARG_REGEX``.
    '''
    arg = None
    for line in child.lines:
        if not line:
            continue

        match = ARG_REGEX.match(line)
        if match is None:
            raise HieroglyphError("Invalid hieroglyph argument syntax: {0}".format(line))
        param_name, param_type, param_text = match.group(1, 3, 4)

        arg = Arg(indent, child.indent, param_name)
        group_node.children.append(arg)
        arg.type = param_type

        if param_text is not None:
            arg.children.append(Node(indent, [param_text], arg))

    # No argument was created when the paragraph holds only empty lines.
    if arg is None:
        return

    # Continuation paragraphs belong to the description of the last argument
    # when it has one, otherwise to the argument itself.
    last_child = arg.children[-1] if len(arg.children) != 0 else arg
    for grandchild in child.children:
        last_child.children.append(grandchild)
def convert_args(node):
    '''Convert an ``Args:`` paragraph node into a group of argument nodes.

    Args:
        node: A node whose first line starts with ``Args:`` and whose
            children are the paragraphs describing the arguments.

    Returns:
        A default-constructed ``Node`` populated by
        ``append_child_to_args_group_node`` for each child of ``node``.
    '''
    assert node.lines[0].startswith('Args:')
    args_group = Node()
    for entry in node.children:
        append_child_to_args_group_node(entry, args_group, node.indent)
    return args_group
def convert_returns(node):
    '''Convert a ``Returns:`` paragraph node into a Returns node.

    Args:
        node: A node whose first line starts with ``Returns:``.

    Returns:
        A ``Returns`` node built with the indent of ``node``. Its ``line`` is
        the stripped remainder of the first line after the heading, and its
        ``children`` are the children of ``node``, reused unconverted.
    '''
    heading = 'Returns:'
    first_line = node.lines[0]
    assert first_line.startswith(heading)
    returns = Returns(node.indent)
    returns.line = first_line[len(heading):].strip()
    returns.children = node.children
    return returns
def convert_note(node):
    '''Convert a ``Note:`` paragraph node into a Note node.

    Args:
        node: A node whose first line starts with ``Note:``.

    Returns:
        A ``Note`` node built with the indent of ``node``. Its ``line`` is
        the stripped remainder of the first line after the heading, and its
        ``children`` are the children of ``node``, reused unconverted.
    '''
    heading = 'Note:'
    first_line = node.lines[0]
    assert first_line.startswith(heading)
    note = Note(node.indent)
    note.line = first_line[len(heading):].strip()
    note.children = node.children
    return note
def convert_warning(node):
    '''Convert a ``Warning:`` paragraph node into a Warning node.

    Args:
        node: A node whose first line starts with ``Warning:``.

    Returns:
        A ``Warning`` node (the class imported from ``nodes``) built with the
        indent of ``node``. Its ``line`` is the stripped remainder of the
        first line after the heading, and its ``children`` are the children
        of ``node``, reused unconverted.
    '''
    heading = 'Warning:'
    first_line = node.lines[0]
    assert first_line.startswith(heading)
    warning = Warning(node.indent)
    warning.line = first_line[len(heading):].strip()
    warning.children = node.children
    return warning
def convert_raises(node):
    '''Convert a ``Raises:`` paragraph node into a Raises node.

    Args:
        node: A node whose first line starts with ``Raises:`` and whose
            children are the paragraphs describing the exceptions.

    Returns:
        A ``Raises`` node built with the indent of ``node`` and populated by
        ``append_child_to_raise_node`` for each child of ``node``.
    '''
    assert node.lines[0].startswith('Raises:')
    raises_group = Raises(node.indent)
    for entry in node.children:
        append_child_to_raise_node(entry, raises_group)
    return raises_group
# Matches one entry of a ``Raises:`` section: ``ExceptionType: text``.
#   group 1 - the exception type, a plain or dotted identifier such as
#             ``ValueError`` or ``errors.HieroglyphError``
#   group 2 - the description following the colon (may be empty)
RAISE_REGEX = re.compile(r'(\w+(?:\.\w+)*)\s*:\s*(.*)')


def extract_exception_type_and_text(line):
    '''Split a ``Raises:`` entry into its description and exception type.

    Args:
        line: A single line of the form ``ExceptionType: description``.

    Returns:
        A 2-tuple ``(text, type)``. Note the order: the description comes
        first and the exception type second, which is the order callers
        unpack.

    Raises:
        HieroglyphError: If the line does not match ``RAISE_REGEX``.
    '''
    match = RAISE_REGEX.match(line)
    if match is None:
        raise HieroglyphError("Invalid hieroglyph exception syntax: {0}".format(line))
    exception_type, exception_text = match.group(1, 2)
    return (exception_text, exception_type)
def append_child_to_raise_node(child, group_node):
    '''Turn one paragraph of a ``Raises:`` section into Except nodes.

    Every non-empty line of ``child`` must describe one exception. An
    ``Except`` node is appended to ``group_node.children`` for each such
    line, together with a text node holding the description. The children of
    ``child`` are then attached beneath the last exception that was created.

    Args:
        child: A paragraph node exposing ``lines``, ``indent`` and
            ``children``.
        group_node: The node whose ``children`` list receives the new Except
            nodes.
    '''
    exception = None
    for line in child.lines:
        if not line:
            continue

        exception_text, exception_type = extract_exception_type_and_text(line)

        exception = Except(child.indent, exception_type)
        group_node.children.append(exception)  # TODO: Could use parent here.

        if exception_text is not None:
            description = Node(child.indent, [exception_text], exception)
            exception.children.append(description)

    # No exception was created when the paragraph holds only empty lines.
    if exception is None:
        return

    # Continuation paragraphs belong to the description of the last
    # exception when it has one, otherwise to the exception itself.
    if len(exception.children) != 0:
        last_child = exception.children[-1]
    else:
        last_child = exception
    for grandchild in child.children:
        last_child.children.append(grandchild)
def group_paragraphs(indent_paragraphs):
    '''Nest paragraphs so that deeper indents become children of shallower ones.

    Args:
        indent_paragraphs: An iterable of ``(indent, lines)`` tuples in
            document order.

    Returns:
        The root ``Node`` of the tree. The root has indent zero, no lines and
        no parent; every paragraph becomes a descendant of it.
    '''
    root = Node(0, [], None)
    cursor = root
    # Starting below any real indent makes the first paragraph a child of
    # the root.
    last_indent = -1
    for indent, lines in indent_paragraphs:
        if indent == last_indent:
            attach = create_sibling_node
        elif indent > last_indent:
            attach = create_child_node
        else:
            attach = create_uncle_node
        cursor = attach(cursor, indent, lines)
        last_indent = indent
    return root
def create_sibling_node(current_node, indent, lines):
    '''Add a new node next to ``current_node``, under the same parent.

    Args:
        current_node: The node whose parent receives the new node.
        indent: The indent of the new node.
        lines: The lines of the new node.

    Returns:
        The newly created node.
    '''
    shared_parent = current_node.parent
    sibling = Node(indent, lines, shared_parent)
    shared_parent.add_child(sibling)
    return sibling
def create_child_node(current_node, indent, lines):
    '''Add a new node directly beneath ``current_node``.

    Args:
        current_node: The node that becomes the parent of the new node.
        indent: The indent of the new node.
        lines: The lines of the new node.

    Returns:
        The newly created node.
    '''
    new_child = Node(indent, lines, current_node)
    current_node.add_child(new_child)
    return new_child
def create_uncle_node(current_node, indent, lines):
    '''Add a new node beneath the nearest ancestor with a smaller indent.

    The search starts at ``current_node`` itself and climbs the parent chain
    while the candidate's indent is greater than or equal to ``indent``. It
    stops early at a node without a parent, which then becomes the parent of
    the new node regardless of its indent.

    Args:
        current_node: The node at which the upward search starts.
        indent: The indent of the new node.
        lines: The lines of the new node.

    Returns:
        The newly created node.
    '''
    ancestor = current_node
    while ancestor.indent >= indent and ancestor.parent is not None:
        ancestor = ancestor.parent
    uncle = Node(indent, lines, ancestor)
    ancestor.add_child(uncle)
    return uncle
def gather_lines(indent_lines):
    '''Group ``(indent, text)`` lines into ``(indent, [text])`` paragraphs.

    Lines are first grouped into runs of equal indent, the runs are then
    split at blank lines, and finally paragraphs without any lines are
    dropped.

    Args:
        indent_lines: An iterable of ``(indent, text)`` tuples.

    Returns:
        A list of ``(indent, lines)`` tuples, one per paragraph.
    '''
    runs_by_indent = gather_lines_by_indent(indent_lines)
    split_at_blanks = split_separated_lines(runs_by_indent)
    return remove_empty_paragraphs(split_at_blanks)
def gather_lines_by_indent(indent_lines):
    '''Group consecutive lines that share an indent into one paragraph.

    Args:
        indent_lines: An iterable of ``(indent, text)`` tuples.

    Returns:
        A list of ``(indent, lines)`` tuples. A new tuple is started whenever
        the indent differs from that of the preceding line.
    '''
    paragraphs = []
    last_indent = -1
    for indent, line in indent_lines:
        if indent == last_indent:
            # Same indent as the previous line: extend the open paragraph.
            paragraphs[-1][1].append(line)
        else:
            paragraphs.append((indent, [line]))
            last_indent = indent
    return paragraphs
def split_separated_lines(indent_paragraphs):
    '''Split paragraphs at the empty lines found in their interior.

    An empty line that is neither the first nor the last line of a paragraph
    ends the current paragraph (the empty line stays at its end) and starts a
    new one with the same indent. Empty first and last lines never cause a
    split. A paragraph without lines yields one paragraph without lines.

    Args:
        indent_paragraphs: An iterable of ``(indent, lines)`` tuples where
            ``lines`` is a list of strings.

    Returns:
        A list of ``(indent, lines)`` tuples.
    '''
    result = []
    for indent, paragraph in indent_paragraphs:
        current = []
        result.append((indent, current))
        last_position = len(paragraph) - 1
        for position, line in enumerate(paragraph):
            current.append(line)
            is_interior = 0 < position < last_position
            if is_interior and len(line) == 0:
                current = []
                result.append((indent, current))
    return result
def remove_empty_paragraphs(indent_paragraphs):
    '''Drop every paragraph that contains no lines at all.

    A paragraph consisting only of empty strings still has lines and is kept.

    Args:
        indent_paragraphs: An iterable of ``(indent, lines)`` tuples.

    Returns:
        A list of the ``(indent, lines)`` tuples whose ``lines`` is non-empty.
    '''
    kept = []
    for indent, paragraph in indent_paragraphs:
        if len(paragraph) != 0:
            kept.append((indent, paragraph))
    return kept
def first_paragraph_indent(indent_texts):
    '''Fix the indentation of the unindented opening lines of a docstring.

    The first line of a multi-line docstring follows the opening quote and so
    usually has no indent. Every line of the leading run of zero-indent lines
    is given the opening indent reported by ``determine_opening_indent``. The
    first line with a non-zero indent ends that run; it and all later lines
    are passed through unchanged.

    Args:
        indent_texts: The lines of the docstring as a sequence of 2-tuples
            each containing an integer indent level as the first element and
            the text as the second element.

    Returns:
        A list of 2-tuples, each containing an integer indent level as the
        first element and the text as the second element.
    '''
    opening_indent = determine_opening_indent(indent_texts)

    adjusted = []
    in_leading_run = True
    for indent, text in indent_texts:
        if in_leading_run and indent == 0:
            adjusted.append((opening_indent, text))
        else:
            # The first indented line closes the leading run for good.
            in_leading_run = False
            adjusted.append((indent, text))
    return adjusted
def determine_opening_indent(indent_texts):
    '''Determine the opening indent level for a docstring.

    The rule only looks at the first two lines: with no lines the result is
    zero, with one line it is that line's indent, and otherwise it is the
    indent of the second line unless the second line is empty, in which case
    it is the indent of the first line.

    Args:
        indent_texts: The lines of the docstring as a sequence of 2-tuples
            each containing an integer indent level as the first element and
            the text as the second element.

    Returns:
        The opening indent level as an integer.
    '''
    line_count = len(indent_texts)
    if line_count < 1:
        return 0

    first_indent = indent_texts[0][0]
    if line_count == 1:
        return first_indent

    second_line = indent_texts[1]
    second_is_empty = len(second_line[1]) == 0
    return first_indent if second_is_empty else second_line[0]
def rewrite_autodoc(app, what, name, obj, options, lines):
    '''Convert lines from Hieroglyph to Sphinx format.

    The function to be called by the Sphinx autodoc extension when autodoc
    has read and processed a docstring. This function modifies its ``lines``
    argument *in place*, replacing the Hieroglyph syntax input with the
    reStructuredText output of ``parse_hieroglyph_text``. Only ``lines`` is
    used; the other arguments are accepted to match the event signature.

    Args:
        app: The Sphinx application object.
        what: The type of object which the docstring belongs to. One of
            'module', 'class', 'exception', 'function', 'method', 'attribute'
        name: The fully qualified name of the object.
        obj: The object itself.
        options: The options given to the directive. An object with attributes
            ``inherited_members``, ``undoc_members``, ``show_inheritance`` and
            ``noindex`` that are ``True`` if the flag option of the same name
            was given to the auto directive.
        lines: The lines of the docstring. Will be modified *in place*.
    '''
    converted = parse_hieroglyph_text(lines)
    # Slice assignment keeps the caller's list object and swaps its contents.
    lines[:] = converted
def setup(app):
    '''Register the hieroglyph docstring converter with a Sphinx application.

    Args:
        app: The Sphinx application object; its ``connect`` method is used to
            subscribe ``rewrite_autodoc`` to the ``autodoc-process-docstring``
            event.
    '''
    event_name = 'autodoc-process-docstring'
    app.connect(event_name, rewrite_autodoc)
|