summaryrefslogtreecommitdiff
path: root/scripting/build_tools/extract_docstrings.py
diff options
context:
space:
mode:
Diffstat (limited to 'scripting/build_tools/extract_docstrings.py')
-rw-r--r--scripting/build_tools/extract_docstrings.py405
1 files changed, 405 insertions, 0 deletions
diff --git a/scripting/build_tools/extract_docstrings.py b/scripting/build_tools/extract_docstrings.py
new file mode 100644
index 0000000..103654e
--- /dev/null
+++ b/scripting/build_tools/extract_docstrings.py
@@ -0,0 +1,405 @@
+#!/usr/bin/env python
+"""Doxygen XML to SWIG docstring converter.
+
+Converts Doxygen generated XML files into a file containing docstrings
+that can be used by SWIG >1.3.23
+
+Usage:
+
+ extract_docstrings.py input_py_wrapper.py input_xml_dir output_directory
+
+input_py_wrapper.py is a swig generated file, with/without docstrings,
+ so we can get to know which classes are inspected by swig
+
+input_xml_dir is your doxygen generated XML directory
+
+output_directory is the directory where output will be written
+
+"""
+
+# This code is implemented using Mark Pilgrim's code as a guideline:
+# http://www.faqs.org/docs/diveintopython/kgp_divein.html
+# Based on doxy2swig.py
+# Author: Prabhu Ramachandran
+# License: BSD style
+
+
+from xml.dom import minidom
+import re
+import textwrap
+import sys
+import types
+import os.path
+
+
def my_open_read(source):
    """Return a readable object for `source`.

    Anything that already exposes a `read` attribute is handed back
    untouched; any other value is treated as a path and opened for
    reading.

    """
    is_file_like = hasattr(source, "read")
    if not is_file_like:
        return open(source)
    return source
+
def my_open_write(dest):
    """Return a writable object for `dest`.

    Anything that already exposes a `write` attribute is handed back
    untouched; any other value is treated as a path and opened in
    'w' mode.

    """
    is_file_like = hasattr(dest, "write")
    if not is_file_like:
        return open(dest, 'w')
    return dest
+
+
class Doxy2SWIG:
    """Converts Doxygen generated XML files into a file containing
    docstrings that can be used by SWIG-1.3.x versions that support
    feature("docstring").

    The XML document is walked node by node and the text of the
    `%feature("docstring")` directives is accumulated as a list of
    string fragments in `self.pieces`.

    """

    def __init__(self, src):
        """Initialize the instance given a source object (file or
        filename).

        The source is parsed immediately and then closed, even when an
        already open file object was passed in.

        """
        f = my_open_read(src)
        # File-like sources such as StringIO carry no `name`; fall back
        # to an empty name instead of raising AttributeError.
        src_name = getattr(f, 'name', '')
        self.my_dir = os.path.dirname(src_name)
        try:
            self.xmldoc = minidom.parse(f).documentElement
        finally:
            # Release the handle even when the XML is malformed.
            f.close()

        self.pieces = []
        self.pieces.append('\n// File: %s\n' % os.path.basename(src_name))

        self.space_re = re.compile(r'\s+')
        self.lead_spc = re.compile(r'^(%feature\S+\s+\S+\s*?)"\s+(\S)')
        # Set to 1 by `do_doxygenindex` once an index file was expanded.
        self.multi = 0
        # Tags that are skipped entirely by `parse_Element`.
        self.ignores = ('inheritancegraph', 'param', 'listofallmembers',
                        'innerclass', 'name', 'declname', 'incdepgraph',
                        'invincdepgraph', 'programlisting', 'type',
                        'references', 'referencedby', 'location',
                        'collaborationgraph', 'reimplements',
                        'reimplementedby', 'derivedcompoundref',
                        'basecompoundref')

    def generate(self):
        """Parses the file set in the initialization.  The resulting
        data is stored in `self.pieces`.

        """
        self.parse(self.xmldoc)

    def parse(self, node):
        """Parse a given node.  This function in turn calls the
        `parse_<nodeType>` method named after the node's class, which
        handles the respective node.

        """
        handler = getattr(self, "parse_%s" % node.__class__.__name__)
        handler(node)

    def parse_Document(self, node):
        """Parse a whole document by descending into its root element."""
        self.parse(node.documentElement)

    def parse_Text(self, node):
        """Add the escaped and wrapped data of a text node.

        Nodes consisting only of whitespace are ignored.

        """
        txt = node.data
        # Escape backslashes and double quotes so the text can sit
        # inside the double-quoted body of the directive.
        txt = txt.replace('\\', r'\\\\')
        txt = txt.replace('"', r'\"')
        # ignore pure whitespace
        m = self.space_re.match(txt)
        if m and len(m.group()) == len(txt):
            return
        self.add_text(textwrap.fill(txt))

    def parse_Element(self, node):
        """Parse an `ELEMENT_NODE`.  This calls specific
        `do_<tagName>` handlers for different elements.  If no handler
        is available the `generic_parse` method is called.  All
        tagNames specified in `self.ignores` are simply ignored.

        """
        name = node.tagName
        if name in self.ignores:
            return
        handler = getattr(self, "do_%s" % name, None)
        if handler is None:
            self.generic_parse(node)
        else:
            handler(node)

    def add_text(self, value):
        """Adds text corresponding to `value` into `self.pieces`.

        A list or tuple is spliced in item by item; any other value is
        appended as a single piece.

        """
        # isinstance replaces the Python 2 only types.ListType /
        # types.TupleType comparison.
        if isinstance(value, (list, tuple)):
            self.pieces.extend(value)
        else:
            self.pieces.append(value)

    def get_specific_nodes(self, node, names):
        """Given a node and a sequence of strings in `names`, return a
        dictionary mapping each name to a child `ELEMENT_NODE` whose
        `tagName` equals that name.

        Names without a matching child are absent from the result.  If
        several children share a tag name, the last one is kept.

        """
        nodes = [(x.tagName, x) for x in node.childNodes
                 if x.nodeType == x.ELEMENT_NODE and x.tagName in names]
        return dict(nodes)

    def generic_parse(self, node, pad=0):
        """A Generic parser for arbitrary tags in a node.

        Parameters:

         - node:  A node in the DOM.
         - pad: `int` (default: 0)

           If 0 the node data is not padded with newlines.  If 1 it
           appends a newline after parsing the childNodes.  If 2 it
           pads before and after the nodes are processed.  Defaults to
           0.

        The trailing newline is only added when something was added to
        `self.pieces` since padding started.

        """
        npiece = 0
        if pad:
            npiece = len(self.pieces)
            if pad == 2:
                self.add_text('\n')
        for n in node.childNodes:
            self.parse(n)
        if pad and len(self.pieces) > npiece:
            self.add_text('\n')

    def space_parse(self, node):
        """Add a single space, then parse the children of `node`."""
        self.add_text(' ')
        self.generic_parse(node)

    # Inline markup elements are all handled by `space_parse`.
    do_ref = space_parse
    do_emphasis = space_parse
    do_bold = space_parse
    do_computeroutput = space_parse
    do_formula = space_parse

    def do_compoundname(self, node):
        """Open a docstring directive named after the compound."""
        self.add_text('\n\n')
        data = node.firstChild.data
        self.add_text('%%feature("docstring") %s "\n' % data)

    def do_compounddef(self, node):
        """Handle a `compounddef` element according to its `kind`.

        For a public class or struct the name, brief and detailed
        descriptions and includes are emitted first and the directive
        is closed; the remaining children are parsed afterwards.
        Non-public classes and structs are skipped.  For a file or
        namespace only the `sectiondef` descendants are parsed.  Other
        kinds produce nothing.

        """
        kind = node.attributes['kind'].value
        if kind in ('class', 'struct'):
            prot = node.attributes['prot'].value
            if prot != 'public':
                return
            names = ('compoundname', 'briefdescription',
                     'detaileddescription', 'includes')
            first = self.get_specific_nodes(node, names)
            # Emit in the fixed order of `names`, not document order.
            for n in names:
                if n in first:
                    self.parse(first[n])
            self.add_text(['";', '\n'])
            handled = list(first.values())
            for n in node.childNodes:
                if n not in handled:
                    self.parse(n)
        elif kind in ('file', 'namespace'):
            for n in node.getElementsByTagName('sectiondef'):
                self.parse(n)

    def do_includes(self, node):
        """Add a 'C++ includes: ' label followed by the node content."""
        self.add_text('C++ includes: ')
        self.generic_parse(node, pad=1)

    def do_parameterlist(self, node):
        """Add a 'Parameters:' heading followed by the node content."""
        self.add_text(['\n', '\n', 'Parameters:', '\n'])
        self.generic_parse(node, pad=1)

    def do_para(self, node):
        """Add a newline followed by the padded paragraph content."""
        self.add_text('\n')
        self.generic_parse(node, pad=1)

    def do_parametername(self, node):
        """Add '<name>: ' on a new line, or '???: ' when the element
        has no child to take the name from.

        """
        self.add_text('\n')
        try:
            self.add_text("%s: " % node.firstChild.data)
        except AttributeError:
            self.add_text("???: ")

    def do_parameterdefinition(self, node):
        """Parse the children, followed by a newline if any text came out."""
        self.generic_parse(node, pad=1)

    def do_detaileddescription(self, node):
        """Parse the children, followed by a newline if any text came out."""
        self.generic_parse(node, pad=1)

    def do_briefdescription(self, node):
        """Parse the children, followed by a newline if any text came out."""
        self.generic_parse(node, pad=1)

    def do_memberdef(self, node):
        """Emit a docstring directive for a public member.

        The directive name is qualified with the enclosing namespace
        or class name where one is found.  Members that are not public
        and members whose name starts with 'operator' are skipped.

        """
        prot = node.attributes['prot'].value
        # The kind of the first compounddef under the great-grandparent
        # decides how the member name is qualified.
        tmp = node.parentNode.parentNode.parentNode
        compdef = tmp.getElementsByTagName('compounddef')[0]
        cdef_kind = compdef.attributes['kind'].value

        if prot != 'public':
            return

        first = self.get_specific_nodes(node, ('definition', 'name'))
        name = first['name'].firstChild.data
        if name[:8] == 'operator':  # Don't handle operators yet.
            return

        defn = first['definition'].firstChild.data
        self.add_text('\n')
        self.add_text('%feature("docstring") ')

        anc = node.parentNode.parentNode
        if cdef_kind in ('file', 'namespace'):
            ns_node = anc.getElementsByTagName('innernamespace')
            if not ns_node and cdef_kind == 'namespace':
                ns_node = anc.getElementsByTagName('compoundname')
            if ns_node:
                ns = ns_node[0].firstChild.data
                self.add_text(' %s::%s "\n%s' % (ns, name, defn))
            else:
                self.add_text(' %s "\n%s' % (name, defn))
        elif cdef_kind in ('class', 'struct'):
            # Get the full function name.
            anc_node = anc.getElementsByTagName('compoundname')
            cname = anc_node[0].firstChild.data
            self.add_text(' %s::%s "\n%s' % (cname, name, defn))
        # NOTE(review): for any other compound kind no name is added
        # after the directive prefix -- confirm whether that can occur.

        handled = list(first.values())
        for n in node.childNodes:
            if n not in handled:
                self.parse(n)
        self.add_text(['";', '\n'])

    def do_definition(self, node):
        """Add the definition text as directive name and first line."""
        data = node.firstChild.data
        self.add_text('%s "\n%s' % (data, data))

    def do_sectiondef(self, node):
        """Parse only sections of kind 'public-func' or 'func'."""
        kind = node.attributes['kind'].value
        if kind in ('public-func', 'func'):
            self.generic_parse(node)

    def do_simplesect(self, node):
        """Handle a `simplesect` element according to its `kind`.

        'date', 'rcs' and 'version' sections are dropped, 'warning'
        and 'see' sections get a label, everything else is parsed as
        is.

        """
        kind = node.attributes['kind'].value
        if kind in ('date', 'rcs', 'version'):
            return
        if kind == 'warning':
            self.add_text(['\n', 'WARNING: '])
        elif kind == 'see':
            self.add_text('\n')
            self.add_text('See: ')
        self.generic_parse(node)

    def do_argsstring(self, node):
        """Parse the children, followed by a newline if any text came out."""
        self.generic_parse(node, pad=1)

    def do_member(self, node):
        """Parse only function members whose refid starts with
        'namespace'.

        """
        kind = node.attributes['kind'].value
        refid = node.attributes['refid'].value
        if kind == 'function' and refid[:9] == 'namespace':
            self.generic_parse(node)

    def do_doxygenindex(self, node):
        """Expand an index file: convert every referenced compound XML
        file and append its cleaned pieces to `self.pieces`.

        A file is looked up relative to the current directory first
        and next to the index file otherwise.

        """
        self.multi = 1
        comps = node.getElementsByTagName('compound')
        for c in comps:
            refid = c.attributes['refid'].value
            fname = refid + '.xml'
            if not os.path.exists(fname):
                fname = os.path.join(self.my_dir, fname)
            # Parenthesised form prints the same text on Python 2 and 3.
            print("parsing file: %s" % fname)
            p = Doxy2SWIG(fname)
            p.generate()
            self.pieces.extend(self.clean_pieces(p.pieces))

    def write(self, fname):
        """Write the collected pieces to `fname` (file or filename).

        Pieces gathered through an index file were cleaned when they
        were collected, so they are written as is; otherwise they are
        passed through `clean_pieces` first.  The destination is
        closed afterwards, even when a file object was passed in.

        """
        o = my_open_write(fname)
        try:
            if self.multi:
                o.write("".join(self.pieces))
            else:
                o.write("".join(self.clean_pieces(self.pieces)))
        finally:
            # Close the handle even when writing fails.
            o.close()

    def clean_pieces(self, pieces):
        """Cleans the list of strings given as `pieces`.  It replaces
        multiple newlines by a maximum of 2 and returns a new list.
        It also wraps the paragraphs nicely.

        """
        ret = []
        count = 0  # number of consecutive '\n' pieces seen so far
        for i in pieces:
            if i == '\n':
                count = count + 1
            else:
                if i == '";':
                    # At most one newline before the closing quote.
                    if count:
                        ret.append('\n')
                elif count > 2:
                    ret.append('\n\n')
                elif count:
                    ret.append('\n' * count)
                count = 0
                ret.append(i)
        # Newline pieces still pending at the end are dropped.

        _data = "".join(ret)
        ret = []
        for i in _data.split('\n\n'):
            if i == 'Parameters:':
                ret.extend(['Parameters:\n-----------', '\n\n'])
            elif i.find('// File:') > -1:  # leave comments alone.
                ret.extend([i, '\n'])
            else:
                _tmp = textwrap.fill(i.strip())
                # Remove the whitespace between the opening quote of a
                # directive and the first character of its text.
                _tmp = self.lead_spc.sub(r'\1"\2', _tmp)
                ret.extend([_tmp, '\n\n'])
        return ret
+
+
def get_python_classes(input_py):
    """Return the names of the classes declared in a Python source file.

    `input_py` is the path of the file to scan.  The file is searched
    textually for `class Name:` / `class Name(Base, ...):` headers; it
    is neither imported nor parsed as Python.

    Returns a list of class names in order of appearance (an empty
    list when none are found).

    """
    with open(input_py) as f:
        data = f.read()
    classes_supers = re.findall(r'class[ ]+([\w_]+)(\([\w_, ]+\))?:', data)
    # Build a real list rather than a one-shot generator so the result
    # can be iterated more than once.
    return [classname for classname, _superclass in classes_supers]
+
def main(input_py, input_xml, output_dir):
    """Generate one SWIG docstring file per class plus an index file.

    Parameters:

     - input_py: path of the SWIG generated Python wrapper; the class
       names declared in it decide which XML files are looked for.
     - input_xml: directory holding the Doxygen generated XML.
     - output_dir: directory the `.i` files are written into.

    For every class `Name` found in `input_py`, the file
    `class<Name>.xml` (with each underscore of the name doubled) is
    converted to `<name lowercased>.i`, and a matching `%include` line
    is added to `docstrings.i`.  Classes without such an XML file are
    silently skipped.

    """
    classes = get_python_classes(input_py)

    # open() replaces the Python 2 only file() builtin.
    with open("%s/docstrings.i" % output_dir, 'w') as f_index:
        for classname in classes:
            class_file = "%s/class%s.xml" % (input_xml,
                                             classname.replace("_", "__"))
            swig_file = "%s/%s.i" % (output_dir, classname.lower())

            if not os.path.isfile(class_file):
                continue

            # Same text as before, in a form valid on Python 2 and 3.
            print("processing: %s  -> %s" % (class_file, swig_file))
            p = Doxy2SWIG(class_file)
            p.generate()
            p.write(swig_file)
            f_index.write('%%include "%s.i"\n' % classname.lower())
+
+
+
+
+
if __name__ == '__main__':
    # Echo the raw command line before validating it.
    print(sys.argv)
    # Exactly three arguments are required: the SWIG generated wrapper,
    # the Doxygen XML directory and the output directory.
    if len(sys.argv) != 4:
        print(__doc__)
        sys.exit(1)
    main(sys.argv[1], sys.argv[2], sys.argv[3])