1 files changed, 405 insertions, 0 deletions
diff --git a/scripting/build_tools/extract_docstrings.py b/scripting/build_tools/extract_docstrings.py
new file mode 100644
index 0000000..103654e
--- /dev/null
+++ b/scripting/build_tools/extract_docstrings.py
@@ -0,0 +1,405 @@
+#!/usr/bin/env python
+"""Doxygen XML to SWIG docstring converter.
+
+Converts Doxygen generated XML files into a file containing docstrings
+that can be used by SWIG >1.3.23
+
+Usage:
+
+  extract_docstrings.py input_py_wrapper.py input_xml_dir output_directory
+
+input_py_wrapper.py is a swig generated file, with/without docstrings, 
+                    so we can get to know which classes are inspected by swig
+                    
+input_xml_dir is your doxygen generated XML directory 
+
+output_directory is the directory where output will be written
+
+"""
+
+# This code is implemented using Mark Pilgrim's code as a guideline:
+#   http://www.faqs.org/docs/diveintopython/kgp_divein.html
+# Based on doxy2swig.py
+# Author: Prabhu Ramachandran
+# License: BSD style
+
+
+from xml.dom import minidom
+import re
+import textwrap
+import sys
+import types
+import os.path
+
+
+def my_open_read(source):
+    """Return `source` if it has a `read` attribute, else open it."""
+    if not hasattr(source, "read"):
+        return open(source)
+    return source
+
+def my_open_write(dest):
+    """Return `dest` if it has a `write` attribute, else open it with 'w'."""
+    if not hasattr(dest, "write"):
+        return open(dest, 'w')
+    return dest
+
+
+class Doxy2SWIG:
+    """Converts Doxygen generated XML files into a file containing
+    docstrings that can be used by SWIG-1.3.x that have support for
+    feature("docstring").  Once the data is parsed it is stored in
+    self.pieces.
+
+    """
+
+    def __init__(self, src):
+        """Initialize the instance given a source object (file or
+        filename).  The source is always closed after being parsed.
+
+        """
+        f = my_open_read(src)
+        try:
+            self.my_dir = os.path.dirname(f.name)
+            self.xmldoc = minidom.parse(f).documentElement
+        finally:
+            # Release the handle even if the XML fails to parse.
+            f.close()
+
+        self.pieces = ['\n// File: %s\n' % os.path.basename(f.name)]
+
+        self.space_re = re.compile(r'\s+')
+        self.lead_spc = re.compile(r'^(%feature\S+\s+\S+\s*?)"\s+(\S)')
+        self.multi = 0
+        self.ignores = ('inheritancegraph', 'param', 'listofallmembers',
+                        'innerclass', 'name', 'declname', 'incdepgraph',
+                        'invincdepgraph', 'programlisting', 'type',
+                        'references', 'referencedby', 'location',
+                        'collaborationgraph', 'reimplements',
+                        'reimplementedby', 'derivedcompoundref',
+                        'basecompoundref')
+
+    def generate(self):
+        """Parses the file set in the initialization.  The resulting
+        data is stored in `self.pieces`.
+
+        """
+        self.parse(self.xmldoc)
+
+    def parse(self, node):
+        """Parse a given node.  This function in turn calls the
+        `parse_<nodeType>` functions which handle the respective
+        nodes.
+
+        """
+        pm = getattr(self, "parse_%s" % node.__class__.__name__)
+        pm(node)
+
+    def parse_Document(self, node):
+        """Parse a whole document by descending into its root element."""
+        self.parse(node.documentElement)
+
+    def parse_Text(self, node):
+        """Add a text node's escaped, re-wrapped data to the pieces."""
+        txt = node.data
+        # Escaped because the text ends up inside a double-quoted string.
+        txt = txt.replace('\\', r'\\\\')
+        txt = txt.replace('"', r'\"')
+        m = self.space_re.match(txt)
+        if m and len(m.group()) == len(txt):
+            return  # ignore pure whitespace
+        self.add_text(textwrap.fill(txt))
+
+    def parse_Element(self, node):
+        """Parse an `ELEMENT_NODE`.  This calls specific
+        `do_<tagName>` handlers for different elements.  If no handler
+        is available the `generic_parse` method is called.  All
+        tagNames specified in `self.ignores` are simply ignored.
+
+        """
+        name = node.tagName
+        if name in self.ignores:
+            return
+        handler = getattr(self, "do_%s" % name, None)
+        if handler is None:
+            handler = self.generic_parse
+        handler(node)
+
+    def add_text(self, value):
+        """Adds `value` (a string, or a list/tuple of them) to the pieces."""
+        if isinstance(value, (list, tuple)):
+            self.pieces.extend(value)
+        else:
+            self.pieces.append(value)
+
+    def get_specific_nodes(self, node, names):
+        """Given a node and a sequence of strings in `names`, return a
+        dictionary containing the names as keys and child
+        `ELEMENT_NODEs`, that have a `tagName` equal to the name.
+
+        """
+        nodes = [(x.tagName, x) for x in node.childNodes
+                 if x.nodeType == x.ELEMENT_NODE and
+                 x.tagName in names]
+        return dict(nodes)
+
+    def generic_parse(self, node, pad=0):
+        """A Generic parser for arbitrary tags in a node.
+
+        Parameters:
+
+         - node:  A node in the DOM.
+         - pad: `int` (default: 0)
+
+           If 0 the node data is not padded with newlines.  If 1 it
+           appends a newline after parsing the childNodes.  If 2 it
+           pads before and after the nodes are processed.  Defaults to
+           0.
+
+        """
+        npiece = 0
+        if pad:
+            npiece = len(self.pieces)
+            if pad == 2:
+                self.add_text('\n')
+        for n in node.childNodes:
+            self.parse(n)
+        if pad and len(self.pieces) > npiece:
+            self.add_text('\n')
+
+    def space_parse(self, node):
+        """Emit a single space, then the node's children unpadded."""
+        self.add_text(' ')
+        self.generic_parse(node)
+
+    do_ref = space_parse
+    do_emphasis = space_parse
+    do_bold = space_parse
+    do_computeroutput = space_parse
+    do_formula = space_parse
+
+    def do_compoundname(self, node):
+        """Open the docstring feature directive named after the node."""
+        self.add_text('\n\n')
+        data = node.firstChild.data
+        self.add_text('%%feature("docstring") %s "\n' % data)
+
+    def do_compounddef(self, node):
+        """Document public classes/structs; scan file/namespace sections."""
+        kind = node.attributes['kind'].value
+        if kind in ('class', 'struct'):
+            prot = node.attributes['prot'].value
+            if prot != 'public':
+                return
+            names = ('compoundname', 'briefdescription',
+                     'detaileddescription', 'includes')
+            first = self.get_specific_nodes(node, names)
+            for n in names:
+                if n in first:
+                    self.parse(first[n])
+            self.add_text(['";', '\n'])
+            for n in node.childNodes:
+                if n not in first.values():
+                    self.parse(n)
+        elif kind in ('file', 'namespace'):
+            nodes = node.getElementsByTagName('sectiondef')
+            for n in nodes:
+                self.parse(n)
+
+    def do_includes(self, node):
+        self.add_text('C++ includes: ')
+        self.generic_parse(node, pad=1)
+
+    def do_parameterlist(self, node):
+        self.add_text(['\n', '\n', 'Parameters:', '\n'])
+        self.generic_parse(node, pad=1)
+
+    def do_para(self, node):
+        self.add_text('\n')
+        self.generic_parse(node, pad=1)
+
+    def do_parametername(self, node):
+        self.add_text('\n')
+        try:
+            self.add_text("%s: " % node.firstChild.data)
+        except AttributeError:
+            self.add_text("???: ")
+
+    def do_parameterdefinition(self, node):
+        self.generic_parse(node, pad=1)
+
+    def do_detaileddescription(self, node):
+        self.generic_parse(node, pad=1)
+
+    def do_briefdescription(self, node):
+        self.generic_parse(node, pad=1)
+
+    def do_memberdef(self, node):
+        """Emit the docstring directive of a public, non-operator member."""
+        if node.attributes['prot'].value != 'public':
+            return
+        tmp = node.parentNode.parentNode.parentNode
+        compdef = tmp.getElementsByTagName('compounddef')[0]
+        cdef_kind = compdef.attributes['kind'].value
+        first = self.get_specific_nodes(node, ('definition', 'name'))
+        name = first['name'].firstChild.data
+        if name[:8] == 'operator':  # Don't handle operators yet.
+            return
+
+        defn = first['definition'].firstChild.data
+        self.add_text('\n')
+        self.add_text('%feature("docstring") ')
+
+        anc = node.parentNode.parentNode
+        if cdef_kind in ('file', 'namespace'):
+            ns_node = anc.getElementsByTagName('innernamespace')
+            if not ns_node and cdef_kind == 'namespace':
+                ns_node = anc.getElementsByTagName('compoundname')
+            if ns_node:
+                ns = ns_node[0].firstChild.data
+                self.add_text(' %s::%s "\n%s' % (ns, name, defn))
+            else:
+                self.add_text(' %s "\n%s' % (name, defn))
+        elif cdef_kind in ('class', 'struct'):
+            # Get the full function name.
+            anc_node = anc.getElementsByTagName('compoundname')
+            cname = anc_node[0].firstChild.data
+            self.add_text(' %s::%s "\n%s' % (cname, name, defn))
+
+        for n in node.childNodes:
+            if n not in first.values():
+                self.parse(n)
+        self.add_text(['";', '\n'])
+
+    def do_definition(self, node):
+        data = node.firstChild.data
+        self.add_text('%s "\n%s' % (data, data))
+
+    def do_sectiondef(self, node):
+        """Parse only sections of kind `public-func` or `func`."""
+        kind = node.attributes['kind'].value
+        if kind in ('public-func', 'func'):
+            self.generic_parse(node)
+
+    def do_simplesect(self, node):
+        """Render a simple section; date/rcs/version ones are dropped."""
+        kind = node.attributes['kind'].value
+        if kind in ('date', 'rcs', 'version'):
+            return
+        if kind == 'warning':
+            self.add_text(['\n', 'WARNING: '])
+        elif kind == 'see':
+            self.add_text('\n')
+            self.add_text('See: ')
+        self.generic_parse(node)
+
+    def do_argsstring(self, node):
+        self.generic_parse(node, pad=1)
+
+    def do_member(self, node):
+        """Parse only functions whose refid starts with `namespace`."""
+        kind = node.attributes['kind'].value
+        refid = node.attributes['refid'].value
+        if kind == 'function' and refid[:9] == 'namespace':
+            self.generic_parse(node)
+
+    def do_doxygenindex(self, node):
+        """Parse every compound of an index and collect its cleaned pieces."""
+        self.multi = 1
+        comps = node.getElementsByTagName('compound')
+        for c in comps:
+            refid = c.attributes['refid'].value
+            fname = refid + '.xml'
+            if not os.path.exists(fname):
+                fname = os.path.join(self.my_dir, fname)
+            print("parsing file: %s" % fname)
+            p = Doxy2SWIG(fname)
+            p.generate()
+            self.pieces.extend(self.clean_pieces(p.pieces))
+
+    def write(self, fname):
+        """Write the pieces to `fname` (name or file), then close it."""
+        o = my_open_write(fname)
+        try:
+            if self.multi:
+                o.write("".join(self.pieces))
+            else:
+                o.write("".join(self.clean_pieces(self.pieces)))
+        finally:
+            o.close()
+
+    def clean_pieces(self, pieces):
+        """Cleans the list of strings given as `pieces`.  It replaces
+        multiple newlines by a maximum of 2 and returns a new list.
+        It also wraps the paragraphs nicely.
+
+        """
+        ret = []
+        count = 0
+        for i in pieces:
+            if i == '\n':
+                count += 1
+            else:
+                if i == '";':
+                    if count:
+                        ret.append('\n')
+                elif count > 2:
+                    ret.append('\n\n')
+                elif count:
+                    ret.append('\n' * count)
+                count = 0
+                ret.append(i)
+
+        _data = "".join(ret)
+        ret = []
+        for i in _data.split('\n\n'):
+            if i == 'Parameters:':
+                ret.extend(['Parameters:\n-----------', '\n\n'])
+            elif '// File:' in i:  # leave comments alone.
+                ret.extend([i, '\n'])
+            else:
+                _tmp = textwrap.fill(i.strip())
+                _tmp = self.lead_spc.sub(r'\1"\2', _tmp)
+                ret.extend([_tmp, '\n\n'])
+        return ret
+
+
+def get_python_classes(input_py):
+    """Return the list of class names declared in the file `input_py`."""
+    with open(input_py) as f:
+        data = f.read()
+    # `\w` already covers `_`; the optional group holds the base classes.
+    found = re.findall(r'class[ ]+(\w+)(\([\w, ]+\))?:', data)
+    return [classname for classname, _bases in found]
+
+def main(input_py, input_xml, output_dir):
+    """Write a SWIG docstring file for every documented wrapper class.
+
+    Classes from `input_py` without a `class<Name>.xml` file in
+    `input_xml` are skipped; the others are listed in `docstrings.i`.
+
+    """
+    classes = get_python_classes(input_py)
+    # `open` replaces the Python 2 only `file` builtin.
+    with open("%s/docstrings.i" % output_dir, 'w') as f_index:
+        for classname in classes:
+            # Underscores are doubled in the XML file name looked up.
+            xml_name = "class%s.xml" % classname.replace("_", "__")
+            class_file = "%s/%s" % (input_xml, xml_name)
+            swig_file = "%s/%s.i" % (output_dir, classname.lower())
+            if os.path.isfile(class_file):
+                print("processing: %s  -> %s" % (class_file, swig_file))
+                p = Doxy2SWIG(class_file)
+                p.generate()
+                p.write(swig_file)
+                f_index.write('%%include "%s.i"\n' % classname.lower())
+    
+
+
+
+
+if __name__ == '__main__':
+    print(sys.argv)  # echo the raw command line
+    if len(sys.argv) != 4:  # script name plus three arguments
+        print(__doc__)
+        sys.exit(1)
+    main(*sys.argv[1:])