summaryrefslogtreecommitdiff
path: root/lib/python2.7/pyclbr.py
diff options
context:
space:
mode:
Diffstat (limited to 'lib/python2.7/pyclbr.py')
-rw-r--r--lib/python2.7/pyclbr.py344
1 files changed, 344 insertions, 0 deletions
diff --git a/lib/python2.7/pyclbr.py b/lib/python2.7/pyclbr.py
new file mode 100644
index 0000000..b8f71ae
--- /dev/null
+++ b/lib/python2.7/pyclbr.py
@@ -0,0 +1,344 @@
+"""Parse a Python module and describe its classes and methods.
+
+Parse enough of a Python file to recognize imports and class and
+method definitions, and to find out the superclasses of a class.
+
+The interface consists of a single function:
+ readmodule_ex(module [, path])
+where module is the name of a Python module, and path is an optional
+list of directories where the module is to be searched. If present,
+path is prepended to the system search path sys.path. The return
+value is a dictionary. The keys of the dictionary are the names of
+the classes and top-level functions defined in the module (including
+names defined via the from XXX import YYY construct). The values are
+instances of the classes Class and Function defined here. One special
+key/value pair is present for packages: the key '__path__' has a list
+as its value which contains the package search path.
+
+A class is described by the class Class in this module. Instances
+of this class have the following instance variables:
+ module -- the module name
+ name -- the name of the class
+ super -- a list of super classes (Class instances)
+ methods -- a dictionary of methods
+ file -- the file in which the class was defined
+ lineno -- the line in the file on which the class statement occurred
+The dictionary of methods uses the method names as keys and the line
+numbers on which the method was defined as values.
+If the name of a super class is not recognized, the corresponding
+entry in the list of super classes is not a class instance but a
+string giving the name of the super class. Since import statements
+are recognized and imported modules are scanned as well, this
+shouldn't happen often.
+
+A function is described by the class Function in this module.
+Instances of this class have the following instance variables:
+ module -- the module name
+ name -- the name of the function
+ file -- the file in which the function was defined
+ lineno -- the line in the file on which the def statement occurred
+"""
+
+import sys
+import imp
+import tokenize
+from token import NAME, DEDENT, OP
+from operator import itemgetter
+
+# Names exported by "from pyclbr import *"; the underscore-prefixed
+# helpers below are implementation details.
+__all__ = ["readmodule", "readmodule_ex", "Class", "Function"]
+
+# Maps a full dotted module name to the result dictionary built for it
+# by _readmodule(), so each module is scanned only once.
+_modules = {} # cache of modules we've seen
+
+# each Python class is represented by an instance of this class
+class Class:
+    '''Describe one Python class found while scanning a module.'''
+    def __init__(self, module, name, super, file, lineno):
+        # Where the class lives and what it is called.
+        self.module, self.name = module, name
+        self.file, self.lineno = file, lineno
+        # Base classes: Class instances, or plain strings for bases
+        # that could not be resolved.  None means "no bases".
+        self.super = [] if super is None else super
+        # Maps method name -> line number of its 'def' keyword.
+        self.methods = {}
+
+    def _addmethod(self, name, lineno):
+        '''Record that method NAME is defined on line LINENO.'''
+        self.methods.update({name: lineno})
+
+class Function:
+    '''Describe one top-level Python function found in a module.'''
+    def __init__(self, module, name, file, lineno):
+        # Owning module and the function's own name.
+        self.module, self.name = module, name
+        # Source file and the line holding the 'def' keyword.
+        self.file, self.lineno = file, lineno
+
+def readmodule(module, path=None):
+    '''Return only the classes found in MODULE (legacy interface).
+
+    The module is scanned exactly as for readmodule_ex(); entries of
+    the resulting dictionary whose value is not a Class instance
+    (functions, the '__path__' list of a package) are left out.'''
+
+    everything = _readmodule(module, path or [])
+    return dict((name, obj) for name, obj in everything.items()
+                if isinstance(obj, Class))
+
+def readmodule_ex(module, path=None):
+    '''Scan a module file and return a dictionary describing it.
+
+    MODULE is searched for in PATH (when given) and then sys.path.
+    The module is read and parsed, and the result holds one entry
+    for each class and each top-level function found in it.
+    '''
+    search_path = path if path else []
+    return _readmodule(module, search_path)
+
+def _close_scopes(stack, indent):
+    '''Pop every (object, indent) entry of STACK indented at or past INDENT.'''
+    while stack and stack[-1][1] >= indent:
+        del stack[-1]
+
+def _resolve_base(name, tree):
+    '''Return the known object for base-class NAME, or NAME itself.
+
+    NAME is first looked up in TREE (the module being scanned).  For a
+    dotted name only the last two components are used: the class is
+    looked up in the cached module whose name is the second-to-last
+    component.
+    '''
+    if name in tree:
+        # we know this super class
+        return tree[name]
+    parts = name.split('.')
+    if len(parts) > 1:
+        # super class is of the form module.class:
+        # look in module for class
+        known = _modules.get(parts[-2])
+        if known is not None and parts[-1] in known:
+            return known[parts[-1]]
+    return name
+
+def _readmodule(module, path, inpackage=None):
+    '''Do the hard work for readmodule[_ex].
+
+    If INPACKAGE is given, it must be the dotted name of the package in
+    which we are searching for a submodule, and then PATH must be the
+    package search path; otherwise, we are searching for a top-level
+    module, and PATH is combined with sys.path.
+
+    Return the dictionary describing the module: top-level class and
+    function names map to Class and Function instances, and packages
+    additionally get a '__path__' entry.  The dictionary is cached in
+    _modules under the full dotted name before parsing starts, so a
+    module that (indirectly) imports itself does not recurse forever.
+
+    Raise ImportError when the parent of a dotted name is not a
+    package; errors from imp.find_module() propagate as well.  The
+    source file is closed even if tokenizing fails.
+    '''
+    # Compute the full module name (prepending inpackage if set)
+    if inpackage is not None:
+        fullmodule = "%s.%s" % (inpackage, module)
+    else:
+        fullmodule = module
+
+    # Check in the cache
+    if fullmodule in _modules:
+        return _modules[fullmodule]
+
+    # Initialize the dict for this module's contents
+    tree = {}
+
+    # Check if it is a built-in module; we don't do much for these
+    if module in sys.builtin_module_names and inpackage is None:
+        _modules[module] = tree
+        return tree
+
+    # Check for a dotted module name
+    i = module.rfind('.')
+    if i >= 0:
+        package = module[:i]
+        submodule = module[i+1:]
+        parent = _readmodule(package, path, inpackage)
+        if inpackage is not None:
+            package = "%s.%s" % (inpackage, package)
+        if '__path__' not in parent:
+            raise ImportError('No package named {}'.format(package))
+        return _readmodule(submodule, parent['__path__'], package)
+
+    # Search the path for the module
+    if inpackage is not None:
+        f, fname, (_s, _m, ty) = imp.find_module(module, path)
+    else:
+        f, fname, (_s, _m, ty) = imp.find_module(module, path + sys.path)
+    if ty == imp.PKG_DIRECTORY:
+        tree['__path__'] = [fname]
+        path = [fname] + path
+        f, fname, (_s, _m, ty) = imp.find_module('__init__', [fname])
+    _modules[fullmodule] = tree
+    if ty != imp.PY_SOURCE:
+        # not Python source, can't do anything with this module;
+        # find_module() hands back no file object for modules that do
+        # not live in a file, so only close what was actually opened
+        if f is not None:
+            f.close()
+        return tree
+
+    stack = [] # stack of (class, indent) pairs
+
+    try:
+        g = tokenize.generate_tokens(f.readline)
+        try:
+            for tokentype, token, start, _end, _line in g:
+                if tokentype == DEDENT:
+                    # close nested classes and defs
+                    _close_scopes(stack, start[1])
+                elif token == 'def':
+                    lineno, thisindent = start
+                    # close previous nested classes and defs
+                    _close_scopes(stack, thisindent)
+                    tokentype, meth_name, start = next(g)[0:3]
+                    if tokentype != NAME:
+                        continue # Syntax error
+                    if stack:
+                        cur_class = stack[-1][0]
+                        if isinstance(cur_class, Class):
+                            # it's a method
+                            cur_class._addmethod(meth_name, lineno)
+                        # else it's a nested def
+                    else:
+                        # it's a function
+                        tree[meth_name] = Function(fullmodule, meth_name,
+                                                   fname, lineno)
+                    stack.append((None, thisindent)) # Marker for nested fns
+                elif token == 'class':
+                    lineno, thisindent = start
+                    # close previous nested classes and defs
+                    _close_scopes(stack, thisindent)
+                    tokentype, class_name, start = next(g)[0:3]
+                    if tokentype != NAME:
+                        continue # Syntax error
+                    # parse what follows the class name
+                    tokentype, token, start = next(g)[0:3]
+                    inherit = None
+                    if token == '(':
+                        # there's a list of superclasses
+                        names = [] # List of superclasses
+                        level = 1
+                        base_tokens = [] # Tokens of the current superclass
+                        while True:
+                            tokentype, token, start = next(g)[0:3]
+                            if token in (')', ',') and level == 1:
+                                # An empty "()" or a trailing comma
+                                # yields no tokens; don't record '' as
+                                # a base class name.
+                                if base_tokens:
+                                    names.append(_resolve_base(
+                                        "".join(base_tokens), tree))
+                                base_tokens = []
+                            if token == '(':
+                                level += 1
+                            elif token == ')':
+                                level -= 1
+                                if level == 0:
+                                    break
+                            elif token == ',' and level == 1:
+                                pass
+                            # only use NAME and OP (== dot) tokens for
+                            # type name
+                            elif tokentype in (NAME, OP) and level == 1:
+                                base_tokens.append(token)
+                            # expressions in the base list are not supported
+                        inherit = names
+                    cur_class = Class(fullmodule, class_name, inherit,
+                                      fname, lineno)
+                    if not stack:
+                        tree[class_name] = cur_class
+                    stack.append((cur_class, thisindent))
+                elif token == 'import' and start[1] == 0:
+                    modules = _getnamelist(g)
+                    for mod, _mod2 in modules:
+                        try:
+                            # Recursively read the imported module
+                            if inpackage is None:
+                                _readmodule(mod, path)
+                            else:
+                                try:
+                                    _readmodule(mod, path, inpackage)
+                                except ImportError:
+                                    _readmodule(mod, [])
+                        except Exception:
+                            # If we can't find or parse the imported
+                            # module, too bad -- don't die here.  (Only
+                            # ordinary errors are ignored, so Ctrl-C
+                            # still interrupts a long scan.)
+                            pass
+                elif token == 'from' and start[1] == 0:
+                    mod, token = _getname(g)
+                    if not mod or token != "import":
+                        continue
+                    names = _getnamelist(g)
+                    try:
+                        # Recursively read the imported module
+                        d = _readmodule(mod, path, inpackage)
+                    except Exception:
+                        # If we can't find or parse the imported module,
+                        # too bad -- don't die here.
+                        continue
+                    # add any classes that were defined in the imported
+                    # module to our name space if they were mentioned in
+                    # the list
+                    for n, n2 in names:
+                        if n in d:
+                            tree[n2 or n] = d[n]
+                        elif n == '*':
+                            # don't add names that start with _
+                            for public in d:
+                                if public[0] != '_':
+                                    tree[public] = d[public]
+        except StopIteration:
+            # The token stream ended in the middle of a statement.
+            pass
+    finally:
+        f.close()
+    return tree
+
+def _getnamelist(g):
+    '''Read a comma-separated list of dotted names with 'as' clauses.
+
+    G is the token generator.  Return a list of pairs (name, name2)
+    where name2 is the 'as' name, or None if there is no 'as' clause.
+    Reading stops at the first position where no name is found, or at
+    the first token containing a newline.  StopIteration from an
+    exhausted generator is passed on to the caller.
+    '''
+    names = []
+    while True:
+        name, token = _getname(g)
+        if not name:
+            break
+        if token == 'as':
+            name2, token = _getname(g)
+        else:
+            name2 = None
+        names.append((name, name2))
+        # Skip whatever follows the name up to the next comma or the
+        # end of the line.
+        while token != "," and "\n" not in token:
+            # next(g) instead of g.next(): works with any iterator on
+            # Python 2.6+ and on Python 3.
+            token = next(g)[1]
+        if token != ",":
+            break
+    return names
+
+def _getname(g):
+    '''Read one dotted name from token generator G.
+
+    Return a pair (name, token) where name is the dotted name, or None
+    if there was no dotted name, and token is the next input token
+    (the string of the first token that is not part of the name).  A
+    lone '*' is accepted as a name.  StopIteration from an exhausted
+    generator is passed on to the caller.
+    '''
+    parts = []
+    # next(g) instead of g.next(): works with any iterator on
+    # Python 2.6+ and on Python 3.
+    tokentype, token = next(g)[0:2]
+    if tokentype != NAME and token != '*':
+        return (None, token)
+    parts.append(token)
+    while True:
+        tokentype, token = next(g)[0:2]
+        if token != '.':
+            break
+        tokentype, token = next(g)[0:2]
+        if tokentype != NAME:
+            break
+        parts.append(token)
+    return (".".join(parts), token)
+
+def _main():
+    '''Main program for testing.
+
+    sys.argv[1] is either a module name or the path of an existing
+    file; for a file, its directory is used as the search path and a
+    trailing ".py" is dropped from the name.  The classes (with their
+    methods) and functions found are printed in line-number order.
+    '''
+    import os
+    mod = sys.argv[1]
+    if os.path.exists(mod):
+        path = [os.path.dirname(mod)]
+        mod = os.path.basename(mod)
+        if mod.lower().endswith(".py"):
+            mod = mod[:-3]
+    else:
+        path = []
+    contents = readmodule_ex(mod, path)
+    # Entries without a lineno attribute (the '__path__' list of a
+    # package) sort first.  A key function replaces the cmp-style
+    # comparison; both sorts are stable, so the order is unchanged.
+    objs = sorted(contents.values(),
+                  key=lambda obj: getattr(obj, 'lineno', 0))
+    for obj in objs:
+        if isinstance(obj, Class):
+            # Single-argument print(...) with %-formatting produces the
+            # same output under Python 2 and Python 3.
+            print("class %s %s %s" % (obj.name, obj.super, obj.lineno))
+            methods = sorted(obj.methods.items(), key=itemgetter(1))
+            for name, lineno in methods:
+                if name != "__path__":
+                    print(" def %s %s" % (name, lineno))
+        elif isinstance(obj, Function):
+            print("def %s %s" % (obj.name, obj.lineno))
+
+# Run the test driver only when this file is executed as a script.
+if __name__ == "__main__":
+ _main()