+"""Parse a Python module and describe its classes and methods.
+Parse enough of a Python file to recognize imports and class and
+method definitions, and to find out the superclasses of a class.
The interface consists of two functions:
        readmodule_ex(module [, path])
        readmodule(module [, path])
where module is the name of a Python module, and path is an optional
list of directories where the module is to be searched.  If present,
path is prepended to the system search path sys.path.  The return
value is a dictionary.  For readmodule_ex the keys are the names of
the classes and top-level functions defined in the module (including
those obtained via the from XXX import YYY construct), and the values
are instances of the classes Class and Function defined here.  One
special key/value pair is present for packages: the key '__path__'
has a list as its value which contains the package search path.
readmodule is the backwards compatible interface: it returns only
the entries whose values are Class instances.
+A class is described by the class Class in this module.  Instances
+of this class have the following instance variables:
+        module -- the module name
+        name -- the name of the class
+        super -- a list of super classes (Class instances)
+        methods -- a dictionary of methods
+        file -- the file in which the class was defined
+        lineno -- the line in the file on which the class statement occurred
+The dictionary of methods uses the method names as keys and the line
+numbers on which the method was defined as values.
+If the name of a super class is not recognized, the corresponding
+entry in the list of super classes is not a class instance but a
+string giving the name of the super class.  Since import statements
+are recognized and imported modules are scanned as well, this
+shouldn't happen often.
A function is described by the class Function in this module.
Instances of this class have the following instance variables:
        module -- the module name
        name -- the name of the function
        file -- the file in which the function was defined
        lineno -- the line in the file on which the def statement occurred
"""
+import sys
+import imp
+import tokenize
+from token import NAME, DEDENT, OP
+from operator import itemgetter
# Public names of this module.
__all__ = [
    "readmodule",
    "readmodule_ex",
    "Class",
    "Function",
]

# Cache of modules we have already seen: module name -> dictionary of
# the names found in that module.
_modules = {}
# Each Python class is represented by an instance of this class.
class Class:
    '''Class to represent a Python class.

    Instance variables:
        module -- the module name
        name -- the name of the class
        super -- a list of super classes (empty if None was passed)
        methods -- a dictionary mapping method names to line numbers
        file -- the file in which the class was defined
        lineno -- the line on which the class statement occurred
    '''

    def __init__(self, module, name, super, file, lineno):
        self.module = module
        self.name = name
        # Normalize "no base classes" to an empty list so that callers
        # can always iterate over self.super.
        if super is None:
            super = []
        self.super = super
        self.methods = {}
        self.file = file
        self.lineno = lineno

    def _addmethod(self, name, lineno):
        '''Record that method NAME is defined on line LINENO.'''
        self.methods[name] = lineno
class Function:
    '''Class to represent a top-level Python function.

    Instance variables:
        module -- the module name
        name -- the name of the function
        file -- the file in which the function was defined
        lineno -- the line on which the def statement occurred
    '''

    def __init__(self, module, name, file, lineno):
        self.module = module
        self.name = name
        self.file = file
        self.lineno = lineno
def readmodule(module, path=None):
    '''Backwards compatible interface.

    Run the same scan as readmodule_ex() and return a new dictionary
    holding only the entries whose value is a Class instance.'''
    search_path = path or []
    everything = _readmodule(module, search_path)
    return dict((name, obj) for name, obj in everything.items()
                if isinstance(obj, Class))
def readmodule_ex(module, path=None):
    '''Read a module file and return a dictionary of what it defines.

    Search for MODULE in PATH and sys.path, read and parse the
    module and return the dictionary built by _readmodule(), with
    one entry for each class and top-level function found.
    '''
    if not path:
        path = []
    return _readmodule(module, path)
def _readmodule(module, path, inpackage=None):
    '''Do the hard work for readmodule[_ex].

    If INPACKAGE is given, it must be the dotted name of the package in
    which we are searching for a submodule, and then PATH must be the
    package search path; otherwise, we are searching for a top-level
    module, and PATH is combined with sys.path.

    Return a dictionary mapping names to Class and Function instances
    (plus the key '__path__' for packages).  The dictionary is stored
    in the _modules cache and returned again on later calls for the
    same module.  Errors raised while locating MODULE itself propagate
    to the caller; modules imported by MODULE that cannot be found or
    parsed are silently skipped.
    '''
    # Compute the full module name (prepending inpackage if set)
    if inpackage is not None:
        fullmodule = "%s.%s" % (inpackage, module)
    else:
        fullmodule = module

    # Check in the cache
    if fullmodule in _modules:
        return _modules[fullmodule]

    # Initialize the dictionary for this module's contents
    tree = {}

    # Check if it is a built-in module; we don't do much for these
    if module in sys.builtin_module_names and inpackage is None:
        _modules[module] = tree
        return tree

    # Check for a dotted module name: read the package first, then look
    # for the submodule on the package's own search path.
    i = module.rfind('.')
    if i >= 0:
        package = module[:i]
        submodule = module[i+1:]
        parent = _readmodule(package, path, inpackage)
        if inpackage is not None:
            package = "%s.%s" % (inpackage, package)
        return _readmodule(submodule, parent['__path__'], package)

    # Search the path for the module
    if inpackage is not None:
        search_path = path
    else:
        search_path = path + sys.path
    f, fname, (_s, _m, ty) = imp.find_module(module, search_path)
    if ty == imp.PKG_DIRECTORY:
        tree['__path__'] = [fname]
        path = [fname] + path
        f, fname, (_s, _m, ty) = imp.find_module('__init__', [fname])
    # Register the (still empty) dictionary before parsing so that
    # circular imports find it in the cache instead of recursing forever.
    _modules[fullmodule] = tree
    if ty != imp.PY_SOURCE:
        # not Python source, can't do anything with this module
        if f is not None:
            f.close()
        return tree

    try:
        stack = []  # stack of (class, indent) pairs
        g = tokenize.generate_tokens(f.readline)
        try:
            for tokentype, token, start, _end, _line in g:
                if tokentype == DEDENT:
                    thisindent = start[1]
                    # close nested classes and defs
                    while stack and stack[-1][1] >= thisindent:
                        del stack[-1]
                elif token == 'def':
                    lineno, thisindent = start
                    # close previous nested classes and defs
                    while stack and stack[-1][1] >= thisindent:
                        del stack[-1]
                    tokentype, meth_name, start = next(g)[0:3]
                    if tokentype != NAME:
                        continue  # Syntax error
                    if stack:
                        cur_class = stack[-1][0]
                        if isinstance(cur_class, Class):
                            # it's a method
                            cur_class._addmethod(meth_name, lineno)
                        # else it's a nested def
                    else:
                        # it's a function
                        tree[meth_name] = Function(fullmodule, meth_name,
                                                   fname, lineno)
                    stack.append((None, thisindent))  # Marker for nested fns
                elif token == 'class':
                    lineno, thisindent = start
                    # close previous nested classes and defs
                    while stack and stack[-1][1] >= thisindent:
                        del stack[-1]
                    tokentype, class_name, start = next(g)[0:3]
                    if tokentype != NAME:
                        continue  # Syntax error
                    # parse what follows the class name
                    tokentype, token, start = next(g)[0:3]
                    inherit = None
                    if token == '(':
                        names = []  # List of superclasses
                        # there's a list of superclasses
                        level = 1
                        base_tokens = []  # Tokens making up current superclass
                        while True:
                            tokentype, token, start = next(g)[0:3]
                            if token in (')', ',') and level == 1:
                                n = "".join(base_tokens)
                                if n in tree:
                                    # we know this super class
                                    n = tree[n]
                                else:
                                    c = n.split('.')
                                    if len(c) > 1:
                                        # super class is of the form
                                        # module.class: look in module for
                                        # class
                                        m = c[-2]
                                        c = c[-1]
                                        if m in _modules:
                                            d = _modules[m]
                                            if c in d:
                                                n = d[c]
                                names.append(n)
                                base_tokens = []
                            if token == '(':
                                level += 1
                            elif token == ')':
                                level -= 1
                                if level == 0:
                                    break
                            elif token == ',' and level == 1:
                                pass
                            # only use NAME and OP (== dot) tokens for type name
                            elif tokentype in (NAME, OP) and level == 1:
                                base_tokens.append(token)
                            # expressions in the base list are not supported
                        inherit = names
                    cur_class = Class(fullmodule, class_name, inherit,
                                      fname, lineno)
                    if not stack:
                        tree[class_name] = cur_class
                    stack.append((cur_class, thisindent))
                elif token == 'import' and start[1] == 0:
                    modules = _getnamelist(g)
                    for mod, _mod2 in modules:
                        try:
                            # Recursively read the imported module
                            if inpackage is None:
                                _readmodule(mod, path)
                            else:
                                try:
                                    _readmodule(mod, path, inpackage)
                                except ImportError:
                                    _readmodule(mod, [])
                        except Exception:
                            # If we can't find or parse the imported module,
                            # too bad -- don't die here.
                            pass
                elif token == 'from' and start[1] == 0:
                    mod, token = _getname(g)
                    if not mod or token != "import":
                        continue
                    names = _getnamelist(g)
                    try:
                        # Recursively read the imported module
                        d = _readmodule(mod, path, inpackage)
                    except Exception:
                        # If we can't find or parse the imported module,
                        # too bad -- don't die here.
                        continue
                    # add any classes that were defined in the imported module
                    # to our name space if they were mentioned in the list
                    for n, n2 in names:
                        if n in d:
                            tree[n2 or n] = d[n]
                        elif n == '*':
                            # don't add names that start with _
                            for name in d:
                                if name[0] != '_':
                                    tree[name] = d[name]
        except StopIteration:
            # The token stream ended in the middle of a statement; keep
            # whatever was collected so far.
            pass
    finally:
        # Close the source file even if tokenizing or parsing fails.
        f.close()
    return tree
def _getnamelist(g):
    '''Helper to get a comma-separated list of dotted names plus 'as'
    clauses.

    G is the token generator, positioned just before the first name.
    Return a list of pairs (name, name2) where name2 is the 'as' name,
    or None if there is no 'as' clause.
    '''
    names = []
    while True:
        name, token = _getname(g)
        if not name:
            break
        if token == 'as':
            name2, token = _getname(g)
        else:
            name2 = None
        names.append((name, name2))
        # Skip anything unexpected up to the next comma or end of line.
        while token != "," and "\n" not in token:
            token = next(g)[1]
        if token != ",":
            break
    return names
def _getname(g):
    '''Helper to get a dotted name from the token generator G.

    Return a pair (name, token) where name is the dotted name, or None
    if there was no dotted name, and token is the next input token.
    A lone '*' is accepted as a name.
    '''
    parts = []
    tokentype, token = next(g)[0:2]
    if tokentype != NAME and token != '*':
        return (None, token)
    parts.append(token)
    while True:
        tokentype, token = next(g)[0:2]
        if token != '.':
            break
        tokentype, token = next(g)[0:2]
        if tokentype != NAME:
            break
        parts.append(token)
    return (".".join(parts), token)
def _main():
    '''Main program for testing.

    sys.argv[1] is either a module name or the path of a file; in the
    latter case the file's directory is used as the search path.
    Print the classes (with their methods) and top-level functions
    found, ordered by line number.
    '''
    import os
    mod = sys.argv[1]
    if os.path.exists(mod):
        path = [os.path.dirname(mod)]
        mod = os.path.basename(mod)
        if mod.lower().endswith(".py"):
            mod = mod[:-3]
    else:
        path = []
    tree = readmodule_ex(mod, path)
    # Values without a lineno attribute (such as the '__path__' list of
    # a package) sort first.
    objs = sorted(tree.values(), key=lambda obj: getattr(obj, 'lineno', 0))
    for obj in objs:
        if isinstance(obj, Class):
            print("class %s %s %s" % (obj.name, obj.super, obj.lineno))
            methods = sorted(obj.methods.items(), key=itemgetter(1))
            for name, lineno in methods:
                if name != "__path__":
                    print("  def %s %s" % (name, lineno))
        elif isinstance(obj, Function):
            print("def %s %s" % (obj.name, obj.lineno))
if __name__ == "__main__":
    _main()