jamesa/build_graph.py
changeset 5 842a773e65f2
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jamesa/build_graph.py	Wed Nov 04 17:40:17 2009 +0000
@@ -0,0 +1,343 @@
+# Copyright (c) 2009 Symbian Foundation Ltd
+# This component and the accompanying materials are made available
+# under the terms of the License "Eclipse Public License v1.0"
+# which accompanies this distribution, and is available
+# at the URL "http://www.eclipse.org/legal/epl-v10.html".
+#
+# Initial Contributors:
+# Symbian Foundation Ltd - initial contribution.
+# 
+# Contributors:
+#
+# Description:
+# Generates a dependency graph of the Symbian source tree.
+
+"""Build a graph of component dependencies from Symbian OS source code.
+The graph is serialized to a file, which can then be used by other scripts to extract data.
+
+The script works by recursing over the directory structure from the specified root and then
+analyzing all bld.inf files to locate referenced production MMP files. These are then processed
+for target and dependency information.
+
+You can use the supplementary scripts to then extract useful information from the generated graph
+file.
+"""
+
+from optparse import OptionParser
+from _common import Node
+
+import re
+import os
+import sys
+import pickle
+import logging
+
+__author__ = 'James Aley'
+__email__ = 'jamesa@symbian.org'
+__version__ = '1.0'
+
+# Constants for various default config
+_LOG_FORMAT = '%(levelname)s: %(message)s'
+_MAX_PATH = 260
+
+# Precompile regexes for better performance
+# - Comment filtering
+_RE_CLEAN_INLINE = '^(.*)//.*$'
+_RE_MULTILINE_OPEN = '^(.*)/\\*.*$'
+_RE_MULTILINE_CLOSE = '^.*\\*/(.*)$'
+_p_clean_inline = re.compile(_RE_CLEAN_INLINE)
+_p_multiline_open = re.compile(_RE_MULTILINE_OPEN)
+_p_multiline_close = re.compile(_RE_MULTILINE_CLOSE)
+
+# - MMP file Parsing
+_RE_TARGET = '^\\s*TARGET\\s+([^\\s]+).*$'
+_RE_PLAIN_TARGET = '^\\s*([^\\s\\.]+)\\.?[^\\s]?\\s*'
+_RE_COMPLEX_TARGET = '.*\\((.+),.+\\).*'
+_RE_LIBRARY = '^\\s*[^\\s]*LIBRARY.*\\s+([^\\s]+.*)$'
+_RE_START = '^\\s*START.*$'
+_RE_END = '\\s*END.*$'
+_p_target = re.compile(_RE_TARGET, re.I)
+_p_plain_target = re.compile(_RE_PLAIN_TARGET)
+_p_complex_target = re.compile(_RE_COMPLEX_TARGET)
+_p_library = re.compile(_RE_LIBRARY, re.I)
+_p_start = re.compile(_RE_START)
+_p_end = re.compile(_RE_END)
+
+# - BLD.INF file parsing
+_RE_PRJ_MMPFILES = '^\\s*PRJ_MMPFILES\\s*$'
+_RE_OTHER_SECTION = '^\\s*PRJ_[a-z]+\\s*$'
+_p_prj_mmpfiles = re.compile(_RE_PRJ_MMPFILES, re.I)
+_p_other_section = re.compile(_RE_OTHER_SECTION, re.I)
+
+# Set up a logging instance for output
+logging.basicConfig(format=_LOG_FORMAT, level=logging.WARNING, stream=sys.stdout)
+
+# Cache dictionary to marry Nodes to eachother
+node_cache = {}
+
+# Dictionary representing the dependency graph.
+# Each key identifies the node in the graph, where the value is the node
+# object itself including the arcs to other node_path keys that it requires.
+graph = {}
+
+def rstrip(string, suffix):
+    """Like Python's __str__.rstrip(chars), but it treats the chars as
+    a contiguous string and only strips that complete ending.
+    """
+    if string.endswith(suffix):
+        string = string[:len(string) - len(suffix)]
+    return string
+
+def clean_binary_name(binary_name):
+    """Strips the extension off of binary names so that references to .lib
+    are associated with the correct binaries.
+    """
+    match_complex_target = _p_complex_target.match(binary_name)
+    if match_complex_target:
+        binary_name = match_complex_target.groups()[0].lower().strip()
+    else:
+        match_plain_target = _p_plain_target.match(binary_name)
+        if match_plain_target:
+            binary_name = match_plain_target.groups()[0].lower().strip()
+    return binary_name
+
+def looks_like_test(path):
+    """Returns true if a path looks like it refers to test components.
+    The script does its best to filter test components, as many are missing
+    from the source tree and they're not interesting with respect to building
+    production ROM images anyway.
+    """
+    conventions = ['tsrc', 'test']
+    for convention in conventions:
+        # Iterate through likely test component conventions, if
+        # we match one, return True now
+        if os.path.sep + convention + os.path.sep in path.lower():
+            return True
+    # Otherwise, nothing found, so return False
+    return False
+
+def without_comments(source_file):
+    """Generator function, will yield lines of the source_file object (iterable)
+    with commented regions removed.
+    """
+    multiline_region = False
+    for line in source_file:
+        match_multiline_close = _p_multiline_close.match(line)
+        if match_multiline_close:
+            # Close Comments, strip to the left of the comment
+            multiline_region = False
+            line = match_multiline_close.groups()[0]
+        if multiline_region:
+            # Skip the line if we're in a commented region
+            continue
+        match_multiline_open = _p_multiline_open.match(line)
+        if match_multiline_open:
+            # Open comments, strip to the right of the comment
+            multiline_region = True
+            line = match_multiline_open.groups()[0]
+        match_inline = _p_clean_inline.match(line)
+        if match_inline:
+            # Strip the line to only the left of the comment
+            line = match_inline.groups()[0]
+        if line:
+            yield line
+
+def parse_mmp(mmp_path):
+    """Read an mmp file, return a tuple of the form:
+        (target, required_target_list)
+    """
+    logging.debug('parse_mmp(%s)' % (mmp_path, ))
+    
+    mmp_file = None
+    try:
+        mmp_file = open(mmp_path)
+    except IOError, e:
+        logging.error('Unable to open: %s' % (mmp_path, ))
+        return
+
+    # Iterate through MMP file lines to find the TARGET and LIBRARY statements
+    # Note that Symbian projects can compile to different TARGET objects depending on
+    # precompiler macros, so we must index all possible target names.
+    targets = []
+    libs = []
+    resource_block = False
+    for line in without_comments(mmp_file):
+        match_start = _p_start.match(line)
+        if match_start:
+            resource_block = True
+        match_end = _p_end.match(line)
+        if match_end:
+            resource_block = False
+        if resource_block:
+            # need to avoid resource target sections
+            continue
+        match_target = _p_target.match(line)
+        match_library = _p_library.match(line)
+        if match_target:
+            clean_target = clean_binary_name(match_target.groups()[0])
+            targets.append(clean_target)
+        elif match_library:
+            libs_on_line = match_library.groups()[0].split()
+            for lib in libs_on_line:
+                clean_lib = clean_binary_name(lib)
+                libs.append(clean_lib)
+    mmp_file.close()
+
+    return (targets, libs)
+
+def new_node(path, ref_mmps, ref_testmmps):
+    """Construct a new node in the graph with the provided content.
+    """
+    logging.debug('new_node(%s, ref_mmps(%d), ref_testmmps(%d))' % (path, len(ref_mmps), len(ref_testmmps)))
+    node = Node(path)
+    
+    # Iterate the MMPs, read dependency and target information
+    for mmp in ref_mmps:
+        (targets, dependencies) = parse_mmp(mmp)
+        if len(targets) > 0:
+            for target in targets:
+                node.mmp_components.append(target)
+            node.add_deps(dependencies)
+
+    # Register the components in the cache, as later we will
+    # join the graph nodes by referring to this cache
+    for c in node.mmp_components:
+        if c in node_cache.keys():
+            existing = node_cache[c]
+            node_cache[c] = existing + [path]
+        else:
+            node_cache[c] = [path]
+
+    # Add this node to the graph
+    graph[path] = node
+
+def parse_bld_inf(path):
+    """Parse a bld.inf file to check to see if references MMP files.
+    For those MMP files included, parse them to build the node object.
+    """
+    logging.debug('parse_bld_inf(%s)' % (path, ))
+    
+    # List the files referenced from this bld.inf
+    ref_mmp = []
+    ref_testmmp = []
+    
+    bld_inf = None
+    try:
+        bld_inf = open(path, 'r')
+    except IOError, e:
+        logging.error('Unable to open: %s' % (path, ))
+        return
+
+    # Parse the bld_inf file, adding references MMP files to appropriate lists
+    projects_flag = False
+    for line in without_comments(bld_inf):
+        match_projects = _p_prj_mmpfiles.match(line)
+        match_other_section = _p_other_section.match(line)
+        if match_projects:
+            projects_flag = True
+        elif match_other_section:
+            projects_flag = False
+        if projects_flag and len(line) <= _MAX_PATH:
+            rel_name = rstrip(line.lower().strip(), '.mmp')
+            bld_inf_path = os.path.dirname(path)
+            test_path = os.path.join(bld_inf_path, rel_name + '.mmp')
+            test_path = os.path.realpath(test_path)
+            if os.path.exists(test_path):
+                ref_mmp.append(test_path)
+            else:
+                logging.warning('%s refers to %s but it does not exist!' % (path, test_path))
+    bld_inf.close()
+
+    # If we found some MMP files, then this is a new node
+    if len(ref_mmp) > 0:
+        new_node(path, ref_mmp, ref_testmmp)
+
+def make_nodes(not_used, dir_name, file_names):
+    """Call back function for os.path.walk: will analyse the file names, if
+    there are any bld.inf files, it will open them to see if they identify a
+    Node object and create them as appropriate
+    """
+    logging.debug('make_nodes(%s, %s)' % (dir_name, file_names))
+    if looks_like_test(dir_name):
+        return
+    for file_name in file_names:
+        if file_name.lower().endswith('.inf'):
+            abs_path = os.path.join(dir_name, file_name)
+            assert(os.path.exists(abs_path))
+            parse_bld_inf(abs_path)
+
+def connect_nodes():
+    """Walk through the graph and substute the contents of the dependency
+    list members at each node with references to the node_path of that which
+    builds the referenced component.
+
+    There will be instances where multiple graph nodes build overlapping
+    components. This will, in practice, mean that there are many ways of
+    building a suitable ROM for dependencies of one of these nodes.
+    """
+    unresolved_deps = []
+    for node_path in graph.keys():
+        node = graph[node_path]
+        resolved = []
+        for dep in node.dependencies:
+            if dep not in node_cache.keys():
+                logging.warning('Could not resolve %s for %s' % (dep, node.node_path))
+                if dep not in unresolved_deps:
+                    unresolved_deps.append(dep)
+                node.unresolved.append(dep)
+            else:
+                solutions = node_cache[dep]
+                proposed = solutions[0]
+                if proposed not in resolved:
+                    resolved.append(proposed)
+                node.interesting += filter(lambda x: x not in node.interesting, solutions[1:])
+        node.dependencies = resolved
+        graph[node_path] = node
+    if len(unresolved_deps) > 0:
+        logging.warning('There were %d unresolved dependencies.' % (len(unresolved_deps), ))
+
+def build_graph(root):
+    """Walk nodes from the directory root provided looking for bld.inf files.
+    Graph will be built from the referened production MMP files.
+    """
+    if not os.path.isdir(root):
+        logging.fatal('%s is not a directory, aborting...' % (root, ))
+        exit(1)
+    os.path.walk(root, make_nodes, None)
+    connect_nodes()
+
+def save_graph(path):
+    """Serialize the graph object to path. This will be a Python object pickle at
+    the highest available protocol version for this Python install.
+    """
+    graph_file = None
+    try:
+        graph_file = open(path, 'wb')
+    except IOError, e:
+        logging.error('Could not write graph to file: %s' % (repr(e), ))
+        exit(1)
+    pickle.dump(graph, graph_file, pickle.HIGHEST_PROTOCOL)
+    graph_file.close()
+
+# Main:
+if __name__ == '__main__':
+    parser = OptionParser()
+    parser.set_description(__doc__)
+    parser.add_option('-g', '--graph', dest='graph_file', 
+                      help='File name to write the graph to.', 
+                      metavar='GRAPH_FILE', default='dependencies.graph')
+    parser.add_option('-r', '--root', dest='graph_root',
+                      help='Directory to recursively build a graph from, usually root of source tree.',
+                      metavar='SOURCE_ROOT', default='.')
+    parser.add_option('-v', '--verbose', dest='verbose',
+                      help='Verbose logging, will show all warnings as graph is generated. Recommend redirect!',
+                      action='store_true', default=False)
+    (options, args) = parser.parse_args()
+    if not options.verbose:
+        logging.disable(logging.ERROR)
+    print 'Walking source from "%s"\nThis can take some time with large source trees...' % (options.graph_root, )
+    build_graph(options.graph_root)
+    print 'Found %d components consisting of %d binaries.' % (len(graph), len(node_cache))
+    print 'Wriing graph to %s' % (options.graph_file)
+    save_graph(options.graph_file)
+    print '...done!'