diff -r 60053dab7e2a -r 842a773e65f2 jamesa/build_graph.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/jamesa/build_graph.py Wed Nov 04 17:40:17 2009 +0000 @@ -0,0 +1,343 @@ +# Copyright (c) 2009 Symbian Foundation Ltd +# This component and the accompanying materials are made available +# under the terms of the License "Eclipse Public License v1.0" +# which accompanies this distribution, and is available +# at the URL "http://www.eclipse.org/legal/epl-v10.html". +# +# Initial Contributors: +# Symbian Foundation Ltd - initial contribution. +# +# Contributors: +# +# Description: +# Generates a dependency graph of the Symbian source tree. + +"""Build a graph of component dependencies from Symbian OS source code. +The graph is serialized to a file, which can then be used by other scripts to extract data. + +The script works by recursing over the directory structure from the specified root and then +analyzing all bld.inf files to locate referenced production MMP files. These are then processed +for target and dependency information. + +You can use the supplementary scripts to then extract useful information from the generated graph +file. +""" + +from optparse import OptionParser +from _common import Node + +import re +import os +import sys +import pickle +import logging + +__author__ = 'James Aley' +__email__ = 'jamesa@symbian.org' +__version__ = '1.0' + +# Constants for various default config +_LOG_FORMAT = '%(levelname)s: %(message)s' +_MAX_PATH = 260 + +# Precompile regexes for better performance +# - Comment filtering +_RE_CLEAN_INLINE = '^(.*)//.*$' +_RE_MULTILINE_OPEN = '^(.*)/\\*.*$' +_RE_MULTILINE_CLOSE = '^.*\\*/(.*)$' +_p_clean_inline = re.compile(_RE_CLEAN_INLINE) +_p_multiline_open = re.compile(_RE_MULTILINE_OPEN) +_p_multiline_close = re.compile(_RE_MULTILINE_CLOSE) + +# - MMP file Parsing +_RE_TARGET = '^\\s*TARGET\\s+([^\\s]+).*$' +_RE_PLAIN_TARGET = '^\\s*([^\\s\\.]+)\\.?[^\\s]?\\s*' +_RE_COMPLEX_TARGET = '.*\\((.+),.+\\).*' +_RE_LIBRARY = '^\\s*[^\\s]*LIBRARY.*\\s+([^\\s]+.*)$' +_RE_START = '^\\s*START.*$' +_RE_END = '\\s*END.*$' +_p_target = re.compile(_RE_TARGET, re.I) +_p_plain_target = re.compile(_RE_PLAIN_TARGET) +_p_complex_target = re.compile(_RE_COMPLEX_TARGET) +_p_library = re.compile(_RE_LIBRARY, re.I) +_p_start = re.compile(_RE_START) +_p_end = re.compile(_RE_END) + +# - BLD.INF file parsing +_RE_PRJ_MMPFILES = '^\\s*PRJ_MMPFILES\\s*$' +_RE_OTHER_SECTION = '^\\s*PRJ_[a-z]+\\s*$' +_p_prj_mmpfiles = re.compile(_RE_PRJ_MMPFILES, re.I) +_p_other_section = re.compile(_RE_OTHER_SECTION, re.I) + +# Set up a logging instance for output +logging.basicConfig(format=_LOG_FORMAT, level=logging.WARNING, stream=sys.stdout) + +# Cache dictionary to marry Nodes to eachother +node_cache = {} + +# Dictionary representing the dependency graph. +# Each key identifies the node in the graph, where the value is the node +# object itself including the arcs to other node_path keys that it requires. +graph = {} + +def rstrip(string, suffix): + """Like Python's __str__.rstrip(chars), but it treats the chars as + a contiguous string and only strips that complete ending. + """ + if string.endswith(suffix): + string = string[:len(string) - len(suffix)] + return string + +def clean_binary_name(binary_name): + """Strips the extension off of binary names so that references to .lib + are associated with the correct binaries. + """ + match_complex_target = _p_complex_target.match(binary_name) + if match_complex_target: + binary_name = match_complex_target.groups()[0].lower().strip() + else: + match_plain_target = _p_plain_target.match(binary_name) + if match_plain_target: + binary_name = match_plain_target.groups()[0].lower().strip() + return binary_name + +def looks_like_test(path): + """Returns true if a path looks like it refers to test components. + The script does its best to filter test components, as many are missing + from the source tree and they're not interesting with respect to building + production ROM images anyway. + """ + conventions = ['tsrc', 'test'] + for convention in conventions: + # Iterate through likely test component conventions, if + # we match one, return True now + if os.path.sep + convention + os.path.sep in path.lower(): + return True + # Otherwise, nothing found, so return False + return False + +def without_comments(source_file): + """Generator function, will yield lines of the source_file object (iterable) + with commented regions removed. + """ + multiline_region = False + for line in source_file: + match_multiline_close = _p_multiline_close.match(line) + if match_multiline_close: + # Close Comments, strip to the left of the comment + multiline_region = False + line = match_multiline_close.groups()[0] + if multiline_region: + # Skip the line if we're in a commented region + continue + match_multiline_open = _p_multiline_open.match(line) + if match_multiline_open: + # Open comments, strip to the right of the comment + multiline_region = True + line = match_multiline_open.groups()[0] + match_inline = _p_clean_inline.match(line) + if match_inline: + # Strip the line to only the left of the comment + line = match_inline.groups()[0] + if line: + yield line + +def parse_mmp(mmp_path): + """Read an mmp file, return a tuple of the form: + (target, required_target_list) + """ + logging.debug('parse_mmp(%s)' % (mmp_path, )) + + mmp_file = None + try: + mmp_file = open(mmp_path) + except IOError, e: + logging.error('Unable to open: %s' % (mmp_path, )) + return + + # Iterate through MMP file lines to find the TARGET and LIBRARY statements + # Note that Symbian projects can compile to different TARGET objects depending on + # precompiler macros, so we must index all possible target names. + targets = [] + libs = [] + resource_block = False + for line in without_comments(mmp_file): + match_start = _p_start.match(line) + if match_start: + resource_block = True + match_end = _p_end.match(line) + if match_end: + resource_block = False + if resource_block: + # need to avoid resource target sections + continue + match_target = _p_target.match(line) + match_library = _p_library.match(line) + if match_target: + clean_target = clean_binary_name(match_target.groups()[0]) + targets.append(clean_target) + elif match_library: + libs_on_line = match_library.groups()[0].split() + for lib in libs_on_line: + clean_lib = clean_binary_name(lib) + libs.append(clean_lib) + mmp_file.close() + + return (targets, libs) + +def new_node(path, ref_mmps, ref_testmmps): + """Construct a new node in the graph with the provided content. + """ + logging.debug('new_node(%s, ref_mmps(%d), ref_testmmps(%d))' % (path, len(ref_mmps), len(ref_testmmps))) + node = Node(path) + + # Iterate the MMPs, read dependency and target information + for mmp in ref_mmps: + (targets, dependencies) = parse_mmp(mmp) + if len(targets) > 0: + for target in targets: + node.mmp_components.append(target) + node.add_deps(dependencies) + + # Register the components in the cache, as later we will + # join the graph nodes by referring to this cache + for c in node.mmp_components: + if c in node_cache.keys(): + existing = node_cache[c] + node_cache[c] = existing + [path] + else: + node_cache[c] = [path] + + # Add this node to the graph + graph[path] = node + +def parse_bld_inf(path): + """Parse a bld.inf file to check to see if references MMP files. + For those MMP files included, parse them to build the node object. + """ + logging.debug('parse_bld_inf(%s)' % (path, )) + + # List the files referenced from this bld.inf + ref_mmp = [] + ref_testmmp = [] + + bld_inf = None + try: + bld_inf = open(path, 'r') + except IOError, e: + logging.error('Unable to open: %s' % (path, )) + return + + # Parse the bld_inf file, adding references MMP files to appropriate lists + projects_flag = False + for line in without_comments(bld_inf): + match_projects = _p_prj_mmpfiles.match(line) + match_other_section = _p_other_section.match(line) + if match_projects: + projects_flag = True + elif match_other_section: + projects_flag = False + if projects_flag and len(line) <= _MAX_PATH: + rel_name = rstrip(line.lower().strip(), '.mmp') + bld_inf_path = os.path.dirname(path) + test_path = os.path.join(bld_inf_path, rel_name + '.mmp') + test_path = os.path.realpath(test_path) + if os.path.exists(test_path): + ref_mmp.append(test_path) + else: + logging.warning('%s refers to %s but it does not exist!' % (path, test_path)) + bld_inf.close() + + # If we found some MMP files, then this is a new node + if len(ref_mmp) > 0: + new_node(path, ref_mmp, ref_testmmp) + +def make_nodes(not_used, dir_name, file_names): + """Call back function for os.path.walk: will analyse the file names, if + there are any bld.inf files, it will open them to see if they identify a + Node object and create them as appropriate + """ + logging.debug('make_nodes(%s, %s)' % (dir_name, file_names)) + if looks_like_test(dir_name): + return + for file_name in file_names: + if file_name.lower().endswith('.inf'): + abs_path = os.path.join(dir_name, file_name) + assert(os.path.exists(abs_path)) + parse_bld_inf(abs_path) + +def connect_nodes(): + """Walk through the graph and substute the contents of the dependency + list members at each node with references to the node_path of that which + builds the referenced component. + + There will be instances where multiple graph nodes build overlapping + components. This will, in practice, mean that there are many ways of + building a suitable ROM for dependencies of one of these nodes. + """ + unresolved_deps = [] + for node_path in graph.keys(): + node = graph[node_path] + resolved = [] + for dep in node.dependencies: + if dep not in node_cache.keys(): + logging.warning('Could not resolve %s for %s' % (dep, node.node_path)) + if dep not in unresolved_deps: + unresolved_deps.append(dep) + node.unresolved.append(dep) + else: + solutions = node_cache[dep] + proposed = solutions[0] + if proposed not in resolved: + resolved.append(proposed) + node.interesting += filter(lambda x: x not in node.interesting, solutions[1:]) + node.dependencies = resolved + graph[node_path] = node + if len(unresolved_deps) > 0: + logging.warning('There were %d unresolved dependencies.' % (len(unresolved_deps), )) + +def build_graph(root): + """Walk nodes from the directory root provided looking for bld.inf files. + Graph will be built from the referened production MMP files. + """ + if not os.path.isdir(root): + logging.fatal('%s is not a directory, aborting...' % (root, )) + exit(1) + os.path.walk(root, make_nodes, None) + connect_nodes() + +def save_graph(path): + """Serialize the graph object to path. This will be a Python object pickle at + the highest available protocol version for this Python install. + """ + graph_file = None + try: + graph_file = open(path, 'wb') + except IOError, e: + logging.error('Could not write graph to file: %s' % (repr(e), )) + exit(1) + pickle.dump(graph, graph_file, pickle.HIGHEST_PROTOCOL) + graph_file.close() + +# Main: +if __name__ == '__main__': + parser = OptionParser() + parser.set_description(__doc__) + parser.add_option('-g', '--graph', dest='graph_file', + help='File name to write the graph to.', + metavar='GRAPH_FILE', default='dependencies.graph') + parser.add_option('-r', '--root', dest='graph_root', + help='Directory to recursively build a graph from, usually root of source tree.', + metavar='SOURCE_ROOT', default='.') + parser.add_option('-v', '--verbose', dest='verbose', + help='Verbose logging, will show all warnings as graph is generated. Recommend redirect!', + action='store_true', default=False) + (options, args) = parser.parse_args() + if not options.verbose: + logging.disable(logging.ERROR) + print 'Walking source from "%s"\nThis can take some time with large source trees...' % (options.graph_root, ) + build_graph(options.graph_root) + print 'Found %d components consisting of %d binaries.' % (len(graph), len(node_cache)) + print 'Wriing graph to %s' % (options.graph_file) + save_graph(options.graph_file) + print '...done!'