jamesa/build_graph.py
changeset 5 842a773e65f2
equal deleted inserted replaced
4:60053dab7e2a 5:842a773e65f2
       
     1 # Copyright (c) 2009 Symbian Foundation Ltd
       
     2 # This component and the accompanying materials are made available
       
     3 # under the terms of the License "Eclipse Public License v1.0"
       
     4 # which accompanies this distribution, and is available
       
     5 # at the URL "http://www.eclipse.org/legal/epl-v10.html".
       
     6 #
       
     7 # Initial Contributors:
       
     8 # Symbian Foundation Ltd - initial contribution.
       
     9 # 
       
    10 # Contributors:
       
    11 #
       
    12 # Description:
       
    13 # Generates a dependency graph of the Symbian source tree.
       
    14 
       
    15 """Build a graph of component dependencies from Symbian OS source code.
       
    16 The graph is serialized to a file, which can then be used by other scripts to extract data.
       
    17 
       
    18 The script works by recursing over the directory structure from the specified root and then
       
    19 analyzing all bld.inf files to locate referenced production MMP files. These are then processed
       
    20 for target and dependency information.
       
    21 
       
    22 You can use the supplementary scripts to then extract useful information from the generated graph
       
    23 file.
       
    24 """
       
    25 
       
    26 from optparse import OptionParser
       
    27 from _common import Node
       
    28 
       
    29 import re
       
    30 import os
       
    31 import sys
       
    32 import pickle
       
    33 import logging
       
    34 
       
    35 __author__ = 'James Aley'
       
    36 __email__ = 'jamesa@symbian.org'
       
    37 __version__ = '1.0'
       
    38 
       
    39 # Constants for various default config
       
    40 _LOG_FORMAT = '%(levelname)s: %(message)s'
       
    41 _MAX_PATH = 260
       
    42 
       
    43 # Precompile regexes for better performance
       
    44 # - Comment filtering
       
    45 _RE_CLEAN_INLINE = '^(.*)//.*$'
       
    46 _RE_MULTILINE_OPEN = '^(.*)/\\*.*$'
       
    47 _RE_MULTILINE_CLOSE = '^.*\\*/(.*)$'
       
    48 _p_clean_inline = re.compile(_RE_CLEAN_INLINE)
       
    49 _p_multiline_open = re.compile(_RE_MULTILINE_OPEN)
       
    50 _p_multiline_close = re.compile(_RE_MULTILINE_CLOSE)
       
    51 
       
    52 # - MMP file Parsing
       
    53 _RE_TARGET = '^\\s*TARGET\\s+([^\\s]+).*$'
       
    54 _RE_PLAIN_TARGET = '^\\s*([^\\s\\.]+)\\.?[^\\s]?\\s*'
       
    55 _RE_COMPLEX_TARGET = '.*\\((.+),.+\\).*'
       
    56 _RE_LIBRARY = '^\\s*[^\\s]*LIBRARY.*\\s+([^\\s]+.*)$'
       
    57 _RE_START = '^\\s*START.*$'
       
    58 _RE_END = '\\s*END.*$'
       
    59 _p_target = re.compile(_RE_TARGET, re.I)
       
    60 _p_plain_target = re.compile(_RE_PLAIN_TARGET)
       
    61 _p_complex_target = re.compile(_RE_COMPLEX_TARGET)
       
    62 _p_library = re.compile(_RE_LIBRARY, re.I)
       
    63 _p_start = re.compile(_RE_START)
       
    64 _p_end = re.compile(_RE_END)
       
    65 
       
    66 # - BLD.INF file parsing
       
    67 _RE_PRJ_MMPFILES = '^\\s*PRJ_MMPFILES\\s*$'
       
    68 _RE_OTHER_SECTION = '^\\s*PRJ_[a-z]+\\s*$'
       
    69 _p_prj_mmpfiles = re.compile(_RE_PRJ_MMPFILES, re.I)
       
    70 _p_other_section = re.compile(_RE_OTHER_SECTION, re.I)
       
    71 
       
    72 # Set up a logging instance for output
       
    73 logging.basicConfig(format=_LOG_FORMAT, level=logging.WARNING, stream=sys.stdout)
       
    74 
       
    75 # Cache dictionary to marry Nodes to eachother
       
    76 node_cache = {}
       
    77 
       
    78 # Dictionary representing the dependency graph.
       
    79 # Each key identifies the node in the graph, where the value is the node
       
    80 # object itself including the arcs to other node_path keys that it requires.
       
    81 graph = {}
       
    82 
       
    83 def rstrip(string, suffix):
       
    84     """Like Python's __str__.rstrip(chars), but it treats the chars as
       
    85     a contiguous string and only strips that complete ending.
       
    86     """
       
    87     if string.endswith(suffix):
       
    88         string = string[:len(string) - len(suffix)]
       
    89     return string
       
    90 
       
    91 def clean_binary_name(binary_name):
       
    92     """Strips the extension off of binary names so that references to .lib
       
    93     are associated with the correct binaries.
       
    94     """
       
    95     match_complex_target = _p_complex_target.match(binary_name)
       
    96     if match_complex_target:
       
    97         binary_name = match_complex_target.groups()[0].lower().strip()
       
    98     else:
       
    99         match_plain_target = _p_plain_target.match(binary_name)
       
   100         if match_plain_target:
       
   101             binary_name = match_plain_target.groups()[0].lower().strip()
       
   102     return binary_name
       
   103 
       
   104 def looks_like_test(path):
       
   105     """Returns true if a path looks like it refers to test components.
       
   106     The script does its best to filter test components, as many are missing
       
   107     from the source tree and they're not interesting with respect to building
       
   108     production ROM images anyway.
       
   109     """
       
   110     conventions = ['tsrc', 'test']
       
   111     for convention in conventions:
       
   112         # Iterate through likely test component conventions, if
       
   113         # we match one, return True now
       
   114         if os.path.sep + convention + os.path.sep in path.lower():
       
   115             return True
       
   116     # Otherwise, nothing found, so return False
       
   117     return False
       
   118 
       
   119 def without_comments(source_file):
       
   120     """Generator function, will yield lines of the source_file object (iterable)
       
   121     with commented regions removed.
       
   122     """
       
   123     multiline_region = False
       
   124     for line in source_file:
       
   125         match_multiline_close = _p_multiline_close.match(line)
       
   126         if match_multiline_close:
       
   127             # Close Comments, strip to the left of the comment
       
   128             multiline_region = False
       
   129             line = match_multiline_close.groups()[0]
       
   130         if multiline_region:
       
   131             # Skip the line if we're in a commented region
       
   132             continue
       
   133         match_multiline_open = _p_multiline_open.match(line)
       
   134         if match_multiline_open:
       
   135             # Open comments, strip to the right of the comment
       
   136             multiline_region = True
       
   137             line = match_multiline_open.groups()[0]
       
   138         match_inline = _p_clean_inline.match(line)
       
   139         if match_inline:
       
   140             # Strip the line to only the left of the comment
       
   141             line = match_inline.groups()[0]
       
   142         if line:
       
   143             yield line
       
   144 
       
   145 def parse_mmp(mmp_path):
       
   146     """Read an mmp file, return a tuple of the form:
       
   147         (target, required_target_list)
       
   148     """
       
   149     logging.debug('parse_mmp(%s)' % (mmp_path, ))
       
   150     
       
   151     mmp_file = None
       
   152     try:
       
   153         mmp_file = open(mmp_path)
       
   154     except IOError, e:
       
   155         logging.error('Unable to open: %s' % (mmp_path, ))
       
   156         return
       
   157 
       
   158     # Iterate through MMP file lines to find the TARGET and LIBRARY statements
       
   159     # Note that Symbian projects can compile to different TARGET objects depending on
       
   160     # precompiler macros, so we must index all possible target names.
       
   161     targets = []
       
   162     libs = []
       
   163     resource_block = False
       
   164     for line in without_comments(mmp_file):
       
   165         match_start = _p_start.match(line)
       
   166         if match_start:
       
   167             resource_block = True
       
   168         match_end = _p_end.match(line)
       
   169         if match_end:
       
   170             resource_block = False
       
   171         if resource_block:
       
   172             # need to avoid resource target sections
       
   173             continue
       
   174         match_target = _p_target.match(line)
       
   175         match_library = _p_library.match(line)
       
   176         if match_target:
       
   177             clean_target = clean_binary_name(match_target.groups()[0])
       
   178             targets.append(clean_target)
       
   179         elif match_library:
       
   180             libs_on_line = match_library.groups()[0].split()
       
   181             for lib in libs_on_line:
       
   182                 clean_lib = clean_binary_name(lib)
       
   183                 libs.append(clean_lib)
       
   184     mmp_file.close()
       
   185 
       
   186     return (targets, libs)
       
   187 
       
   188 def new_node(path, ref_mmps, ref_testmmps):
       
   189     """Construct a new node in the graph with the provided content.
       
   190     """
       
   191     logging.debug('new_node(%s, ref_mmps(%d), ref_testmmps(%d))' % (path, len(ref_mmps), len(ref_testmmps)))
       
   192     node = Node(path)
       
   193     
       
   194     # Iterate the MMPs, read dependency and target information
       
   195     for mmp in ref_mmps:
       
   196         (targets, dependencies) = parse_mmp(mmp)
       
   197         if len(targets) > 0:
       
   198             for target in targets:
       
   199                 node.mmp_components.append(target)
       
   200             node.add_deps(dependencies)
       
   201 
       
   202     # Register the components in the cache, as later we will
       
   203     # join the graph nodes by referring to this cache
       
   204     for c in node.mmp_components:
       
   205         if c in node_cache.keys():
       
   206             existing = node_cache[c]
       
   207             node_cache[c] = existing + [path]
       
   208         else:
       
   209             node_cache[c] = [path]
       
   210 
       
   211     # Add this node to the graph
       
   212     graph[path] = node
       
   213 
       
   214 def parse_bld_inf(path):
       
   215     """Parse a bld.inf file to check to see if references MMP files.
       
   216     For those MMP files included, parse them to build the node object.
       
   217     """
       
   218     logging.debug('parse_bld_inf(%s)' % (path, ))
       
   219     
       
   220     # List the files referenced from this bld.inf
       
   221     ref_mmp = []
       
   222     ref_testmmp = []
       
   223     
       
   224     bld_inf = None
       
   225     try:
       
   226         bld_inf = open(path, 'r')
       
   227     except IOError, e:
       
   228         logging.error('Unable to open: %s' % (path, ))
       
   229         return
       
   230 
       
   231     # Parse the bld_inf file, adding references MMP files to appropriate lists
       
   232     projects_flag = False
       
   233     for line in without_comments(bld_inf):
       
   234         match_projects = _p_prj_mmpfiles.match(line)
       
   235         match_other_section = _p_other_section.match(line)
       
   236         if match_projects:
       
   237             projects_flag = True
       
   238         elif match_other_section:
       
   239             projects_flag = False
       
   240         if projects_flag and len(line) <= _MAX_PATH:
       
   241             rel_name = rstrip(line.lower().strip(), '.mmp')
       
   242             bld_inf_path = os.path.dirname(path)
       
   243             test_path = os.path.join(bld_inf_path, rel_name + '.mmp')
       
   244             test_path = os.path.realpath(test_path)
       
   245             if os.path.exists(test_path):
       
   246                 ref_mmp.append(test_path)
       
   247             else:
       
   248                 logging.warning('%s refers to %s but it does not exist!' % (path, test_path))
       
   249     bld_inf.close()
       
   250 
       
   251     # If we found some MMP files, then this is a new node
       
   252     if len(ref_mmp) > 0:
       
   253         new_node(path, ref_mmp, ref_testmmp)
       
   254 
       
   255 def make_nodes(not_used, dir_name, file_names):
       
   256     """Call back function for os.path.walk: will analyse the file names, if
       
   257     there are any bld.inf files, it will open them to see if they identify a
       
   258     Node object and create them as appropriate
       
   259     """
       
   260     logging.debug('make_nodes(%s, %s)' % (dir_name, file_names))
       
   261     if looks_like_test(dir_name):
       
   262         return
       
   263     for file_name in file_names:
       
   264         if file_name.lower().endswith('.inf'):
       
   265             abs_path = os.path.join(dir_name, file_name)
       
   266             assert(os.path.exists(abs_path))
       
   267             parse_bld_inf(abs_path)
       
   268 
       
   269 def connect_nodes():
       
   270     """Walk through the graph and substute the contents of the dependency
       
   271     list members at each node with references to the node_path of that which
       
   272     builds the referenced component.
       
   273 
       
   274     There will be instances where multiple graph nodes build overlapping
       
   275     components. This will, in practice, mean that there are many ways of
       
   276     building a suitable ROM for dependencies of one of these nodes.
       
   277     """
       
   278     unresolved_deps = []
       
   279     for node_path in graph.keys():
       
   280         node = graph[node_path]
       
   281         resolved = []
       
   282         for dep in node.dependencies:
       
   283             if dep not in node_cache.keys():
       
   284                 logging.warning('Could not resolve %s for %s' % (dep, node.node_path))
       
   285                 if dep not in unresolved_deps:
       
   286                     unresolved_deps.append(dep)
       
   287                 node.unresolved.append(dep)
       
   288             else:
       
   289                 solutions = node_cache[dep]
       
   290                 proposed = solutions[0]
       
   291                 if proposed not in resolved:
       
   292                     resolved.append(proposed)
       
   293                 node.interesting += filter(lambda x: x not in node.interesting, solutions[1:])
       
   294         node.dependencies = resolved
       
   295         graph[node_path] = node
       
   296     if len(unresolved_deps) > 0:
       
   297         logging.warning('There were %d unresolved dependencies.' % (len(unresolved_deps), ))
       
   298 
       
   299 def build_graph(root):
       
   300     """Walk nodes from the directory root provided looking for bld.inf files.
       
   301     Graph will be built from the referened production MMP files.
       
   302     """
       
   303     if not os.path.isdir(root):
       
   304         logging.fatal('%s is not a directory, aborting...' % (root, ))
       
   305         exit(1)
       
   306     os.path.walk(root, make_nodes, None)
       
   307     connect_nodes()
       
   308 
       
   309 def save_graph(path):
       
   310     """Serialize the graph object to path. This will be a Python object pickle at
       
   311     the highest available protocol version for this Python install.
       
   312     """
       
   313     graph_file = None
       
   314     try:
       
   315         graph_file = open(path, 'wb')
       
   316     except IOError, e:
       
   317         logging.error('Could not write graph to file: %s' % (repr(e), ))
       
   318         exit(1)
       
   319     pickle.dump(graph, graph_file, pickle.HIGHEST_PROTOCOL)
       
   320     graph_file.close()
       
   321 
       
   322 # Main:
       
   323 if __name__ == '__main__':
       
   324     parser = OptionParser()
       
   325     parser.set_description(__doc__)
       
   326     parser.add_option('-g', '--graph', dest='graph_file', 
       
   327                       help='File name to write the graph to.', 
       
   328                       metavar='GRAPH_FILE', default='dependencies.graph')
       
   329     parser.add_option('-r', '--root', dest='graph_root',
       
   330                       help='Directory to recursively build a graph from, usually root of source tree.',
       
   331                       metavar='SOURCE_ROOT', default='.')
       
   332     parser.add_option('-v', '--verbose', dest='verbose',
       
   333                       help='Verbose logging, will show all warnings as graph is generated. Recommend redirect!',
       
   334                       action='store_true', default=False)
       
   335     (options, args) = parser.parse_args()
       
   336     if not options.verbose:
       
   337         logging.disable(logging.ERROR)
       
   338     print 'Walking source from "%s"\nThis can take some time with large source trees...' % (options.graph_root, )
       
   339     build_graph(options.graph_root)
       
   340     print 'Found %d components consisting of %d binaries.' % (len(graph), len(node_cache))
       
   341     print 'Wriing graph to %s' % (options.graph_file)
       
   342     save_graph(options.graph_file)
       
   343     print '...done!'