mpdot/mpdot.py
changeset 2 932c358ece3e
child 4 468f4c8d3d5b
equal deleted inserted replaced
1:82f11024044a 2:932c358ece3e
       
     1 """
       
     2 Created on Feb 25, 2010
       
     3 
       
     4 @author: p2ross
       
     5 
       
     6 
       
     7 Take as input the toc. Read for
       
     8 <topicref format="ditamap" href="GUID-F59DFBA0-B60B-334A-9B18-4B4E1E756DFA.ditamap" navtitle="camera" />
       
     9 
       
    10 Create in output dir 
       
    11 
       
    12 out/dot_comp/camera_GUID-F59DFBA0-B60B-334A-9B18-4B4E1E756DFA/
       
    13 
       
    14 As the DOT target.
       
    15 Run DOT as a sub-process.
       
    16 Then copy all of the contents except the index.html into:
       
    17 
       
    18 out/dot_src/
       
    19 
       
    20 Els stuff with ditamaps to dot_src/...
       
    21 
       
    22 Then run DOT on input:
       
    23 out/dot_src/<toc>.ditamap
       
    24 
       
    25 Output:
       
    26 out/dot_tgt/
       
    27 """
       
    28 
       
    29 import os
       
    30 import sys
       
    31 from optparse import OptionParser, check_choice
       
    32 import subprocess
       
    33 import multiprocessing
       
    34 try:
       
    35     from xml.etree import cElementTree as etree
       
    36 except ImportError:
       
    37     from xml.etree import ElementTree as etree
       
    38 
       
    39 import logging
       
    40 #import pprint
       
    41 import random
       
    42 import time
       
    43 import shutil
       
    44 import unittest
       
    45 #import xml
       
    46 from cStringIO import StringIO
       
    47 
       
    48 __version__ = '0.1.3'
       
    49 
       
    50 """
       
    51 Nokia XHTML conversion:
       
    52 $> ant ... -Dtranstype=xhtml.nokia -Dargs.input=INPUT_DITAMAP
       
    53 Nokia Eclipse Help conversion:
       
    54 $> ant ... -Dtranstype=eclipsehelp.nokia -Dargs.eclipse.version=VERSION -Dargs.eclipse.provider="Nokia Corporation" -Dargs.input=INPUT_DITAMAP
       
    55 """
       
    56 
       
    57 
       
    58 CMD_BASE            = "ant -logger org.dita.dost.log.DITAOTBuildLogger -Doutercontrol=quiet"
       
    59 CMD_PREFIX_INPUT    = "-Dargs.input="
       
    60 CMD_PREFIX_OUTPUT   = "-Doutput.dir="
       
    61 CMD_PREFIX_TEMP     = "-Ddita.temp.dir="
       
    62 CMD_PREFIX_NERVOUS  = "echo"
       
    63 #CMD_PREFIX_XSL      = "/xsl:plugins/cxxapiref/xsl/dita2xhtml.nokia.xsl "
       
    64 
       
    65 DIR_DOT_COMPONENT   = 'dot_comp'
       
    66 DIR_DOT_SOURCE      = 'dot_src'
       
    67 DIR_TOC_TMP         = 'dot_toc_tmp'
       
    68 DIR_DOT_TOC         = 'dot_toc'
       
    69 
       
    70 
       
    71 def invokeDot(theDitaMapPath, theDirOut, argList, isNervous):
       
    72     myCmdList = []
       
    73     if isNervous:
       
    74         myCmdList.append(CMD_PREFIX_NERVOUS)
       
    75         time.sleep(0.25 * random.random())
       
    76     # Randomise the start time so that DOT does not create
       
    77     # duplicate temp directories (0.001 sec name resolution).
       
    78     time.sleep(1.0 * random.random())
       
    79     myCmdList.append(CMD_BASE)
       
    80     myCmdList.append('%s"%s"' % (CMD_PREFIX_INPUT, theDitaMapPath))
       
    81     myCmdList.append('%s"%s"' % (CMD_PREFIX_OUTPUT, theDirOut))
       
    82     myCmdList.append('%s"%s"' % (CMD_PREFIX_TEMP, 'temp/%s' % os.path.basename(theDirOut)))
       
    83     myCmdList.extend(['-D%s' % a for a in argList])
       
    84     myCmd = ' '.join(myCmdList).replace('\\', '/')
       
    85     if not os.path.exists(theDirOut):
       
    86         os.makedirs(theDirOut)
       
    87     print 'invokeDot: "%s"' % myCmd
       
    88     p = subprocess.Popen(
       
    89         myCmd,
       
    90         shell=True,
       
    91         bufsize=-1,
       
    92         # Direct stdout/stderr to a PIPE then forget them
       
    93         stdout=subprocess.PIPE,
       
    94         stderr=subprocess.PIPE,
       
    95         #close_fds=True,
       
    96         )
       
    97     (stdOut, stdErr) = p.communicate()
       
    98     if len(stdErr) > 0:
       
    99         print 'stdErr for: %s -> %s' % (theDitaMapPath, theDirOut)
       
   100         print stdErr
       
   101     return p.returncode, theDirOut
       
   102 
       
   103 def genCompMapNames(theToc):
       
   104     for ev, el in etree.iterparse(theToc):
       
   105         if el.tag == 'topicref':
       
   106             myRef = el.get('href', None)
       
   107             if myRef is not None and myRef.endswith('.ditamap'):
       
   108                 logging.debug('genCompMapNames(): %s -> %s' % (myRef, el.get('navtitle')))
       
   109                 yield myRef, el.get('navtitle')
       
   110 
       
   111 def _copyDir(s, d, depth=0):
       
   112     """Recursive copy of all but the top level index.html."""
       
   113     assert(os.path.isdir(s))
       
   114     assert(os.path.isdir(d))
       
   115     for n in os.listdir(s):
       
   116         pS = os.path.join(s, n)
       
   117         pD = os.path.join(d, n)
       
   118         if os.path.isfile(pS) \
       
   119         and (depth > 1 or n.lower() != 'index.html'):
       
   120             try:
       
   121                 shutil.copy(pS, pD)
       
   122             except (WindowsError, IOError), err:
       
   123                 logging.error('_copyDirs(): %s' % err)
       
   124         elif os.path.isdir(pS):
       
   125             if not os.path.exists(pD):
       
   126                 os.makedirs(pD)
       
   127             _copyDir(pS, pD, depth=depth+1)
       
   128 
       
   129 def copyDirs(theResults, theOutDir):
       
   130     if not os.path.exists(theOutDir):
       
   131         os.makedirs(theOutDir)
       
   132     for c, d in theResults:
       
   133         if c == 0:
       
   134             print 'copyDirs(): "%s" to "%s"' % (os.path.basename(d), theOutDir)
       
   135             if not os.path.isdir(d):
       
   136                 logging.error('copyDirs(): not a directory: %s' % d)
       
   137             elif not os.path.isdir(theOutDir):
       
   138                 logging.error('copyDirs(): not a directory: %s' % theOutDir)
       
   139             else:
       
   140                 _copyDir(d, theOutDir)
       
   141         else:
       
   142             logging.error('Not copying. Results code %d directory: %s' % (c, d))
       
   143 
       
   144 def execute(inToc, outDir, argList, numJobs=0, nervous=False):
       
   145     inDir = os.path.dirname(inToc)
       
   146     outDirCmpDot = os.path.join(outDir, DIR_DOT_COMPONENT)
       
   147     if not os.path.exists(outDirCmpDot):
       
   148         os.makedirs(outDirCmpDot)
       
   149     if numJobs >= 1:
       
   150         myPool = multiprocessing.Pool(processes=numJobs)
       
   151     else:
       
   152         logging.info('Setting jobs to %d' % multiprocessing.cpu_count())
       
   153         myPool = multiprocessing.Pool(processes=multiprocessing.cpu_count())
       
   154     myTaskS = [
       
   155         (
       
   156             os.path.join(inDir, t[0]),
       
   157             os.path.join(outDirCmpDot, '%s_%s' % (t[1], t[0])),
       
   158             argList,
       
   159             nervous,
       
   160         )
       
   161         for t in genCompMapNames(inToc)
       
   162     ]
       
   163     myResults = [r.get() for r in [myPool.apply_async(invokeDot, t) for t in myTaskS]]
       
   164     copyDirs(myResults, os.path.join(outDir, DIR_DOT_SOURCE))
       
   165     return myResults
       
   166 
       
   167 class DitamapLinkConverterError(Exception):
       
   168     """ Raised if an invalid toc is input """
       
   169 
       
   170 class DitamapLinkConverter():
       
   171     
       
   172     def __init__(self, toc_path, out_dir):
       
   173         self.out_dir = os.path.abspath(out_dir)
       
   174         self.toc_path = os.path.abspath(toc_path)
       
   175         self.toc_dir = os.path.dirname(self.toc_path)
       
   176         if not os.path.exists(self.out_dir):
       
   177             os.makedirs(self.out_dir)
       
   178             
       
   179     def _convert_link_to_html(self, link):
       
   180         if link.attrib["href"].endswith(".xml"):
       
   181             link.attrib["href"] = link.attrib["href"].replace(".xml", ".html")
       
   182             link.attrib["scope"] = "peer"
       
   183             link.attrib["format"] = "html"
       
   184         return link
       
   185     
       
   186     def _convert_links(self, tree):
       
   187         for element in tree.getiterator():
       
   188             if element.attrib.get("href") != None:
       
   189                 element = self._convert_link_to_html(element)
       
   190         return tree
       
   191     
       
   192     def _handle_map(self, ditamap):
       
   193         try:
       
   194             root = etree.parse(ditamap).getroot()
       
   195         except IOError, e:
       
   196             logging.error("Component map \"%s\" does not exist" % ditamap)
       
   197             return
       
   198         except Exception, e:
       
   199             logging.error("%s could not be parsed: %s\n" % (ditamap, str(e)))
       
   200             return        
       
   201         root = self._convert_links(root)
       
   202         self._write_file(root, os.path.basename(ditamap))
       
   203 
       
   204     def _write_file(self, root, file_name):
       
   205         filepath = self.out_dir+os.sep+file_name
       
   206         logging.debug('Writing file \"%s\"' % filepath)
       
   207         
       
   208         if root is not None:
       
   209             with open(filepath, 'w') as f:
       
   210                 f.write("""<?xml version="1.0" encoding="UTF-8"?>"""+'\n')
       
   211                 f.write("""<!DOCTYPE cxxAPIMap PUBLIC "-//NOKIA//DTD DITA C++ API Map Reference Type v0.5.0//EN" "dtd/cxxAPIMap.dtd" >"""+'\n')
       
   212                 f.write(etree.tostring(root))        
       
   213                 f.close()
       
   214     
       
   215     def _get_component_map_paths(self, tree):
       
   216         all_hrefs = []
       
   217         for element in tree.getiterator():
       
   218             if element.tag == "topicref":
       
   219                 all_hrefs.append(self.toc_dir+os.sep+element.attrib["href"])
       
   220         return all_hrefs
       
   221     
       
   222     def convert(self):
       
   223         try:
       
   224             tree = etree.parse(self.toc_path).getroot()
       
   225         except Exception, e:
       
   226             raise DitamapLinkConverterError("%s could not be parsed: %s\n" % (self.toc_path, str(e)))
       
   227         component_maps = self._get_component_map_paths(tree)
       
   228         for component_map in component_maps:
       
   229             self._handle_map(component_map)
       
   230         shutil.copyfile(self.toc_path, self.out_dir+os.sep+os.path.basename(self.toc_path))
       
   231 
       
   232 def publish_toc(toc_path, out_dir, argList, nervous):
       
   233     toc_name = os.path.basename(toc_path)
       
   234     tmp_out =  os.path.join(out_dir, DIR_TOC_TMP)
       
   235     dlc = DitamapLinkConverter(toc_path, tmp_out)
       
   236     dlc.convert()
       
   237     toc_to_publish = os.path.join(tmp_out, toc_name)
       
   238     out = os.path.join(out_dir, DIR_DOT_TOC)
       
   239     invokeDot(toc_to_publish, out, argList, nervous)
       
   240     final_destination = os.path.join(out_dir, DIR_DOT_SOURCE)
       
   241     if not os.path.exists(final_destination):
       
   242         os.makedirs(final_destination)
       
   243     try:
       
   244         shutil.copy(os.path.join(out, 'index.html'), final_destination)
       
   245     except IOError, err:
       
   246         logging.error('publish_toc(): %s' % str(err))
       
   247     
       
   248 def main():
       
   249     usage = "usage: %prog [options] <DITA map> <output directory> -Doptions without the -D"
       
   250     parser = OptionParser(usage, version='%prog ' + __version__)
       
   251     parser.add_option(
       
   252             "-l", "--loglevel",
       
   253             type="int",
       
   254             dest="loglevel",
       
   255             default=30,
       
   256             help="Log Level (debug=10, info=20, warning=30, [error=40], critical=50) [default: %default]"
       
   257         )      
       
   258     parser.add_option(
       
   259             "-j", "--jobs",
       
   260             type="int",
       
   261             dest="jobs",
       
   262             default=0,
       
   263             help="Max processes when multiprocessing. Zero uses number of native CPUs [default: %default]"
       
   264         )      
       
   265     parser.add_option("-n", action="store_true", dest="nervous", default=False, 
       
   266                       help="Nervous mode (do no harm). [default: %default]")
       
   267     (options, args) = parser.parse_args()
       
   268     logging.basicConfig(level=options.loglevel, stream=sys.stdout)
       
   269     if len(args) < 1 or not os.path.isfile(args[0]):
       
   270         parser.print_help()
       
   271         parser.error("I can't do much without a path to the XML TOC.")
       
   272         return 1
       
   273     if len(args) < 2:
       
   274         parser.print_help()
       
   275         parser.error("I need an output path.")
       
   276         return 1
       
   277     # Dump out timestamp
       
   278     print 'Start time: %s' % time.ctime()
       
   279     execTime = time.clock()
       
   280     myResults = execute(args[0], args[1], args[2:], options.jobs, options.nervous)
       
   281     publish_toc(args[0], args[1], args[2:], options.nervous)
       
   282     print 'Number of DITA maps processed: %d' % len(myResults)
       
   283     print 'End time: %s' % time.ctime()
       
   284     print 'Elapsed time: %8.3f (s)' % (time.clock()-execTime)
       
   285     print 'Bye, bye...'
       
   286 
       
   287 if __name__ == '__main__':
       
   288     multiprocessing.freeze_support()
       
   289     sys.exit(main())
       
   290     
       
   291 class TestDitamapLinkConverter(unittest.TestCase):
       
   292     def setUp(self):
       
   293         self._create_test_dir()
       
   294         self.dlc = DitamapLinkConverter('', self.out_dir)
       
   295         
       
   296     def tearDown(self):
       
   297         self._clean_test_dir()
       
   298         
       
   299     def _create_test_dir(self):
       
   300         self.test_dir = "ditamap_link_converter_test_dir"
       
   301         self.out_dir = self.test_dir+os.sep+"out"
       
   302         self.cmap_path = self.test_dir+os.sep+"cmap.xml"
       
   303         os.mkdir(self.test_dir)
       
   304         f = open(self.cmap_path, "w")
       
   305         f.write(cmap)
       
   306         f.close()
       
   307         
       
   308     def _clean_test_dir(self):
       
   309         shutil.rmtree(self.test_dir)        
       
   310         
       
   311     def _write_string_to_file(self, string, filepath):
       
   312         f = open(filepath, "w")
       
   313         f.write(string)
       
   314         f.close()        
       
   315     
       
   316     def test_i_can_change_a_link_to_an_xml_file_to_link_to_an_html_file(self):
       
   317         link = etree.Element("cxxStructRef", href="GUID-AE25CF37-B862-306B-B7B3-4A1226B83DA2.xml", navtitle="_SChannels")
       
   318         link = self.dlc._convert_link_to_html(link)
       
   319         self.assertEquals(link.attrib["href"], "GUID-AE25CF37-B862-306B-B7B3-4A1226B83DA2.html")
       
   320         self.assertTrue(link.get("scope", None) and link.attrib["scope"] == "peer")
       
   321         self.assertTrue(link.get("format", None) and link.attrib["format"] == "html")
       
   322         
       
   323     def test_i_can_find_all_link_elements_in_a_tree(self):
       
   324         tree = etree.parse(StringIO(cmap))
       
   325         tree = self.dlc._convert_links(tree)
       
   326         self.assertTrue(tree.find("cxxStructRef").attrib["href"].endswith(".html"))
       
   327         self.assertTrue(tree.find("cxxFileRef").attrib["href"].endswith(".html"))
       
   328         self.assertTrue(tree.find("cxxClassRef").attrib["href"].endswith(".html"))
       
   329         
       
   330     def test_i_can_write_a_converted_map_to_an_output_directory(self):
       
   331         self.dlc._handle_map(self.cmap_path)
       
   332         self.assertTrue(os.path.exists(self.out_dir+os.sep+"cmap.xml"))
       
   333         self.assertEquals(open(self.out_dir+os.sep+"cmap.xml").read(), converted_cmap)
       
   334         
       
   335     def test_i_gracefully_handle_a_link_to_component_map_that_doesnt_exist(self):
       
   336         try:
       
   337             self.dlc._handle_map("non_existsant_ditamap.ditamap")
       
   338         except:
       
   339             self.fail("Didn't handle a component ditamap that doesn't exist")
       
   340         else:
       
   341             pass # Expected (silently handled non existant map)
       
   342         
       
   343     def test_i_parse_all_hrefs_in_a_toc(self):
       
   344         converter = DitamapLinkConverter(os.getcwd()+os.sep+'toc.ditamap', self.out_dir)
       
   345         tree = etree.parse(StringIO(toc))
       
   346         paths = converter._get_component_map_paths(tree)
       
   347         expected = [os.getcwd()+os.sep+"GUID-F59DFBA0-B60B-334A-9B18-4B4E1E756DFA.ditamap"]       
       
   348         self.assertEquals(paths, expected)
       
   349         
       
   350     def test_i_raise_an_exception_if_i_am_given_an_invalid_toc(self):        
       
   351         invalid_toc_path = self.test_dir+os.sep+"invalid_toc.xml"
       
   352         self._write_string_to_file(invalid_toc, invalid_toc_path)
       
   353         dlc = DitamapLinkConverter(invalid_toc_path, self.out_dir)
       
   354         self.assertRaises(DitamapLinkConverterError, dlc.convert)
       
   355            
       
# Test fixture: a component map (cxxAPIMap) holding three links to ".xml"
# files. TestDitamapLinkConverter writes it to disk and parses it.
cmap = """<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE cxxAPIMap PUBLIC "-//NOKIA//DTD DITA C++ API Map Reference Type v0.5.0//EN" "dtd/cxxAPIMap.dtd" >
<cxxAPIMap id="GUID-0D9E5D45-5A07-302C-BEB3-2D0252214F2E" title="wlmplatform">
    <cxxStructRef href="GUID-AE25CF37-B862-306B-B7B3-4A1226B83DA2.xml" navtitle="_SChannels" />
    <cxxFileRef href="GUID-E1984316-685F-394E-B71A-9816E1495C1F.xml" navtitle="wlanerrorcodes.h" />
    <cxxClassRef href="GUID-F795E994-BCB6-3040-872A-90F8ADFC75E7.xml" navtitle="MWlanMgmtNotifications" />
</cxxAPIMap>
"""
       
   364                         # 
       
# Test fixture: the exact file content expected after the sample component map
# has been processed (compared byte for byte in
# test_i_can_write_a_converted_map_to_an_output_directory).
converted_cmap = """<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE cxxAPIMap PUBLIC "-//NOKIA//DTD DITA C++ API Map Reference Type v0.5.0//EN" "dtd/cxxAPIMap.dtd" >
<cxxAPIMap id="GUID-0D9E5D45-5A07-302C-BEB3-2D0252214F2E" title="wlmplatform">
    <cxxStructRef format="html" href="GUID-AE25CF37-B862-306B-B7B3-4A1226B83DA2.html" navtitle="_SChannels" scope="peer" />
    <cxxFileRef format="html" href="GUID-E1984316-685F-394E-B71A-9816E1495C1F.html" navtitle="wlanerrorcodes.h" scope="peer" />
    <cxxClassRef format="html" href="GUID-F795E994-BCB6-3040-872A-90F8ADFC75E7.html" navtitle="MWlanMgmtNotifications" scope="peer" />
</cxxAPIMap>"""
       
   372 
       
   373         
       
# Test fixture: a TOC map with nested topichead elements and a single
# topicref pointing at a component ditamap (used by
# test_i_parse_all_hrefs_in_a_toc).
toc = """<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE map PUBLIC "-//OASIS//DTD DITA Map//EN" "map.dtd">
<map id="GUID-445218BA-A6BF-334B-9337-5DCBD993AEB3" title="Symbian^3">
    <topichead id="GUID-6B11027F-F9AF-3FA0-8A9D-8EA68E3D0F8D" navtitle="Applications">
        <topichead id="GUID-4766FA96-56F3-3E37-9B2C-6F280673BBA1" navtitle="Camera Apps">
          <topichead id="GUID-34AB7AC3-E64C-39E0-B6B1-53FEF84566F2" navtitle="s60">
            <topichead id="GUID-4766FA96-56F3-3E37-9B2C-6F280673BBA1" navtitle="camera">
              <topicref format="ditamap" href="GUID-F59DFBA0-B60B-334A-9B18-4B4E1E756DFA.ditamap" navtitle="camera" />
            </topichead>
            <topichead id="GUID-A0EFE059-67DA-372B-AB98-9DB79584972E" navtitle="camera_help" />
          </topichead>
        </topichead>
     </topichead>
 </map>
 """
       
   389  
       
# Test fixture: a deliberately malformed TOC (the XML declaration on the first
# line is never closed), used to check that convert() raises
# DitamapLinkConverterError. Do not "repair" this string.
invalid_toc = """<?xml version="1.0" encoding="UTF-8"?
<!DOCTYPE map PUBLIC "-//OASIS//DTD DITA Map//EN" "map.dtd">
<map id="GUID-445218BA-A6BF-334B-9337-5DCBD993AEB3" title="Symbian^3">
    <topichead id="GUID-6B11027F-F9AF-3FA0-8A9D-8EA68E3D0F8D" navtitle="Applications">
        <topichead id="GUID-4766FA96-56F3-3E37-9B2C-6F280673BBA1" navtitle="Camera Apps">
          <topichead id="GUID-34AB7AC3-E64C-39E0-B6B1-53FEF84566F2" navtitle="s60">
            <topichead id="GUID-4766FA96-56F3-3E37-9B2C-6F280673BBA1" navtitle="camera">
              <topicref format="ditamap" href="GUID-F59DFBA0-B60B-334A-9B18-4B4E1E756DFA.ditamap" navtitle="camera" />
            </topichead>
            <topichead id="GUID-A0EFE059-67DA-372B-AB98-9DB79584972E" navtitle="camera_help" />
          </topichead>
        </topichead>
     </topichead>
 </map>
 """