# Recovered from the Mercurial changeset diff:
#   diff -r 82f11024044a -r 932c358ece3e mpdot/mpdot.py
#   (b/mpdot/mpdot.py, Fri Apr 23 20:45:58 2010 +0100)
"""
Run the DITA Open Toolkit (DOT) over the component maps named in a TOC.

Created on Feb 25, 2010

@author: p2ross

Take as input the TOC ditamap and read it for ``topicref`` elements whose
``href`` ends with ``.ditamap``.

For each such component map, create in the output directory a DOT target
such as:

out/dot_comp/camera_GUID-F59DFBA0-B60B-334A-9B18-4B4E1E756DFA/

and run DOT on it as a sub-process (several at once via multiprocessing).
Then copy the contents of every successful target, except index.html, into:

out/dot_src/

Next, copy the TOC and link-converted component maps (``.xml`` links
rewritten to ``.html`` peers) into:

out/dot_toc_tmp/

Then run DOT on the TOC copy in that directory, with output to:

out/dot_toc/

and finally copy the resulting index.html into out/dot_src/.
"""

import os
import sys
from optparse import OptionParser, check_choice
import subprocess
import multiprocessing
try:
    from xml.etree import cElementTree as etree
except ImportError:
    from xml.etree import ElementTree as etree

import logging
#import pprint
import random
import time
import shutil
import unittest
#import xml
try:
    from cStringIO import StringIO
except ImportError:
    # Python 3 has no cStringIO module; io.StringIO is the replacement.
    from io import StringIO

__version__ = '0.1.3'

# Nokia XHTML conversion:
# $> ant ... -Dtranstype=xhtml.nokia -Dargs.input=INPUT_DITAMAP
# Nokia Eclipse Help conversion:
# $> ant ... -Dtranstype=eclipsehelp.nokia -Dargs.eclipse.version=VERSION -Dargs.eclipse.provider="Nokia Corporation" -Dargs.input=INPUT_DITAMAP


CMD_BASE = "ant -logger org.dita.dost.log.DITAOTBuildLogger -Doutercontrol=quiet"
CMD_PREFIX_INPUT = "-Dargs.input="
CMD_PREFIX_OUTPUT = "-Doutput.dir="
CMD_PREFIX_TEMP = "-Ddita.temp.dir="
CMD_PREFIX_NERVOUS = "echo"
#CMD_PREFIX_XSL = "/xsl:plugins/cxxapiref/xsl/dita2xhtml.nokia.xsl "

DIR_DOT_COMPONENT = 'dot_comp'
DIR_DOT_SOURCE = 'dot_src'
DIR_TOC_TMP = 'dot_toc_tmp'
DIR_DOT_TOC = 'dot_toc'


def invokeDot(theDitaMapPath, theDirOut, argList, isNervous):
    """Run DOT (through ant) on one DITA map.

    theDitaMapPath -- path of the input ditamap (passed as -Dargs.input).
    theDirOut      -- output directory (passed as -Doutput.dir); created if
                      it does not exist.
    argList        -- extra ant properties, each given without its leading -D.
    isNervous      -- if true the command is prefixed with "echo", so it is
                      printed by the shell instead of being executed.

    Returns a tuple (process return code, theDirOut).
    """
    myCmdList = []
    if isNervous:
        myCmdList.append(CMD_PREFIX_NERVOUS)
        time.sleep(0.25 * random.random())
    # Randomise the start time so that DOT does not create
    # duplicate temp directories (0.001 sec name resolution).
    time.sleep(1.0 * random.random())
    myCmdList.append(CMD_BASE)
    myCmdList.append('%s"%s"' % (CMD_PREFIX_INPUT, theDitaMapPath))
    myCmdList.append('%s"%s"' % (CMD_PREFIX_OUTPUT, theDirOut))
    myCmdList.append('%s"%s"' % (CMD_PREFIX_TEMP, 'temp/%s' % os.path.basename(theDirOut)))
    myCmdList.extend(['-D%s' % a for a in argList])
    # ant is given forward slashes even for Windows style paths.
    myCmd = ' '.join(myCmdList).replace('\\', '/')
    if not os.path.exists(theDirOut):
        os.makedirs(theDirOut)
    print('invokeDot: "%s"' % myCmd)
    # NOTE(review): the command is a single string run with shell=True and the
    # paths/arguments are only wrapped in double quotes, so values containing
    # quotes or shell metacharacters are not safe here.
    p = subprocess.Popen(
        myCmd,
        shell=True,
        bufsize=-1,
        # Direct stdout/stderr to a PIPE then forget them
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        #close_fds=True,
    )
    (stdOut, stdErr) = p.communicate()
    if stdErr:
        print('stdErr for: %s -> %s' % (theDitaMapPath, theDirOut))
        if not isinstance(stdErr, str):
            # Python 3 hands back bytes; decode so the text prints readably.
            stdErr = stdErr.decode('utf-8', 'replace')
        print(stdErr)
    return p.returncode, theDirOut


def genCompMapNames(theToc):
    """Yield (href, navtitle) for every topicref in theToc that names a ditamap.

    theToc is anything etree.iterparse() accepts (a path or a file object).
    topicref elements with no href, or whose href does not end with
    '.ditamap', are ignored. navtitle is None when the attribute is absent.
    """
    for ev, el in etree.iterparse(theToc):
        if el.tag == 'topicref':
            myRef = el.get('href', None)
            if myRef is not None and myRef.endswith('.ditamap'):
                logging.debug('genCompMapNames(): %s -> %s' % (myRef, el.get('navtitle')))
                yield myRef, el.get('navtitle')


def _copyDir(s, d, depth=0):
    """Recursively copy the files under directory s into existing directory d.

    Files named index.html (case-insensitive) are skipped while depth is 0 or
    1, i.e. in s itself and in its immediate sub-directories; deeper ones are
    copied. Copy failures are logged and do not stop the walk.

    NOTE(review): the original docstring said "all but the top level
    index.html", which would be ``depth > 0``; the ``depth > 1`` test is kept
    unchanged here - confirm which was intended.
    """
    assert(os.path.isdir(s))
    assert(os.path.isdir(d))
    for n in os.listdir(s):
        pS = os.path.join(s, n)
        pD = os.path.join(d, n)
        if os.path.isfile(pS) \
        and (depth > 1 or n.lower() != 'index.html'):
            try:
                shutil.copy(pS, pD)
            # WindowsError only exists on Windows, so naming it here raised
            # NameError elsewhere; it is a subclass of OSError.
            except (OSError, IOError) as err:
                logging.error('_copyDirs(): %s' % err)
        elif os.path.isdir(pS):
            if not os.path.exists(pD):
                os.makedirs(pD)
            _copyDir(pS, pD, depth=depth+1)


def copyDirs(theResults, theOutDir):
    """Merge the output directory of every successful DOT run into theOutDir.

    theResults is an iterable of (return code, directory) pairs as returned
    by invokeDot(). Entries with a non-zero return code are logged and not
    copied. theOutDir is created if it does not exist.
    """
    if not os.path.exists(theOutDir):
        os.makedirs(theOutDir)
    for c, d in theResults:
        if c == 0:
            print('copyDirs(): "%s" to "%s"' % (os.path.basename(d), theOutDir))
            if not os.path.isdir(d):
                logging.error('copyDirs(): not a directory: %s' % d)
            elif not os.path.isdir(theOutDir):
                logging.error('copyDirs(): not a directory: %s' % theOutDir)
            else:
                _copyDir(d, theOutDir)
        else:
            logging.error('Not copying. Results code %d directory: %s' % (c, d))


def execute(inToc, outDir, argList, numJobs=0, nervous=False):
    """Run DOT in parallel on every component map referenced by inToc.

    Each map is resolved relative to the directory of inToc and built into
    <outDir>/dot_comp/<navtitle>_<href>. The successful outputs are then
    merged into <outDir>/dot_src by copyDirs().

    numJobs -- number of worker processes; values below 1 mean one per CPU.
    nervous -- passed on to invokeDot().

    Returns the list of (return code, output directory) pairs.
    """
    inDir = os.path.dirname(inToc)
    outDirCmpDot = os.path.join(outDir, DIR_DOT_COMPONENT)
    if not os.path.exists(outDirCmpDot):
        os.makedirs(outDirCmpDot)
    # Read the TOC before starting any workers so that an unreadable TOC
    # fails without leaving a pool behind.
    myTaskS = [
        (
            os.path.join(inDir, t[0]),
            os.path.join(outDirCmpDot, '%s_%s' % (t[1], t[0])),
            argList,
            nervous,
        )
        for t in genCompMapNames(inToc)
    ]
    if numJobs >= 1:
        myPool = multiprocessing.Pool(processes=numJobs)
    else:
        logging.info('Setting jobs to %d' % multiprocessing.cpu_count())
        myPool = multiprocessing.Pool(processes=multiprocessing.cpu_count())
    try:
        myPending = [myPool.apply_async(invokeDot, t) for t in myTaskS]
        myResults = [r.get() for r in myPending]
    finally:
        # Every result has been collected (or collecting failed), so the
        # workers are no longer needed.
        myPool.terminate()
        myPool.join()
    copyDirs(myResults, os.path.join(outDir, DIR_DOT_SOURCE))
    return myResults


class DitamapLinkConverterError(Exception):
    """ Raised if an invalid toc is input """


def _iter_elements(tree):
    """Return an iterable over every element of an Element or ElementTree."""
    # iter() is the current spelling; getiterator() is all that ElementTree
    # 1.2 (Python 2.6) offers and it no longer exists in Python 3.9+.
    walker = getattr(tree, 'iter', None)
    if walker is None:
        walker = tree.getiterator
    return walker()


class DitamapLinkConverter(object):
    """Copy a TOC and its component maps, turning .xml links into .html ones.

    toc_path -- path of the TOC ditamap.
    out_dir  -- directory the converted maps and the TOC copy are written
                to; created if it does not exist.
    """

    def __init__(self, toc_path, out_dir):
        self.out_dir = os.path.abspath(out_dir)
        self.toc_path = os.path.abspath(toc_path)
        self.toc_dir = os.path.dirname(self.toc_path)
        if not os.path.exists(self.out_dir):
            os.makedirs(self.out_dir)

    def _convert_link_to_html(self, link):
        """Point an element whose href ends in .xml at the matching .html peer.

        The element is modified in place and also returned. Elements whose
        href has another ending are returned untouched.
        """
        href = link.attrib["href"]
        if href.endswith(".xml"):
            # Swap only the trailing extension; str.replace() would also
            # rewrite a ".xml" occurring earlier in the path.
            link.attrib["href"] = href[:-len(".xml")] + ".html"
            link.attrib["scope"] = "peer"
            link.attrib["format"] = "html"
        return link

    def _convert_links(self, tree):
        """Apply _convert_link_to_html() to every element that has an href."""
        for element in _iter_elements(tree):
            if element.attrib.get("href") is not None:
                self._convert_link_to_html(element)
        return tree

    def _handle_map(self, ditamap):
        """Convert one component map and write it, under its own name, to out_dir.

        A missing or unparsable map is logged and otherwise ignored.
        """
        try:
            root = etree.parse(ditamap).getroot()
        except IOError:
            logging.error("Component map \"%s\" does not exist" % ditamap)
            return
        except Exception as e:
            logging.error("%s could not be parsed: %s\n" % (ditamap, str(e)))
            return
        root = self._convert_links(root)
        self._write_file(root, os.path.basename(ditamap))

    def _write_file(self, root, file_name):
        """Serialise root to <out_dir>/<file_name>; does nothing if root is None."""
        filepath = self.out_dir+os.sep+file_name
        logging.debug('Writing file \"%s\"' % filepath)

        if root is not None:
            xml_text = etree.tostring(root)
            if not isinstance(xml_text, str):
                # Python 3 returns ASCII bytes by default.
                xml_text = xml_text.decode('ascii')
            with open(filepath, 'w') as f:
                # NOTE(review): both header literals are empty in this copy of
                # the file, so two blank lines are written. They presumably
                # held the XML declaration and DOCTYPE - confirm upstream.
                f.write(""""""+'\n')
                f.write(""""""+'\n')
                f.write(xml_text)

    def _get_component_map_paths(self, tree):
        """Return toc_dir-prefixed paths for the href of every topicref in tree.

        topicref elements without an href are skipped, as in genCompMapNames().
        """
        all_hrefs = []
        for element in _iter_elements(tree):
            if element.tag == "topicref":
                href = element.attrib.get("href")
                if href is not None:
                    all_hrefs.append(self.toc_dir+os.sep+href)
        return all_hrefs

    def convert(self):
        """Convert every map the TOC references, then copy the TOC to out_dir.

        Raises DitamapLinkConverterError if the TOC itself cannot be parsed.
        """
        try:
            tree = etree.parse(self.toc_path).getroot()
        except Exception as e:
            raise DitamapLinkConverterError("%s could not be parsed: %s\n" % (self.toc_path, str(e)))
        for component_map in self._get_component_map_paths(tree):
            self._handle_map(component_map)
        shutil.copyfile(self.toc_path, self.out_dir+os.sep+os.path.basename(self.toc_path))


def publish_toc(toc_path, out_dir, argList, nervous):
    """Build the TOC with DOT and place its index.html in <out_dir>/dot_src.

    The TOC and link-converted copies of its component maps are first written
    to <out_dir>/dot_toc_tmp, DOT is run on that TOC copy with output to
    <out_dir>/dot_toc, and the resulting index.html is copied to
    <out_dir>/dot_src. A failure to copy index.html is logged, not raised.
    """
    toc_name = os.path.basename(toc_path)
    tmp_out = os.path.join(out_dir, DIR_TOC_TMP)
    dlc = DitamapLinkConverter(toc_path, tmp_out)
    dlc.convert()
    toc_to_publish = os.path.join(tmp_out, toc_name)
    out = os.path.join(out_dir, DIR_DOT_TOC)
    invokeDot(toc_to_publish, out, argList, nervous)
    final_destination = os.path.join(out_dir, DIR_DOT_SOURCE)
    if not os.path.exists(final_destination):
        os.makedirs(final_destination)
    try:
        shutil.copy(os.path.join(out, 'index.html'), final_destination)
    except IOError as err:
        logging.error('publish_toc(): %s' % str(err))


def main():
    """Command line entry point.

    Positional arguments: the TOC path, the output directory, then any number
    of ant properties written without their leading -D. Returns 0 on success.
    """
    usage = "usage: %prog [options] -Doptions without the -D"
    parser = OptionParser(usage, version='%prog ' + __version__)
    parser.add_option(
        "-l", "--loglevel",
        type="int",
        dest="loglevel",
        default=30,
        help="Log Level (debug=10, info=20, warning=30, [error=40], critical=50) [default: %default]"
    )
    parser.add_option(
        "-j", "--jobs",
        type="int",
        dest="jobs",
        default=0,
        help="Max processes when multiprocessing. Zero uses number of native CPUs [default: %default]"
    )
    parser.add_option("-n", action="store_true", dest="nervous", default=False,
                      help="Nervous mode (do no harm). [default: %default]")
    (options, args) = parser.parse_args()
    logging.basicConfig(level=options.loglevel, stream=sys.stdout)
    if len(args) < 1 or not os.path.isfile(args[0]):
        parser.print_help()
        parser.error("I can't do much without a path to the XML TOC.")
        # Not normally reached: OptionParser.error() exits with status 2.
        return 1
    if len(args) < 2:
        parser.print_help()
        parser.error("I need an output path.")
        # Not normally reached: OptionParser.error() exits with status 2.
        return 1
    # Dump out timestamp
    print('Start time: %s' % time.ctime())
    # Wall-clock time: the work happens in child processes, which the
    # CPU-time based time.clock() did not measure on Unix (and time.clock()
    # was removed in Python 3.8).
    execTime = time.time()
    myResults = execute(args[0], args[1], args[2:], options.jobs, options.nervous)
    publish_toc(args[0], args[1], args[2:], options.nervous)
    print('Number of DITA maps processed: %d' % len(myResults))
    print('End time: %s' % time.ctime())
    print('Elapsed time: %8.3f (s)' % (time.time()-execTime))
    print('Bye, bye...')
    return 0

# Script entry point. Because this guard calls sys.exit() before the test
# class below is defined, the tests never run when the file is executed as a
# script; they are only reachable by a test runner that imports the module.
if __name__ == '__main__':
    multiprocessing.freeze_support()
    sys.exit(main())


class TestDitamapLinkConverter(unittest.TestCase):
    """Unit tests for DitamapLinkConverter, run in a scratch directory.

    Each test gets a fresh "ditamap_link_converter_test_dir" under the
    current working directory, holding a copy of the cmap fixture; it is
    removed again afterwards.
    """

    def setUp(self):
        self._create_test_dir()
        self.dlc = DitamapLinkConverter('', self.out_dir)

    def tearDown(self):
        self._clean_test_dir()

    def _create_test_dir(self):
        """Create the scratch directory and write the cmap fixture into it."""
        self.test_dir = "ditamap_link_converter_test_dir"
        self.out_dir = self.test_dir+os.sep+"out"
        self.cmap_path = self.test_dir+os.sep+"cmap.xml"
        os.mkdir(self.test_dir)
        self._write_string_to_file(cmap, self.cmap_path)

    def _clean_test_dir(self):
        """Remove the scratch directory and everything in it."""
        shutil.rmtree(self.test_dir)

    def _write_string_to_file(self, string, filepath):
        """Write string to filepath, replacing any existing file."""
        with open(filepath, "w") as f:
            f.write(string)

    def test_i_can_change_a_link_to_an_xml_file_to_link_to_an_html_file(self):
        link = etree.Element("cxxStructRef", href="GUID-AE25CF37-B862-306B-B7B3-4A1226B83DA2.xml", navtitle="_SChannels")
        link = self.dlc._convert_link_to_html(link)
        self.assertEqual(link.attrib["href"], "GUID-AE25CF37-B862-306B-B7B3-4A1226B83DA2.html")
        self.assertTrue(link.get("scope", None) and link.attrib["scope"] == "peer")
        self.assertTrue(link.get("format", None) and link.attrib["format"] == "html")

    def test_i_can_find_all_link_elements_in_a_tree(self):
        tree = etree.parse(StringIO(cmap))
        tree = self.dlc._convert_links(tree)
        self.assertTrue(tree.find("cxxStructRef").attrib["href"].endswith(".html"))
        self.assertTrue(tree.find("cxxFileRef").attrib["href"].endswith(".html"))
        self.assertTrue(tree.find("cxxClassRef").attrib["href"].endswith(".html"))

    def test_i_can_write_a_converted_map_to_an_output_directory(self):
        self.dlc._handle_map(self.cmap_path)
        written_path = self.out_dir+os.sep+"cmap.xml"
        self.assertTrue(os.path.exists(written_path))
        # Close the handle explicitly so tearDown can delete the file on
        # platforms that refuse to remove open files.
        with open(written_path) as f:
            written = f.read()
        self.assertEqual(written, converted_cmap)

    def test_i_gracefully_handle_a_link_to_component_map_that_doesnt_exist(self):
        # Expected: the non existent map is silently handled (only logged).
        try:
            self.dlc._handle_map("non_existsant_ditamap.ditamap")
        except Exception:
            # Exception rather than a bare except, so KeyboardInterrupt and
            # SystemExit are not reported as test failures.
            self.fail("Didn't handle a component ditamap that doesn't exist")

    def test_i_parse_all_hrefs_in_a_toc(self):
        converter = DitamapLinkConverter(os.getcwd()+os.sep+'toc.ditamap', self.out_dir)
        tree = etree.parse(StringIO(toc))
        paths = converter._get_component_map_paths(tree)
        expected = [os.getcwd()+os.sep+"GUID-F59DFBA0-B60B-334A-9B18-4B4E1E756DFA.ditamap"]
        self.assertEqual(paths, expected)

    def test_i_raise_an_exception_if_i_am_given_an_invalid_toc(self):
        invalid_toc_path = self.test_dir+os.sep+"invalid_toc.xml"
        self._write_string_to_file(invalid_toc, invalid_toc_path)
        dlc = DitamapLinkConverter(invalid_toc_path, self.out_dir)
        self.assertRaises(DitamapLinkConverterError, dlc.convert)


# Test fixtures.
# NOTE(review): in this copy of the file every fixture below contains only
# blank lines - the XML markup appears to have been stripped out. Restore the
# original XML before relying on the tests that parse or compare them.
cmap = """






"""
 #
converted_cmap = """





"""


toc = """













    """

invalid_toc = """












    """