Orb/python/orb/guidiser.py
changeset 4 468f4c8d3d5b
parent 2 932c358ece3e
equal deleted inserted replaced
3:d8fccb2cd802 4:468f4c8d3d5b
    17 import stat
    17 import stat
    18 import sys
    18 import sys
    19 import shutil
    19 import shutil
    20 import xml
    20 import xml
    21 import logging
    21 import logging
    22 from optparse import OptionParser, check_choice
    22 from optparse import OptionParser
    23 from xml.etree import ElementTree as etree
    23 try:
       
    24     from xml.etree import cElementTree as etree
       
    25 except ImportError:
       
    26     from xml.etree import ElementTree as etree
       
    27 import xml.etree.ElementTree
    24 from cStringIO import StringIO
    28 from cStringIO import StringIO
    25 from lib import scan, xml_decl, doctype_identifier, XmlParser
    29 from lib import scan, xml_decl, doctype_identifier, XmlParser
    26 from doxyidredirect import DoxyIdRedirect, ExceptionDoxyIdRedirectLookup
    30 from doxyidredirect import DoxyIdRedirect, ExceptionDoxyIdRedirectLookup
    27 
    31 
    28 
    32 
    29 __version__ = "0.1"
    33 __version__ = "0.1"
       
    34 
       
    35 
       
    36 logger = logging.getLogger('orb.guidiser')
       
    37 
    30 
    38 
    31 class Guidiser(object):
    39 class Guidiser(object):
    32     """
    40     """
    33     A simple class that parses an xml file and converts the values of all
    41     A simple class that parses an xml file and converts the values of all
    34     id, href and keyref attributes to a 'GUID'.
    42     id, href and keyref attributes to a 'GUID'.
    35     
    43     
    36     >>> guid = Guidiser()
    44     >>> guid = Guidiser()
    37     >>> root = guid.guidise(StringIO(cxxclass))
    45     >>> root = guid.guidise(StringIO(cxxclass))
    38     >>> oldroot = etree.parse(StringIO(cxxclass)).getroot()
    46     >>> oldroot = etree.parse(StringIO(cxxclass)).getroot()
    39     >>> oldroot.attrib['id']
    47     >>> oldroot.attrib['id']
    40     'CP_class'
    48     'class_test'
    41     >>> root.attrib['id']
    49     >>> root.attrib['id']
    42     'GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E'
    50     'GUID-7D44FAFC-2C6A-3B1D-8EEA-558968414CCE'
    43     """
    51     """
    44     # Publishing targets
    52     # Publishing targets
    45     PT_MODE = 0
    53     PT_MODE = 0
    46     PT_DITAOT = 1
    54     PT_DITAOT = 1
    47     PUBLISHING_TARGETS = (PT_MODE, PT_DITAOT)
    55     PUBLISHING_TARGETS = (PT_MODE, PT_DITAOT)
    93         id = os.path.splitext(filename)[0]
   101         id = os.path.splitext(filename)[0]
    94         fqn = None
   102         fqn = None
    95         if not(id.lower() in ("test", "deprecated", "todo") or id.lower().find("namespace_") != -1):                
   103         if not(id.lower() in ("test", "deprecated", "todo") or id.lower().find("namespace_") != -1):                
    96             try:
   104             try:
    97                 filename, fqn = self.doxyidredirect.lookupId(id)
   105                 filename, fqn = self.doxyidredirect.lookupId(id)
    98             except ExceptionDoxyIdRedirectLookup, err:
   106             except ExceptionDoxyIdRedirectLookup:
    99                 logging.error("Could not lookup Fully Qualified APIName for id '%s' in href '%s'" % (id, href))
   107                 logger.error("Could not lookup Fully Qualified APIName for id '%s' in href '%s'" % (id, href))
   100         #if the id was not found just guidise the id
   108         #if the id was not found just guidise the id
   101         #this is just to make the id unique for mode
   109         #this is just to make the id unique for mode
   102         guid = self._get_guid(fqn) if fqn else self._get_guid(id)
   110         guid = self._get_guid(fqn) if fqn else self._get_guid(id)
   103         if self.get_publishing_target() == self.PT_DITAOT:
   111         if self.get_publishing_target() == self.PT_DITAOT:
   104             guid+=".xml"
   112             guid+=".xml"
   107     def _guidise_xref_href(self, href):
   115     def _guidise_xref_href(self, href):
   108         # Don't guidise references without hashes. Assume they are filepaths
   116         # Don't guidise references without hashes. Assume they are filepaths
   109         # to files other than ditatopics
   117         # to files other than ditatopics
   110         if href.find('#') == -1:
   118         if href.find('#') == -1:
   111             return href
   119             return href
   112 			
       
   113         # Doxygen currently outputs hrefs in the format autolink_8cpp.xml#autolink_8cpp_1ae0e289308b6d2cbb5c86e753741981dc
   120         # Doxygen currently outputs hrefs in the format autolink_8cpp.xml#autolink_8cpp_1ae0e289308b6d2cbb5c86e753741981dc
   114         # The right side of the # is not enough to extract the fully qualified name of the function because it is md5ed
   121         # The right side of the # is not enough to extract the fully qualified name of the function because it is md5ed
   115         # Send the right side to doxyidredirect to get the fqn of the function			
   122         # Send the right side to doxyidredirect to get the fqn of the function			
   116         filename, id = href.split('#')
   123         filename, id = href.split('#')
   117         fqn = None                
   124         fqn = None                
   118         if not(id.lower() in ("test", "deprecated", "todo") or id.lower().find("namespace_") != -1):                        
   125         if not(id.lower() in ("test", "deprecated", "todo") or id.lower().find("namespace_") != -1):                        
   119             try:
   126             try:
   120                 fqn = self.doxyidredirect.lookupId(id)[1]
   127                 fqn = self.doxyidredirect.lookupId(id)[1]
   121             except ExceptionDoxyIdRedirectLookup, err:
   128             except ExceptionDoxyIdRedirectLookup:
   122                 logging.error("No API name for element id %s, guidising id instead" % id)
   129                 logger.error("No API name for element id %s, guidising id instead" % id)
   123 
       
   124         guid = self._get_guid(fqn) if fqn else self._get_guid(id)
   130         guid = self._get_guid(fqn) if fqn else self._get_guid(id)
   125         basename, ext = os.path.splitext(filename)
   131         basename, ext = os.path.splitext(filename)
   126         try:
   132         try:
   127             base_guid = self._get_guid(self.doxyidredirect.lookupId(basename)[1])
   133             base_guid = self._get_guid(self.doxyidredirect.lookupId(basename)[1])
   128         except ExceptionDoxyIdRedirectLookup, e:
   134         except ExceptionDoxyIdRedirectLookup:
   129             base_guid = self._get_guid(basename)
   135             base_guid = self._get_guid(basename)
   130             
   136             
   131         if self.get_publishing_target() == self.PT_DITAOT:
   137         if self.get_publishing_target() == self.PT_DITAOT:
   132             return base_guid + ext + "#" + guid
   138             return base_guid + ext + "#" + guid
   133         else:
   139         else:
   134             return guid
   140             return guid
   135     
   141     
   136     def _guidise_id(self, id):
   142     def _guidise_id(self, id):
   137         try:
   143         try:
   138             filename, fqn = self.doxyidredirect.lookupId(id)
   144             _, fqn = self.doxyidredirect.lookupId(id)
   139             return self._get_guid(fqn)
   145             return self._get_guid(fqn)
   140         except ExceptionDoxyIdRedirectLookup, err:
   146         except ExceptionDoxyIdRedirectLookup:
   141             logging.debug("Didn't find a Fully Qualified APIName for id '%s'" % id)
   147             logger.debug("Didn't find a Fully Qualified APIName for id '%s'" % id)
   142             return self._get_guid(id)
   148             return self._get_guid(id)
   143     
   149     
   144     def guidise(self, xmlfile):
   150     def guidise(self, xmlfile):
   145         #WORKAROUND: ElementTree provides no function to set prefixes and makes up its own if they are not set (ns0, ns1, ns2)
   151         #WORKAROUND: ElementTree provides no function to set prefixes and makes up its own if they are not set (ns0, ns1, ns2)
   146         etree._namespace_map["http://dita.oasis-open.org/architecture/2005/"] = 'ditaarch'
   152         xml.etree.ElementTree._namespace_map.update({ "http://dita.oasis-open.org/architecture/2005/": 'ditaarch' })
   147         try:
   153         try:
   148             root = etree.parse(xmlfile).getroot()
   154             root = etree.parse(xmlfile).getroot()
   149         except xml.parsers.expat.ExpatError, e:
   155         except Exception, e:
   150             logging.error("%s could not be parsed: %s\n" % (xmlfile, str(e)))
   156             logger.error("%s could not be parsed: %s\n" % (xmlfile, str(e)))
   151             return None
   157             return None
   152         for child in root.getiterator():
   158         for child in root.getiterator():
   153             for key in [key for key in ('id', 'href', 'keyref') if key in child.attrib]:
   159             for key in [key for key in ('id', 'href', 'keyref') if key in child.attrib]:
   154                 if key == 'id':
   160                 if key == 'id':
   155                     child.attrib['id'] = self._guidise_id(child.attrib['id'])
   161                     child.attrib['id'] = self._guidise_id(child.attrib['id'])
   156                 elif key == 'href':
   162                 elif key == 'href':
   157                     if 'format' in child.attrib and child.attrib['format'] == 'html':
   163                     if 'format' in child.attrib and child.attrib['format'] == 'html':
   158                         continue
   164                         continue
   159                     else:
   165                     else:
   160                         base_dir = os.path.dirname(xmlfile) if isinstance(xmlfile, str) else ""
   166                         #base_dir = os.path.dirname(xmlfile) if isinstance(xmlfile, str) else ""
   161                         child.attrib['href'] = self._guidise_href(child.attrib['href'], child.tag)
   167                         child.attrib['href'] = self._guidise_href(child.attrib['href'], child.tag)
   162                 elif key == 'keyref':
   168                 elif key == 'keyref':
   163                     child.attrib['keyref'] = self._get_guid(child.attrib['keyref'])                    
   169                     child.attrib['keyref'] = self._get_guid(child.attrib['keyref'])                    
   164 
   170 
   165         return root
   171         return root
   167 
   173 
   168 def updatefiles(xmldir, publishing_target="ditaot"):
   174 def updatefiles(xmldir, publishing_target="ditaot"):
   169     publishing_target = Guidiser.PT_MODE if (publishing_target == "mode") else Guidiser.PT_DITAOT
   175     publishing_target = Guidiser.PT_MODE if (publishing_target == "mode") else Guidiser.PT_DITAOT
   170     guidiser = Guidiser(publishing_target=publishing_target, doxyidredirect=DoxyIdRedirect(xmldir))
   176     guidiser = Guidiser(publishing_target=publishing_target, doxyidredirect=DoxyIdRedirect(xmldir))
   171     for filepath in scan(xmldir):
   177     for filepath in scan(xmldir):
   172         logging.debug('Guidising file \"%s\"' % filepath)
   178         logger.debug('Guidising file \"%s\"' % filepath)
   173         root = guidiser.guidise(filepath)
   179         root = guidiser.guidise(filepath)
   174         if root is not None:
   180         if root is not None:
   175             try:
   181             try:
   176                 os.chmod(filepath, stat.S_IWRITE)
   182                 os.chmod(filepath, stat.S_IWRITE)
   177             except Exception, e:
   183             except Exception, e:
   178                 logging.error("Could not make file \"%s\" writable, error was \"%s\"" % (filepath, e))
   184                 logger.error("Could not make file \"%s\" writable, error was \"%s\"" % (filepath, e))
   179                 continue            
   185                 continue            
   180             with open(filepath, 'w') as f:
   186             with open(filepath, 'w') as f:
   181                 f.write(xml_decl()+'\n')
   187                 f.write(xml_decl()+'\n')
   182                 try:
   188                 try:
   183                     doc_id = doctype_identifier(root.tag)
   189                     doc_id = doctype_identifier(root.tag)
   184                 except Exception, e:
   190                 except Exception, e:
   185                     logging.error("Could not write doctype identifier for file \"%s\", error was \"%s\""
   191                     logger.error("Could not write doctype identifier for file \"%s\", error was \"%s\""
   186                                   %(filepath, e))
   192                                   %(filepath, e))
   187                 else:
   193                 else:
   188                     f.write(doc_id+'\n')
   194                     f.write(doc_id+'\n')
   189                 f.write(etree.tostring(root))        
   195                 f.write(etree.tostring(root))        
   190                 f.close()
   196                 f.close()
   259 
   265 
   260     def test_i_continue_if_passed_an_invalid_file(self):
   266     def test_i_continue_if_passed_an_invalid_file(self):
   261         try:
   267         try:
   262             self.guidiser.guidise(StringIO("<cxxclass><argh</cxxclass>"))
   268             self.guidiser.guidise(StringIO("<cxxclass><argh</cxxclass>"))
   263         except Exception:
   269         except Exception:
   264             self.fail("I shouldnt have raised an exception")
   270             self.fail("I shouldnt have raised an exception.")
   265 
   271 
   266     def _test_keys_were_converted(self, key):
   272     def _test_keys_were_converted(self, key):
   267         root = self.guidiser.guidise(StringIO(cxxclass))
   273         root = self.guidiser.guidise(StringIO(cxxclass))
   268         for child in root.getiterator():
   274         for child in root.getiterator():
   269             if key in child.attrib:
   275             if key in child.attrib:
   342     def test_xref_href_to_topic_in_same_file_for_ditaot(self):
   348     def test_xref_href_to_topic_in_same_file_for_ditaot(self):
   343         self.guidiser.set_publishing_target(Guidiser.PT_DITAOT)
   349         self.guidiser.set_publishing_target(Guidiser.PT_DITAOT)
   344         self.assertEquals(self.guidiser._guidise_href("struct_e_sock_1_1_t_addr_update.xml#struct_e_sock_1_1_t_addr_update", "xref"),
   350         self.assertEquals(self.guidiser._guidise_href("struct_e_sock_1_1_t_addr_update.xml#struct_e_sock_1_1_t_addr_update", "xref"),
   345                  "GUID-E72084E6-C1CE-3388-93F7-5B7A3F506C3B.xml#GUID-E72084E6-C1CE-3388-93F7-5B7A3F506C3B"
   351                  "GUID-E72084E6-C1CE-3388-93F7-5B7A3F506C3B.xml#GUID-E72084E6-C1CE-3388-93F7-5B7A3F506C3B"
   346                  )
   352                  )
   347 				 
   353 
   348     def test_xref_href_to_some_other_file_on_file_system(self):
   354     def test_xref_href_to_some_other_file_on_file_system(self):
   349         self.guidiser.set_publishing_target(Guidiser.PT_DITAOT)
   355         self.guidiser.set_publishing_target(Guidiser.PT_DITAOT)
   350         self.assertEquals(self.guidiser._guidise_href("../../documentation/RFCs/rfc3580.txt", "xref"),
   356         self.assertEquals(self.guidiser._guidise_href("../../documentation/RFCs/rfc3580.txt", "xref"),
   351                  "../../documentation/RFCs/rfc3580.txt"
   357                  "../../documentation/RFCs/rfc3580.txt"
   352                  )
   358                  )
   363          
   369          
   364     def test_i_preserve_namespaces(self):  
   370     def test_i_preserve_namespaces(self):  
   365         xml_in = """<reference ditaarch:DITAArchVersion="1.1" xmlns:ditaarch="http://dita.oasis-open.org/architecture/2005/" />"""
   371         xml_in = """<reference ditaarch:DITAArchVersion="1.1" xmlns:ditaarch="http://dita.oasis-open.org/architecture/2005/" />"""
   366         xml_expected = """<reference ditaarch:DITAArchVersion="1.1" xmlns:ditaarch="http://dita.oasis-open.org/architecture/2005/" />"""
   372         xml_expected = """<reference ditaarch:DITAArchVersion="1.1" xmlns:ditaarch="http://dita.oasis-open.org/architecture/2005/" />"""
   367         root = self.guidiser.guidise(StringIO(xml_in))
   373         root = self.guidiser.guidise(StringIO(xml_in))
       
   374         print "****", etree.tostring(root)
   368         self.assertEqual(etree.tostring(root), xml_expected)
   375         self.assertEqual(etree.tostring(root), xml_expected)
   369         
   376         
   370 class Testupdate_files(unittest.TestCase):
   377 class Testupdate_files(unittest.TestCase):
   371     
   378     
   372     def setUp(self):
   379     def setUp(self):
   505         </cxxVariableDetail>
   512         </cxxVariableDetail>
   506     </cxxVariable>
   513     </cxxVariable>
   507 </cxxClass>"""
   514 </cxxClass>"""
   508 
   515 
   509 filesys_cxxclass_guidised = """<?xml version="1.0" encoding="UTF-8"?>
   516 filesys_cxxclass_guidised = """<?xml version="1.0" encoding="UTF-8"?>
   510 <!DOCTYPE cxxClass PUBLIC "-//NOKIA//DTD DITA C++ API Class Reference Type v0.1.0//EN" "dtd/cxxClass.dtd">
   517 <!DOCTYPE cxxClass PUBLIC "-//NOKIA//DTD DITA C++ API Class Reference Type v0.6.0//EN" "dtd/cxxClass.dtd">
   511 <cxxClass id="GUID-83FD90ED-B2F7-3ED5-ABC5-83ED6A3F1C2F">
   518 <cxxClass id="GUID-83FD90ED-B2F7-3ED5-ABC5-83ED6A3F1C2F">
   512     <apiName>CActiveScheduler::TCleanupBundle</apiName>
   519     <apiName>CActiveScheduler::TCleanupBundle</apiName>
   513     <shortdesc />
   520     <shortdesc />
   514     <cxxClassDetail>
   521     <cxxClassDetail>
   515         <cxxClassDefinition>
   522         <cxxClassDefinition>