mpdot/linkcheck.py
changeset 2 932c358ece3e
equal deleted inserted replaced
1:82f11024044a 2:932c358ece3e
       
     1 # Copyright (c) 2007-2010 Nokia Corporation and/or its subsidiary(-ies) All rights reserved.
       
     2 # This component and the accompanying materials are made available under the terms of the License 
       
     3 # "Eclipse Public License v1.0" which accompanies this distribution, 
       
     4 # and is available at the URL "http://www.eclipse.org/legal/epl-v10.html".
       
     5 #
       
     6 # Initial Contributors:
       
     7 # Nokia Corporation - initial contribution.
       
     8 #
       
     9 # Contributors:
       
    10 #
       
    11 # Description:
       
    12 # Checks links in DITA XML and reports issues.
       
    13 """
       
    14 Created on 12 Feb 2010
       
    15 
       
    16 @author: p2ross
       
    17 
       
    18 Definitions
       
    19 ===========
       
    20 Doctype
       
    21 -------
       
    22 See: http://www.w3.org/TR/2008/REC-xml-20081126/#dt-root
       
    23 Note: this is sometimes called the Doctype because of http://www.w3.org/TR/2008/REC-xml-20081126/#vc-roottype
       
    24 
       
    25 ID
       
    26 --
       
    27 The value of the 'id' attribute of an element.
       
    28 
       
    29 Root ID
       
    30 -------
       
    31 The value of the 'id' attribute of the root element.
       
    32 Note: A development would allow differently named attributes provided that they
       
    33 were ID types. See http://www.w3.org/TR/2008/REC-xml-20081126/#sec-attribute-types
       
    34 for validity constraints for ID types.
       
    35 
       
    36 Reference
       
    37 ---------
       
    38 The value of the href attribute of an element.
       
    39 
       
    40 Map
       
    41 ---
       
    42 An XML file whose root element name is 'map' or ends with 'Map'.   
       
    43 
       
    44 Topic
       
    45 -----
       
    46 An XML file that is not a Map.
       
    47 
       
    48 Lonely topic
       
    49 ------------
       
    50 A topic whose root ID is not referenced by any map. 
       
    51 
       
    52 Lonely map
       
    53 ----------
       
    54 A map whose root ID is not referenced by any map. 
       
    55 
       
    56 Map Cycle
       
    57 ---------
       
    58 A sequence of map references whose members are not unique.
       
    59 
       
    60 """
       
    61 
       
    62 import os
       
    63 import unittest
       
    64 import sys
       
    65 import logging
       
    66 import pprint
       
    67 import fnmatch
       
    68 import re
       
    69 import urllib
       
    70 import time
       
    71 from optparse import OptionParser, check_choice
       
    72 try:
       
    73     from xml.etree import cElementTree as etree
       
    74 except ImportError:
       
    75     from xml.etree import ElementTree as etree
       
    76 import urlparse
       
    77 import multiprocessing
       
    78 # used for DitaFileObj persistence
       
    79 import shelve
       
    80 
       
    81 __version__ = '0.1.5'
       
    82 
       
    83 class ExceptionLinkCheck(Exception):
       
    84     pass
       
    85 
       
    86 class CountDict(dict):
       
    87     """Dictionary with a default value of 0 for unknown keys."""
       
    88     def __getitem__(self, key):
       
    89         if key not in self: 
       
    90             self[key] = 0
       
    91         return self.get(key)
       
    92 
       
    93 # Matches stuff like: GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E
       
    94 RE_GUID = re.compile(r'GUID-[0-9A-F]{8}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{12}', re.IGNORECASE)
       
    95 
       
    96 # Of the form {integer_error_code : (format_string, num_args), ...}
       
    97 PROBLEM_CODE_FORMAT = {
       
    98     # 'id_syntax'
       
    99     100 : ('Character \'#\' not allowed in id="%s"', 1),
       
   100     101 : ('NMTOKEN character \'%s\' not allowed in id="%s"', 2),
       
   101     102 : ('GUID specification does not match id="%s"', 1),
       
   102     # 'ref_syntax'
       
   103     200 : ('Multiple \'#\' not allowed in reference "%s"', 1),
       
   104     201 : ('Reference element "%s" is missing href=... attribute', 1),
       
   105     202 : ('URL has missing type/format in reference "%s"', 1),
       
   106     203 : ('GUID specification does not match file reference "%s"', 1),
       
   107     204 : ('GUID specification does not match fragment reference "%s"', 1),
       
   108     # 'ref'
       
   109     300 : ('Can not resolve URI "%s"', 1),
       
   110     # 'file'
       
   111     400 : ('Failed to open: "%s"', 1),
       
   112     401 : ('Multiple id="%s"', 1),
       
   113     402 : ('No id attribute on root element', 0),
       
   114     403 : ('Root ID in cycle: %s', 1),
       
   115     404 : ('Can not parse: "%s"', 1),
       
   116     410 : ('Can not resolve reference to file "%s"', 1),
       
   117     411 : ('Can resolve reference to file "%s" but not to fragment "%s"', 2),
       
   118     412 : ('Referencing element "%s" does not match target root element "%s"', 2),
       
   119     413 : ('Referencing element "%s" does not match target element "%s" for id="%s"', 3),
       
   120     414 : ('topicref element with format="ditamap" does not match target root element "%s"', 1),
       
   121     415 : ('topicref to <map> does not have format="ditamap" but format="%s"', 1),
       
   122     416 : ('topicref element type="%s" does not match target root element "%s"', 2),
       
   123     417 : ('topicref element type="%s" does not match target element "%s" for id="%s"', 3),
       
   124     418 : ('Unknown referencing element "%s" does not match target root element "%s"', 2),
       
   125     419 : ('Unknown referencing element "%s" does not match target element "%s" for id="%s"', 3),
       
   126     # 'file_set'
       
   127     500 : ('Not a directory: %s', 1),
       
   128     501 : ('Duplicate root id="%s" in files: %s', 2), 
       
   129     #502 : ('Can not resolve reference to "%s"', 1),
       
   130     #503 : ('Reference type "%s" does not match target type "%s" for id="%s"', 3),
       
   131     504 : ('Duplicate file path: "%s"', 1),
       
   132     505 : ('Duplicate id="%s" in files: %s', 2),
       
   133     # 'topic_set'
       
   134     600 : ('Topic id="%s" is not referenced by any map', 1), 
       
   135     # 'map_set'
       
   136     700 : ('More than one top level map exists: %s', 1),  
       
   137     701 : ('Maps "%s" are in a a cycle.', 1),
       
   138 }
       
   139 
       
   140 GENERIC_STRING = '...'
       
   141 PRINT_WIDTH = 75
       
   142 
       
   143 def genericStringForErrorCode(ec):
       
   144     assert(PROBLEM_CODE_FORMAT.has_key(ec))
       
   145     f, c = PROBLEM_CODE_FORMAT[ec]
       
   146     if c == 0:
       
   147         return f
       
   148     return f % ((GENERIC_STRING,) * c)
       
   149 
       
   150 def writeGenericStringsForErrorCodes(s=sys.stdout):
       
   151     s.write(' All Error Codes '.center(PRINT_WIDTH, '='))
       
   152     s.write('\n')
       
   153     s.write('%4s  %s\n' % ('Code', 'Error'))
       
   154     s.write('%4s  %s\n' % ('----', '-----'))
       
   155     ecS = PROBLEM_CODE_FORMAT.keys()
       
   156     ecS.sort()
       
   157     for ec in ecS:
       
   158         s.write('%4d  %s\n' % (ec, genericStringForErrorCode(ec)))
       
   159     s.write('='*PRINT_WIDTH)
       
   160     s.write('\n\n')
       
   161 
       
   162 def normalisePath(thePath):
       
   163     # TODO: How come this does not work?
       
   164     #return os.path.abspath(thePath)
       
   165     return os.path.abspath(thePath).replace('\\', '/')
       
   166 
       
   167 FNMATCH_PATTERNS = ['*.xml', '*.dita', '*.ditamap']
       
   168 FNMATCH_STRING = ' '.join(FNMATCH_PATTERNS)
       
   169 
       
   170 # These elements descend from topic/xref so can be treated as referencing elements
       
   171 XREF_DESCENDENTS = set(
       
   172     (
       
   173         # From the api specialisation
       
   174         'apiRelation',
       
   175         'apiBaseClassifier',
       
   176         'apiOtherClassifier',
       
   177         'apiOperationClassifier',
       
   178         'apiValueClassifier',
       
   179         # From the C++ specialisation
       
   180         'cxxfile',
       
   181         'cxxclass',
       
   182         'cxxstruct',
       
   183         'cxxunion',
       
   184         'cxxfunction',
       
   185         'cxxdefine',
       
   186         'cxxtypedef',
       
   187         'cxxvariable',
       
   188         'cxxenumeration',
       
   189         'cxxClassBaseClass',
       
   190         'cxxClassBaseStruct',
       
   191         'cxxClassBaseUnion',
       
   192         'cxxClassNestedClass',
       
   193         'cxxClassNestedStruct',
       
   194         'cxxClassNestedUnion',
       
   195         'cxxClassEnumerationInherited',
       
   196         'cxxClassEnumeratorInherited',
       
   197         'cxxClassFunctionInherited',
       
   198         'cxxClassVariableInherited',
       
   199         'cxxDefineReimplemented',
       
   200         'cxxEnumerationReimplemented',
       
   201         'cxxFunctionReimplemented',
       
   202         'cxxStructBaseClass',
       
   203         'cxxStructBaseStruct',
       
   204         'cxxStructBaseUnion',
       
   205         'cxxStructNestedClass',
       
   206         'cxxStructNestedStruct',
       
   207         'cxxStructNestedUnion',
       
   208         'cxxStructEnumerationInherited',
       
   209         'cxxStructEnumeratorInherited',
       
   210         'cxxStructFunctionInherited',
       
   211         'cxxStructVariableInherited',
       
   212         'cxxTypedefReimplemented',
       
   213         'cxxUnionBaseClass',
       
   214         'cxxUnionBaseStruct',
       
   215         'cxxUnionBaseUnion',
       
   216         'cxxUnionNestedClass',
       
   217         'cxxUnionNestedStruct',
       
   218         'cxxUnionNestedUnion',
       
   219         'cxxUnionEnumerationInherited',
       
   220         'cxxUnionFunctionInherited',
       
   221         'cxxUnionVariableInherited',
       
   222         'cxxVariableReimplemented',
       
   223     )
       
   224 )
       
   225 
       
   226 class UrlAccessCache(object):
       
   227     def __init__(self):
       
   228         # {URL : True/False, ...}
       
   229         self._cache = {}
       
   230         
       
   231     def clear(self):
       
   232         self._cache = {}
       
   233         
       
   234     def canAccess(self, theUrl):
       
   235         if not self._cache.has_key(theUrl):
       
   236             try:
       
   237                 u = urllib.urlopen(theUrl)#, data, proxies)
       
   238                 u.read()
       
   239                 self._cache[theUrl] = True
       
   240                 logging.debug('URL: %s  for %s' % (True, theUrl))
       
   241             except IOError:
       
   242                 self._cache[theUrl] = False
       
   243                 logging.debug('URL: %s for %s' % (False, theUrl))
       
   244         return self._cache[theUrl]
       
   245 
       
   246 GlobalUrlCache = UrlAccessCache()
       
   247  
       
   248 class DitaLinkCheckBase(object):
       
   249     """Base class that holds some common functionality."""
       
   250     def __init__(self, theIdentity):#=None):
       
   251         self.__identity = theIdentity
       
   252         # Set of error strings, lazily evaluated
       
   253         self._errS = None
       
   254     
       
   255     @property
       
   256     def identity(self):
       
   257         return self.__identity
       
   258     
       
   259     def __cmp__(self, other):
       
   260         assert(self.identity is not None)
       
   261         assert(other.identity is not None)
       
   262         return cmp(self.identity, other.identity)
       
   263 
       
   264     def __eq__(self, other):
       
   265         assert(self.identity is not None)
       
   266         assert(other.identity is not None)
       
   267         return self.identity == other.identity
       
   268 
       
   269     def __hash__(self):
       
   270         assert(self.identity is not None)
       
   271         return hash(self.identity)
       
   272     
       
   273     def __str__(self):
       
   274         return str(self.__identity)
       
   275 
       
   276     def debugDump(self, s=sys.stdout, prefix=''):
       
   277         """Dump of IR for debug purposes."""
       
   278         raise NotImplementedError
       
   279     
       
   280     def addError(self, errCode, argTuple):
       
   281         assert(errCode in PROBLEM_CODE_FORMAT.keys()), 'No error code: %s' % errCode
       
   282         assert(PROBLEM_CODE_FORMAT[errCode][1] == len(argTuple)), \
       
   283             'Length missmatch for error code %d: %d != %d for %s' \
       
   284             % (errCode, PROBLEM_CODE_FORMAT[errCode][1], len(argTuple), str(argTuple))
       
   285         if self._errS is None:
       
   286             self._errS = {}
       
   287         try:
       
   288             self._errS[errCode].add(argTuple)
       
   289         except KeyError:
       
   290             self._errS[errCode] = set((argTuple,))
       
   291 
       
   292     def errStrings(self, generic, theFilter):
       
   293         """Return a sorted list of error messages without duplicates."""
       
   294         if self._errS is not None:
       
   295             mySet = set()
       
   296             for ec in self._errS.keys():
       
   297                 if theFilter is None or ec in theFilter:
       
   298                     assert(ec in PROBLEM_CODE_FORMAT.keys())
       
   299                     for tu in self._errS[ec]:
       
   300                         if generic:
       
   301                             mySet.add(genericStringForErrorCode(ec))
       
   302                         else:
       
   303                             f, c = PROBLEM_CODE_FORMAT[ec]
       
   304                             assert(len(tu) == c)
       
   305                             mySet.add(f % tu)
       
   306             l = list(mySet)
       
   307             l.sort()
       
   308             return l
       
   309         return []
       
   310     
       
   311     def updateErrorCount(self, theMap):
       
   312         """Updates a map of {error_code, : count, ...}.
       
   313         Overridden for file and file set."""
       
   314         if self._errS is not None:
       
   315             for e in self._errS.keys():
       
   316                 theMap[e] += len(self._errS[e])
       
   317     
       
   318     def writeErrors(self, isGeneric, theFilter, theStream=sys.stdout):
       
   319         """Can be overridden in child classes to recurse into
       
   320         their data structures."""
       
   321         theStream.write('\n'.join(self.errStrings(isGeneric, theFilter)))
       
   322     
       
   323 class DitaId(DitaLinkCheckBase):
       
   324     """Represents a node with an id."""
       
   325     def __init__(self, theN):
       
   326         assert(theN.get('id', None) is not None)
       
   327         super(DitaId, self).__init__(theN.get('id', None))
       
   328         self._elem = theN.tag
       
   329         if '#' in self.id:
       
   330             self.addError(100, (self.id,))
       
   331         # TODO: NMTOKENS
       
   332     
       
   333     @property
       
   334     def elem(self):
       
   335         return self._elem
       
   336 
       
   337     @property
       
   338     def id(self):
       
   339         return self.identity
       
   340 
       
   341     def checkGuid(self):
       
   342         """optionally applies additional checks for GUID requirements."""
       
   343         if RE_GUID.match(self.id) is None:
       
   344             self.addError(102, (self.id,))
       
   345 
       
   346     def debugDump(self, s=sys.stdout, prefix=''):
       
   347         """Dump of IR for debug purposes."""
       
   348         s.write('%sID:  <%s id="%s" />\n' % (prefix, self.elem, self.id))
       
   349         
       
   350 class DitaRef(DitaLinkCheckBase):
       
   351     """Represents a reference node."""
       
   352     def __init__(self, theN):
       
   353         self._elem = theN.tag
       
   354         self._href = theN.get('href', None)
       
   355         super(DitaRef, self).__init__('%s %s' % (self._elem, self._href))
       
   356         # This is used when figuring out of the target is the correct element
       
   357         # e.g. in Vanilla DITA
       
   358         # <topicref href="batcaring.dita" type="task"></topicref>
       
   359         self._refType = theN.get('type', None)
       
   360         # Format attribute, this can be format="ditamap"
       
   361         self._format = theN.get('format', None)
       
   362         if self._href is None:
       
   363             self.addError(201, (self._elem,))
       
   364             self._url = None
       
   365         else:
       
   366             self._url = urlparse.urlparse(self._href)
       
   367             if '#' in self._url.fragment:
       
   368                 self.addError(200, (self._href,))
       
   369 
       
   370     @property
       
   371     def elem(self):
       
   372         return self._elem
       
   373 
       
   374     @property
       
   375     def href(self):
       
   376         """The value of the href attribute."""
       
   377         return self._href
       
   378     
       
   379     @property
       
   380     def refType(self):
       
   381         """The value of the type attribute."""
       
   382         return self._refType
       
   383     
       
   384     @property
       
   385     def format(self):
       
   386         """The value of the format attribute."""
       
   387         return self._format
       
   388     
       
   389     @property
       
   390     def path(self):
       
   391         """The value of the path part of the href attribute."""
       
   392         return self._url.path
       
   393         
       
   394     @property
       
   395     def fragment(self):
       
   396         """The value of the fragment part of the href attribute."""
       
   397         return self._url.fragment
       
   398         
       
   399     @property
       
   400     def scheme(self):
       
   401         """The URI scheme e.g. 'http' or '' if no scheme."""
       
   402         return self._url.scheme
       
   403     
       
   404     def fileFragment(self, theRefFile):                               
       
   405         """The absolute path of the file and the fragment identifier or (None, None)."""
       
   406         if self.scheme not in ('', 'file'):
       
   407             return (None, None)
       
   408         if len(self.path) == 0:
       
   409             myPath = theRefFile
       
   410         else:
       
   411             myPath = os.path.join(os.path.dirname(theRefFile), self.path)
       
   412         return normalisePath(myPath), self.fragment
       
   413     
       
   414     def checkGuid(self):
       
   415         """optionally applies additional checks for GUID requirements."""
       
   416         if RE_GUID.match(self.path) is None:
       
   417             self.addError(203, (self.path,))
       
   418         if RE_GUID.match(self.fragment) is None:
       
   419             self.addError(204, (self.fragment,))                
       
   420 
       
   421     def checkUrl(self):
       
   422         if self.scheme:
       
   423             myU = urlparse.urlunparse(self._url)
       
   424             if not GlobalUrlCache.canAccess(myU):
       
   425                 self.addError(300, (myU,))
       
   426 
       
   427     def debugDump(self, s=sys.stdout, prefix=''):
       
   428         """Dump of IR for debug purposes."""
       
   429         s.write('%sREF: <%s href="%s" />\n' % (prefix, self.elem, self._href))
       
   430 
       
   431 class DitaFileObj(DitaLinkCheckBase):
       
   432     """Base class for a DITA topic or map."""
       
   433     def __init__(self, theFileObj, theFileName=None):
       
   434         """Initialiser with a file object and a file path"""
       
   435         #print '\nDitaFileObj(%s, %s)' % (theFileObj, theFileName)
       
   436         if theFileName is not None:
       
   437             super(DitaFileObj, self).__init__(normalisePath(theFileName))
       
   438         elif theFileObj is not None:
       
   439             super(DitaFileObj, self).__init__(theFileObj.name)
       
   440         else:
       
   441             super(DitaFileObj, self).__init__(None)
       
   442         self._rootId = None
       
   443         self._doctype = None
       
   444         # Sets of class DitaId
       
   445         self._idS = set()
       
   446         self._dupeIdS = set()
       
   447         # Set of class DitaRef
       
   448         self._xrefS = set()
       
   449         # Ouptut control
       
   450         self._hasWritten = False
       
   451         # Size of input
       
   452         try:
       
   453             self._bytes = os.path.getsize(theFileName)
       
   454         except Exception:
       
   455             # Try as if a StringIO
       
   456             try:
       
   457                 self._bytes = theFileObj.len
       
   458             except AttributeError:
       
   459                 # Give up
       
   460                 self._bytes = 0
       
   461         # Process the file object
       
   462         if theFileObj is not None:
       
   463             try:
       
   464                 # TODO: use iterparse?
       
   465                 theTree = etree.parse(theFileObj)
       
   466             except SyntaxError, err:
       
   467                 self.addError(404, (str(err),))
       
   468             else:
       
   469                 # Walk the tree
       
   470                 for i, e in enumerate(theTree.getiterator()):
       
   471                     #print 'TRACE: e', e
       
   472                     # Element [0] is the root element
       
   473                     if i == 0:
       
   474                         assert(self._rootId is None)
       
   475                         assert(self._doctype is None)
       
   476                         self._doctype = e.tag
       
   477                         if e.get('id', None) is not None:
       
   478                             self._rootId = DitaId(e)
       
   479                             self._addId(self._rootId)
       
   480                         else:
       
   481                             self.addError(402, ())
       
   482                     else:
       
   483                         # NOTE: Elements with id attributes can also have href
       
   484                         # attributes. For example a <topicref> in a <bookmap>
       
   485                         # Thus these tests are not exclusive
       
   486                         if e.get('id', None) is not None:
       
   487                             self._addId(DitaId(e))
       
   488                         if e.get('href', None) is not None:
       
   489                             # TODO: Do we limit ourselves to only a certain set of elements?
       
   490                             self._xrefS.add(DitaRef(e))
       
   491         else:
       
   492             self.addError(400, (self.identity,))
       
   493     
       
   494     def _addId(self, theId):
       
   495         #print 'TRACE: adding %s' % theId
       
   496         #print 'TRACE: self._idS %s' % self._idS
       
   497         if theId in self._idS:
       
   498             # Remove from self._idS
       
   499             #print 'TRACE: removing %s' % theId
       
   500             self._idS.remove(theId)
       
   501             self._dupeIdS.add(theId)
       
   502             self.addError(401, (theId.identity,))
       
   503         elif theId not in self._dupeIdS:
       
   504             self._idS.add(theId)
       
   505     
       
   506     @property
       
   507     def bytes(self):
       
   508         return self._bytes
       
   509     
       
   510     @property
       
   511     def doctype(self):
       
   512         return self._doctype
       
   513     
       
   514     @property
       
   515     def rootId(self):
       
   516         if self._rootId is not None:
       
   517             return self._rootId.id
       
   518     
       
   519     @property
       
   520     def isMap(self):
       
   521         return self.doctype == "map" \
       
   522         or self.doctype == 'bookmap' \
       
   523         or (self.doctype is not None and self.doctype.endswith('Map'))
       
   524     
       
   525     @property
       
   526     def idS(self):
       
   527         """The set of IDs."""
       
   528         return self._idS
       
   529     
       
   530     @property
       
   531     def refS(self):
       
   532         """The set of DitaRef objects."""
       
   533         return self._xrefS
       
   534     
       
   535     def idElemMap(self):
       
   536         """Returns a map {id : elem name, ...}."""
       
   537         retVal = {}
       
   538         for anId in self._idS:
       
   539             retVal[anId.id] = anId.elem
       
   540         return retVal
       
   541     
       
   542     def hasId(self, theString):
       
   543         for anId in self._idS:
       
   544             if theString == anId.id:
       
   545                 return True
       
   546         return False
       
   547 
       
   548     def idElem(self, theString):
       
   549         for anId in self._idS:
       
   550             if theString == anId.id:
       
   551                 return anId.elem
       
   552         return None
       
   553 
       
   554     def idObj(self, theString):
       
   555         for anId in self._idS:
       
   556             if theString == anId.id:
       
   557                 return anId
       
   558         return None
       
   559 
       
   560     def updateErrorCount(self, theMap):
       
   561         """Updates a map of {error_code, : count, ...}."""
       
   562         if self._errS is not None:
       
   563             for e in self._errS.keys():
       
   564                 theMap[e] += len(self._errS[e])
       
   565         for idObj in self.idS:
       
   566             idObj.updateErrorCount(theMap)
       
   567         for refObj in self.refS:
       
   568             refObj.updateErrorCount(theMap)
       
   569     
       
   570     def writeErrorList(self, theList, theSubHead='', theS=sys.stdout):
       
   571         if len(theList) > 0:
       
   572             theList.sort()
       
   573             if not self._hasWritten:
       
   574                 theS.write('File: %s\n' % self.identity)
       
   575             self._hasWritten = True
       
   576             if len(theSubHead) > 0:
       
   577                 theS.write('%s [%d]:\n' % (theSubHead, len(theList)))
       
   578             theS.write('\n'.join(theList))
       
   579             theS.write('\n')
       
   580     
       
   581     def writeErrors(self, isGeneric, theFilter, theStream=sys.stdout):
       
   582         """Writes out errors for me, my IDs and my Refs."""
       
   583         self._hasWritten = False
       
   584         self.writeErrorList(self.errStrings(isGeneric, theFilter), 'File errors:', theStream)
       
   585 #===============================================================================
       
   586 #        # Duplicate IDs
       
   587 #        myList = (list(self._dupeIdS))
       
   588 #        if len(myList):
       
   589 #            self.writeErrorList(
       
   590 #                    [i.identity for i in myList],
       
   591 #                    'Duplicate ID',
       
   592 #                    theStream)
       
   593 #===============================================================================
       
   594         # Now IDs
       
   595         myList = (list(self.idS))
       
   596         myList.sort()
       
   597         for anId in myList:
       
   598             self.writeErrorList(anId.errStrings(isGeneric, theFilter), 'ID=%s' % anId.identity, theStream)
       
   599         # Now Refs
       
   600         myList = (list(self._xrefS))
       
   601         myList.sort()
       
   602         for anId in myList:
       
   603             self.writeErrorList(anId.errStrings(isGeneric, theFilter), 'Ref=%s' % anId.identity, theStream)
       
   604         if self._hasWritten:
       
   605             theStream.write('\n')
       
   606     
       
   607     def debugDump(self, s=sys.stdout, prefix=''):
       
   608         """Dump of IR for debug purposes."""
       
   609         s.write('%sFile: %s\n' % (prefix, self.identity))
       
   610         for anId in self._idS:
       
   611             anId.debugDump(s, prefix=prefix+'  ')
       
   612         for aRef in self._xrefS:
       
   613             aRef.debugDump(s, prefix=prefix+'  ')
       
   614     
       
   615 class DitaFilePath(DitaFileObj):
       
   616     """Base class for a DITA topic or map from the file system."""
       
   617     def __init__(self, theFilePath):
       
   618         """Initialiser with a file path"""
       
   619         try:
       
   620             f = open(theFilePath)
       
   621         except IOError:
       
   622             f = None
       
   623         #print 'DitaFilePath(%s)' % theFilePath
       
   624         super(DitaFilePath, self).__init__(f, theFilePath)
       
   625         if f is None:
       
   626             self.addError(400, (theFilePath,))
       
   627             
       
   628             
       
   629 class DitaFileMapBase(object):
       
   630     """Base class for holding a map of {file path : class DitaFile, ...}
       
   631     Actual implementation can be in-memory or via a database e.g. the
       
   632     shelve module."""
       
   633     def keys(self):
       
   634         """Returns an unsorted list of keys in the map."""
       
   635         raise NotImplementedError()
       
   636     
       
   637     def has_key(self, thePath):
       
   638         """Return True if the key exists."""
       
   639         raise NotImplementedError()
       
   640     
       
   641     def remove(self, thePath):
       
   642         """Remove the entry corresponding to thePath, may raise KeyError."""
       
   643         raise NotImplementedError()
       
   644     
       
   645     def getDitaFileObj(self, thePath):
       
   646         """Return a DitaFileObj that corresponds to thePath, may raise KeyError."""
       
   647         raise NotImplementedError()
       
   648         
       
   649     def setDitaFileObj(self, thePath, theObj):
       
   650         """Load a DitaFileObj or update a mutated DitaFileObj."""
       
   651         raise NotImplementedError()
       
   652         
       
   653 class DitaFileMapInMemory(DitaFileMapBase):
       
   654     """Holds map of {file path : class DitaFile, ...} in memory."""
       
   655     def __init__(self):
       
   656         # Map of {file path : class DitaFile, ...}
       
   657         self._fileMap = {}
       
   658     
       
   659     def keys(self):
       
   660         """Returns an unsorted list of keys in the map."""
       
   661         return self._fileMap.keys()
       
   662     
       
   663     def has_key(self, thePath):
       
   664         """Return True if the key exists."""
       
   665         return self._fileMap.has_key(thePath)
       
   666         
       
   667     def remove(self, thePath):
       
   668         """Remove the entry corresponding to thePath, may raise KeyError."""
       
   669         del self._fileMap[thePath]
       
   670     
       
   671     def getDitaFileObj(self, thePath):
       
   672         """Return a DitaFileObj that corresponds to thePath, may raise KeyError."""
       
   673         return self._fileMap[thePath]
       
   674         
       
   675     def setDitaFileObj(self, thePath, theObj):
       
   676         """Load a DitaFileObj or update a mutated DitaFileObj."""
       
   677         self._fileMap[thePath] = theObj
       
   678         
       
   679 class DitaFileMapShelve(DitaFileMapBase):
       
   680     """Holds map of {file path : class DitaFile, ...} in a shelve database."""
       
   681     DBASE_FILENAME = 'linkchecker.dbase'
       
   682     def __init__(self):
       
   683         if os.path.exists(self.DBASE_FILENAME):
       
   684             os.remove(self.DBASE_FILENAME)
       
   685         self._db = shelve.open(self.DBASE_FILENAME)
       
   686         # Use this as a 'cache' as shelf.keys() is slow
       
   687         self._keys = set()
       
   688     
       
   689     def keys(self):
       
   690         """Returns an unsorted list of keys in the map."""
       
   691         return list(self._keys)
       
   692     
       
   693     def has_key(self, thePath):
       
   694         """Return True if the key exists."""
       
   695         return thePath in self._keys
       
   696         
       
   697     def remove(self, thePath):
       
   698         """Remove the entry corresponding to thePath, may raise KeyError."""
       
   699         del self._db[thePath]
       
   700         self._keys.remove(thePath)
       
   701     
       
   702     def getDitaFileObj(self, thePath):
       
   703         """Return a DitaFileObj that corresponds to thePath, may raise KeyError."""
       
   704         return self._db[thePath]
       
   705         
       
   706     def setDitaFileObj(self, thePath, theObj):
       
   707         """Load a DitaFileObj or update a mutated DitaFileObj."""
       
   708         self._db[thePath] = theObj
       
   709         self._keys.add(thePath)
       
   710         
       
   711 class DitaFileSet(DitaLinkCheckBase):
       
   712     """Holds information about a set of DITA files."""
       
   713     STATS_KEYS = ('Maps', 'Non-maps', 'Files', 'Bytes', 'IDs', 'Refs')
       
   714     def __init__(self,
       
   715                  theDir,
       
   716                  procDir=True,
       
   717                  thePatterns=None,
       
   718                  recursive=False,
       
   719                  testExt=False,
       
   720                  useDbase=False):
       
   721         """Constructor. theDir is the root directory of DITA XML.
       
   722         procDir - If True then process this directory immediately, otherwise
       
   723                     the directory can be processed independently and
       
   724                     _addFileObj() or _addDitaFileObj() invoked.
       
   725         thePatterns - If supplied this should be a space separated string of
       
   726                         fnmatch extensions.
       
   727         recursive - If True and procDir True the directory is processed recursively.
       
   728         testExt - If True then test external URLs.
       
   729         useDbase - If True then store all DitaFile objects in an external dbase
       
   730                     (slower but less memory issues).
       
   731         """
       
   732         if thePatterns is None:
       
   733             thePatterns = FNMATCH_STRING.split(' ')
       
   734         if theDir is not None:
       
   735             theDir = normalisePath(theDir)
       
   736         super(DitaFileSet, self).__init__(theDir)
       
   737         logging.info('DitaFileSet starting to read...')
       
   738         GlobalUrlCache.clear()
       
   739         self._testExt = testExt
       
   740         # Set up how we store the DitaFile objects
       
   741         if useDbase:
       
   742             self._fileMap = DitaFileMapShelve()
       
   743         else:
       
   744             self._fileMap = DitaFileMapInMemory()
       
   745         # Map of (str(rootId) : filepath, ...) with no duplicates
       
   746         # Keys will be in self._uniqueRootIds
       
   747         self._rootIdToFilePathMap = {}
       
   748         # Path to the unique DITA map
       
   749         self._uniqueMapPath = None
       
   750         # Count of {error_code : count, ...}
       
   751         self._errCountMap = CountDict()
       
   752         # Statistics
       
   753         self._statsMap = CountDict()
       
   754         ## and initialise
       
   755         #for k in self.STATS_KEYS:
       
   756         #    self._statsMap[k]
       
   757         # Finalisation control (weak)
       
   758         self._hasFinalised = False
       
   759         # Timers
       
   760         self._timeRead = time.clock()
       
   761         self._timeAnalyse = 0.0
       
   762         if procDir:
       
   763             if theDir is not None and os.path.isdir(theDir):
       
   764                 self._readDir(theDir, thePatterns, recursive)
       
   765             else:
       
   766                 self.addError(500, (theDir,))
       
   767             # Finalise and run all the tests
       
   768             self.finalise()
       
   769     
       
   770     @property
       
   771     def errCountMap(self):
       
   772         return self._errCountMap
       
   773     
       
   774     @property
       
   775     def statsMap(self):
       
   776         return self._statsMap
       
   777     
       
   778     def writeStatistics(self, s=sys.stdout):
       
   779         """Writes out read statistics."""
       
   780         s.write(' Statistics '.center(PRINT_WIDTH, '='))
       
   781         s.write('\n')
       
   782         if len(self._statsMap) > 0:
       
   783             o = self.STATS_KEYS
       
   784             #assert(set(o) == set(self._statsMap.keys())), \
       
   785             #    '%s != %s' % (o, self._statsMap.keys())
       
   786             for k in o:
       
   787                 try:
       
   788                     m = self._statsMap[k] / (1024.0*1024.0)
       
   789                     s.write('%20s: %10d [%10.3f M]\n' % (k, self._statsMap[k], m))
       
   790                 except KeyError:
       
   791                     s.write('%20s: %10s \n' % (k, 'Not seen'))
       
   792             s.write('%20s: %10.3f (s)\n' % ('Read time', self._timeRead))
       
   793             s.write('%20s: %10.3f (s)\n' % ('Analysis time', self._timeAnalyse))
       
   794             s.write('='*PRINT_WIDTH)
       
   795         else:
       
   796             s.write('Nothing processed.')
       
   797         s.write('\n')
       
   798         
       
   799     def writeErrorSummary(self, s=sys.stdout):
       
   800         s.write(' Error Summary '.center(PRINT_WIDTH, '='))
       
   801         s.write('\n')
       
   802         if len(self._errCountMap):
       
   803             s.write('%4s %10s %s\n' % ('Code', 'Count', 'Error'))
       
   804             s.write('%4s %10s %s\n' % ('----', '-----', '-----'))
       
   805             errCodeS = self._errCountMap.keys()
       
   806             errCodeS.sort()
       
   807             for c in errCodeS:
       
   808                 s.write('%4d %10d %s\n' \
       
   809                         % (c, self._errCountMap[c], genericStringForErrorCode(c)))
       
   810         else:
       
   811             s.write('No errors\n')            
       
   812         s.write('='*PRINT_WIDTH)
       
   813         s.write('\n')
       
   814         
       
   815     def writeErrors(self, isGeneric, theFilter, theStream=sys.stdout):
       
   816         """Writes out errors for me and my files."""
       
   817         theStream.write('\n'.join(self.errStrings(isGeneric, theFilter)))
       
   818         fileS = self._fileMap.keys()
       
   819         fileS.sort()
       
   820         for aFile in fileS:
       
   821             # Immutable call so just use get
       
   822             self._fileMap.getDitaFileObj(aFile).writeErrors(isGeneric, theFilter, theStream)
       
   823         
       
   824     def allErrStrings(self, isGeneric, theFilter):
       
   825         """Return a sorted list of error messages without duplicates including
       
   826         files."""
       
   827         retSet = set(self.errStrings(isGeneric, theFilter))
       
   828         fileS = self._fileMap.keys()
       
   829         fileS.sort()
       
   830         for aFilePath in self._fileMap.keys():
       
   831             # Immutable call so just use get
       
   832             for anErr in self._fileMap.getDitaFileObj(aFilePath).errStrings(isGeneric, theFilter): 
       
   833                 retSet.add(anErr)
       
   834         retList = list(retSet)
       
   835         retList.sort()
       
   836         return retList
       
   837             
       
   838     def _readDir(self, theDir, thePatS, recursive):    
       
   839         assert(os.path.isdir(theDir))
       
   840         for aName in os.listdir(theDir):
       
   841             aPath = os.path.join(theDir, aName)
       
   842             if os.path.isdir(aPath) and recursive:
       
   843                 self._readDir(aPath, thePatS, recursive)
       
   844             elif os.path.isfile(aPath):
       
   845                 for aPat in thePatS:
       
   846                     if fnmatch.fnmatch(aName, aPat):
       
   847                         assert(not self._fileMap.has_key(aPath))
       
   848                         logging.debug(' Reading %s' % aPath)
       
   849                         try:
       
   850                             f = open(aPath)
       
   851                         except IOError:
       
   852                             f = None
       
   853                         self._addFileObj(f, aPath)
       
   854                         break
       
   855 
       
   856     def _addFileObj(self, theFileObj, theFilePath):
       
   857         myObj = DitaFileObj(theFileObj, theFilePath)
       
   858         self._addDitaFileObj(myObj)
       
   859 
       
   860     def _addDitaFileObj(self, theDitaFileObj):
       
   861         if self._fileMap.has_key(theDitaFileObj.identity):
       
   862             self.addError(504, (theDitaFileObj.identity,))
       
   863         else:
       
   864             # Mutable call so use set
       
   865             self._fileMap.setDitaFileObj(theDitaFileObj.identity, theDitaFileObj)
       
   866         # Update statistics (files, bytes, ids, refs) etc.
       
   867         self._statsMap['Files'] += 1
       
   868         self._statsMap['Bytes'] += theDitaFileObj.bytes
       
   869         self._statsMap['IDs'] += len(theDitaFileObj.idS)
       
   870         self._statsMap['Refs'] += len(theDitaFileObj.refS)
       
   871         if theDitaFileObj.isMap:
       
   872             self._statsMap['Maps'] += 1
       
   873         else:
       
   874             self._statsMap['Non-maps'] += 1
       
   875     
       
   876     def finalise(self):
       
   877         """Creates the environment for all checks and then runs them."""
       
   878         logging.info('DitaFileSet.finalise() start...')
       
   879         if not self._hasFinalised:
       
   880             self._timeRead = time.clock() - self._timeRead
       
   881             self._timeAnalyse = time.clock()
       
   882             self._initRootIdToFilePathMap()
       
   883             self._checkDupeIdS()
       
   884             self._setMapCycles()
       
   885             self._checkLonely()
       
   886             self._checkRefArcs()
       
   887             self._errCountMap = CountDict()
       
   888             self.updateErrorCount(self._errCountMap)
       
   889             self._hasFinalised = True
       
   890             self._timeAnalyse = time.clock() - self._timeAnalyse
       
   891         logging.info('DitaFileSet.finalise() done.')
       
   892         
       
   893     def _initRootIdToFilePathMap(self):
       
   894         # Map of (str(rootId) : filepath, ...) with no duplicates
       
   895         self._rootIdToFilePathMap = {}
       
   896         # Temporary map of (str(rootId) : [filepath, ...], ...)
       
   897         myDupeIdFiles = {}
       
   898         for fPath in self._fileMap.keys():
       
   899             # fObj is not written to so we don't need to use set
       
   900             fObj = self._fileMap.getDitaFileObj(fPath)
       
   901             #print 'TRACE: _initRootIdToFilePathMap() fPath:', fPath
       
   902             rId = fObj.rootId
       
   903             if rId is not None:
       
   904                 if myDupeIdFiles.has_key(rId):
       
   905                     #print 'TRACE: _initRootIdToFilePathMap() another dupe:', fPath
       
   906                     myDupeIdFiles[rId].append(fObj.identity)
       
   907                 elif self._rootIdToFilePathMap.has_key(rId):
       
   908                     #print 'TRACE: _initRootIdToFilePathMap() first dupe:', fPath
       
   909                     # Remove from map and add to myDupeIdFiles
       
   910                     myFile = self._rootIdToFilePathMap.pop(rId)
       
   911                     try:
       
   912                         myDupeIdFiles[rId].append(myFile)
       
   913                     except KeyError:
       
   914                         myDupeIdFiles[rId] = [myFile,]
       
   915                     myDupeIdFiles[rId].append(fPath)
       
   916                 else:
       
   917                     #print 'TRACE: _initRootIdToFilePathMap() adding:', fPath
       
   918                     self._rootIdToFilePathMap[rId] = fObj.identity
       
   919         # Set duplicate errors
       
   920         for k in myDupeIdFiles.keys():
       
   921             myDupeIdFiles[k].sort()
       
   922             self.addError(501, (k, tuple(myDupeIdFiles[k])))
       
   923             #self.addError(501, (k, str([str(a) for a in myDupeIdFiles[k]])))
       
   924     
       
   925     def _checkDupeIdS(self):
       
   926         """Checks if there are any duplicate IDs anywhere."""
       
   927         # {ID : [fileS, ...], ...}
       
   928         myDupeIdMap = {}
       
   929         # Temporary data structure
       
   930         # {ID : first file ID is seen in, ...}
       
   931         seenIdMap = {}
       
   932         for f in self._fileMap.keys():
       
   933             # o is not written to so we don't need set...
       
   934             o = self._fileMap.getDitaFileObj(f)
       
   935             for anId in o.idS:
       
   936                 if seenIdMap.has_key(anId):
       
   937                     try:
       
   938                         myDupeIdMap[anId].append(f)
       
   939                     except KeyError:
       
   940                         myDupeIdMap[anId] = [seenIdMap[anId],]
       
   941                         myDupeIdMap[anId].append(f)
       
   942                 else:
       
   943                     seenIdMap[anId] = f
       
   944         # Now add to errs as a 505 error message
       
   945         # Sort the files in the map
       
   946         for k in myDupeIdMap.keys():
       
   947             myDupeIdMap[k].sort()
       
   948             self.addError(505, (k, tuple(myDupeIdMap[k])))
       
   949             #self.addError(505, (k, str([str(a) for a in myDupeIdMap[k]])))
       
   950                     
       
   951     def _retMapAdjList(self):
       
   952         """Create an adjacency list {file_path : set(refs), ...} (all strings)"""
       
   953         adjList = {}
       
   954         for f in self._fileMap.keys():
       
   955             fObj = self._fileMap.getDitaFileObj(f)
       
   956             if fObj.isMap:# and fObj.rootId is not None:
       
   957                 assert(fObj.identity not in adjList.keys())
       
   958                 refSet = set()
       
   959                 for r in fObj.refS:
       
   960                     refSet.add(r.fileFragment(fObj.identity)[0])
       
   961                 adjList[fObj.identity] = refSet
       
   962         return adjList
       
   963 
       
   964     def _setMapCycles(self):
       
   965         """Sets any cyclic references seen in DITA maps."""
       
   966         adjList = self._retMapAdjList()
       
   967         # A branch
       
   968         myBr = []
       
   969         myCycles = set()
       
   970         for aPath, aSet in adjList.items():
       
   971             myBr.append(aPath)
       
   972             self._recurseCycles(adjList, myBr, myCycles)
       
   973             myBr.pop()
       
   974         self._setCycleErrors(myCycles)      
       
   975             
       
   976     def _recurseCycles(self, a, b, c):
       
   977         assert(len(b) > 0)
       
   978         try:
       
   979             myPath = b[-1]
       
   980             for r in a[myPath]:
       
   981                 #print '_recurseCycles() testing r', r
       
   982                 #print '_recurseCycles() testing b', b
       
   983                 if r in b:
       
   984                     #print 'Adding cycle', tuple(b[b.index(r):])
       
   985                     c.add(tuple(b[b.index(r):]))
       
   986                 else:
       
   987                     b.append(r)
       
   988                     self._recurseCycles(a, b, c)
       
   989                     b.pop()
       
   990         except KeyError:
       
   991             pass
       
   992         
       
   993     def _setCycleErrors(self, theC):
       
   994         for aT in theC:
       
   995             self.addError(701, (str(aT),))
       
   996             myL = list(aT)
       
   997             assert(len(myL) > 0)
       
   998             i = 0
       
   999             while i < len(myL):
       
  1000                 myL.append(myL[0])
       
  1001                 # Should this be in the file thus, or in the files set?
       
  1002                 # As we are mutating the file object we need to use both
       
  1003                 # getDitaFileObj() and setDitaFileObj()
       
  1004                 fObj = self._fileMap.getDitaFileObj(myL[0])
       
  1005                 fObj.addError(701, (str(myL),))
       
  1006                 self._fileMap.setDitaFileObj(myL[0], fObj)
       
  1007                 myL.pop()
       
  1008                 myL.append(myL.pop(0))
       
  1009                 i += 1    
       
  1010 
       
  1011     def _checkLonely(self):
       
  1012         self._checkLonelyMaps()
       
  1013         self._checkLonelyTopics()
       
  1014         
       
  1015     def _checkLonelyMaps(self):
       
  1016         """Checks for lonely maps."""
       
  1017         mapPathSet = set()
       
  1018         pathSetRemain = set()
       
  1019         for f in self._fileMap.keys():
       
  1020             if self._fileMap.getDitaFileObj(f).isMap:
       
  1021                 mapPathSet.add(f)
       
  1022                 pathSetRemain.add(f)
       
  1023         for aPath in mapPathSet:
       
  1024             myMapObj = self._fileMap.getDitaFileObj(aPath)
       
  1025             for r in myMapObj.refS:
       
  1026                 refFile, frag = r.fileFragment(f)
       
  1027                 try:
       
  1028                     pathSetRemain.remove(refFile)
       
  1029                 except KeyError:
       
  1030                     # refFile is a topic or an already seen map
       
  1031                     pass
       
  1032         if len(pathSetRemain) > 1:
       
  1033             for aPath in pathSetRemain:
       
  1034                 self.addError(700, (aPath,))
       
  1035         elif len(pathSetRemain) == 1:
       
  1036             self._uniqueMapPath = pathSetRemain.pop()
       
  1037 
       
  1038     def _checkLonelyTopics(self):
       
  1039         """Checks for topics that are not referenced by any map."""
       
  1040         mapPathSet = set()
       
  1041         pathSetRemain = set()
       
  1042         for f in self._fileMap.keys():
       
  1043             #print 'TRACE: f:', f
       
  1044             if self._fileMap.getDitaFileObj(f).isMap:
       
  1045                 mapPathSet.add(f)
       
  1046             else:
       
  1047                 pathSetRemain.add(f)
       
  1048         #print 'TRACE: mapPathSet', mapPathSet
       
  1049         #print 'TRACE: pathSetRemain', pathSetRemain
       
  1050         for aMapPath in mapPathSet:
       
  1051             myMapObj = self._fileMap.getDitaFileObj(aMapPath)
       
  1052             for r in myMapObj.refS:
       
  1053                 refFile, frag = r.fileFragment(aMapPath)
       
  1054                 #print 'TRACE: removing:', refFile
       
  1055                 try:
       
  1056                     pathSetRemain.remove(refFile)
       
  1057                 except KeyError:
       
  1058                     # topic has already been seen in another map
       
  1059                     pass
       
  1060         if len(pathSetRemain) > 0:
       
  1061             for aPath in pathSetRemain:
       
  1062                 self.addError(600, (aPath,))
       
  1063             
       
  1064     def _checkRefArcs(self):
       
  1065         """Checks all references are reachable."""
       
  1066         for fPath in self._fileMap.keys():
       
  1067             fObjSrc = self._fileMap.getDitaFileObj(fPath)
       
  1068             hasMutated = False
       
  1069             for rObjSrc in fObjSrc.refS:
       
  1070                 if rObjSrc.scheme:
       
  1071                     # Decide whether to test and external URL
       
  1072                     if self._testExt:
       
  1073                         rObjSrc.checkUrl()
       
  1074                 else:
       
  1075                     fi, fr = rObjSrc.fileFragment(fPath)
       
  1076                     assert(fi is not None), 'fi is None for rObjSrc: %s in file: %s' % (rObjSrc, fPath)
       
  1077                     assert(fr is not None), 'fr is None for rObjSrc: %s in file: %s' % (rObjSrc, fPath)
       
  1078                     ## If a url then fileFragment() returns (None, None)
       
  1079                     #if fi is None:
       
  1080                     #    print 'fPath', fPath
       
  1081                     #    print 'rObjSrc', rObjSrc
       
  1082                     #    print 'fi', fi
       
  1083                     #    print 'fr', fr
       
  1084                     try:
       
  1085                         fObjTgt = self._fileMap.getDitaFileObj(fi)
       
  1086                     except KeyError:
       
  1087                         # Target file can not be found in the IR
       
  1088                         # check the file system to see if it is a non-DITA resource
       
  1089                         if not os.path.isfile(fi):
       
  1090                             #print 'TRACE: adding 410 to', fObj.identity
       
  1091                             fObjSrc.addError(410, (fi,))
       
  1092                             hasMutated = True
       
  1093                     else:
       
  1094                         if len(fr) > 0:
       
  1095                             # Target file is found, test fragment
       
  1096                             if not fObjTgt.hasId(fr):
       
  1097                                 # Fragment not found
       
  1098                                 fObjSrc.addError(411, (fi, fr))
       
  1099                                 hasMutated = True
       
  1100                         if self._checkRefArcElemName(fObjSrc, rObjSrc, fObjTgt, fr):
       
  1101                             hasMutated = True
       
  1102             if hasMutated:
       
  1103                 self._fileMap.setDitaFileObj(fPath, fObjSrc)
       
  1104 
       
  1105     def _checkRefArcElemName(self, fObjSrc, rObjSrc, fObjTgt, frag):
       
  1106         """Test source and target element names
       
  1107         e.g. Source <cxxClassRef> should match target <cxxClass>
       
  1108         And in vanilla DITA:
       
  1109         <topicref href="batcaring.dita" type="task"></topicref>
       
  1110         or:
       
  1111         <topicref href="batcaring.dita" format="ditamap"></topicref>
       
  1112         Should match target element <task>."""
       
  1113         isRootTgt = False
       
  1114         hasMutated = False
       
  1115         if len(frag) == 0:
       
  1116             # iObjTgt is the root element of fObjTgt
       
  1117             if fObjTgt.rootId is None or fObjTgt.idElem(fObjTgt.rootId) is None:
       
  1118                 # Covered by other error codes
       
  1119                 return
       
  1120             iObjTgt = fObjTgt.idObj(fObjTgt.rootId)
       
  1121             isRootTgt = True
       
  1122         elif fObjTgt.hasId(frag):
       
  1123             iObjTgt = fObjTgt.idObj(frag)
       
  1124         else:
       
  1125             # frag not found that will be a 411 error (handled by caller).
       
  1126             return
       
  1127         # Have an rObjSrc + iObjTgt so check elements
       
  1128         # First case:
       
  1129         if rObjSrc.elem.endswith('Ref'):
       
  1130             if rObjSrc.elem[:-3] != iObjTgt.elem:
       
  1131                 if isRootTgt:
       
  1132                     fObjSrc.addError(412, (rObjSrc.elem, iObjTgt.elem))
       
  1133                 else:
       
  1134                     fObjSrc.addError(413, (fObjTgt.idElem(frag), rObjSrc.elem, frag))
       
  1135                 hasMutated = True
       
  1136         # Second case(s) for vanilla DITA
       
  1137         elif rObjSrc.elem == 'topicref':
       
  1138             # Check DITA map links
       
  1139             if rObjSrc.format == 'ditamap' and iObjTgt.elem != 'map':
       
  1140                 # Target must be a root element (actually we don't care)
       
  1141                 fObjSrc.addError(414, (iObjTgt.elem,))
       
  1142                 hasMutated = True
       
  1143             elif iObjTgt.elem == 'map' and rObjSrc.format != 'ditamap':
       
  1144                 fObjSrc.addError(415, (rObjSrc.format,))
       
  1145                 hasMutated = True
       
  1146             elif not (rObjSrc.format == 'ditamap' and iObjTgt.elem == 'map'):
       
  1147                 # Treat refType None as type="topic", see DITA standard for <topicref>
       
  1148                 # Well, also look at the type attribute in chapter 25
       
  1149                 # "When the type attribute is unspecified, it should be
       
  1150                 # determined by inspecting the target if possible. If the
       
  1151                 # target cannot be inspected for some reason, the value
       
  1152                 # should default to "topic".
       
  1153                 # Note: DITA 1.2 takes a different view...
       
  1154                 # Was:
       
  1155                 #if (rObjSrc.refType is None and iObjTgt.elem != 'topic') \
       
  1156                 #or (rObjSrc.refType is not None and rObjSrc.refType != iObjTgt.elem):
       
  1157                 if rObjSrc.refType is not None and rObjSrc.refType != iObjTgt.elem:
       
  1158                     if isRootTgt:
       
  1159                         fObjSrc.addError(416, (rObjSrc.refType, iObjTgt.elem,))
       
  1160                         hasMutated = True
       
  1161                     else:
       
  1162                         fObjSrc.addError(417, (rObjSrc.refType, iObjTgt.elem, frag,))
       
  1163                         hasMutated = True
       
  1164                 # Otherwise topicref looks OK
       
  1165         elif rObjSrc.elem != 'xref' and rObjSrc.elem not in XREF_DESCENDENTS:
       
  1166             # Unknown referencing element
       
  1167             if isRootTgt:
       
  1168                 fObjSrc.addError(418, (rObjSrc.elem, fObjTgt.doctype))
       
  1169                 hasMutated = True
       
  1170             else:
       
  1171                 fObjSrc.addError(419, (rObjSrc.elem, fObjTgt.idElem(frag), frag))
       
  1172                 hasMutated = True
       
  1173         return hasMutated
       
  1174                                         
       
  1175     def updateErrorCount(self, theMap):
       
  1176         """Updates a map of {error_code, : count, ...}."""
       
  1177         if self._errS is not None:
       
  1178             for e in self._errS.keys():
       
  1179                 theMap[e] += len(self._errS[e])
       
  1180         for fPath in self._fileMap.keys():
       
  1181             fObj = self._fileMap.getDitaFileObj(fPath)
       
  1182             # Mutable call so need to update
       
  1183             fObj.updateErrorCount(theMap)
       
  1184             self._fileMap.setDitaFileObj(fPath, fObj)
       
  1185 
       
  1186     def debugDump(self, s=sys.stdout, prefix=''):
       
  1187         """Dump of IR for debug purposes."""
       
  1188         s.write(' Debug Dump '.center(PRINT_WIDTH, '+'))
       
  1189         s.write('\n')
       
  1190         fileS = self._fileMap.keys()
       
  1191         fileS.sort()
       
  1192         for f in fileS:
       
  1193             self._fileMap.getDitaFileObj(f).debugDump(s, prefix)
       
  1194         s.write(' END Debug Dump '.center(PRINT_WIDTH, '+'))
       
  1195         s.write('\n\n')
       
  1196     
       
  1197 #####################################
       
  1198 # Multiprocessing code
       
  1199 #####################################
       
  1200 def retDitaFileObj(thePath):
       
  1201     return DitaFilePath(thePath)
       
  1202  
       
  1203 def genDitaPath(theDir, thePatS, recursive):
       
  1204     assert(os.path.isdir(theDir))
       
  1205     for aName in os.listdir(theDir):
       
  1206         aPath = os.path.join(theDir, aName)
       
  1207         if os.path.isdir(aPath) and recursive:
       
  1208             for p in genDitaPath(aPath, thePatS, recursive):
       
  1209                 yield p
       
  1210         elif os.path.isfile(aPath):
       
  1211             for aPat in thePatS:
       
  1212                 if fnmatch.fnmatch(aName, aPat):
       
  1213                     #logging.info('genDitaPath(): %s' % aPath)
       
  1214                     yield aPath
       
  1215                     break    
       
  1216     
       
  1217 def retMpDitaFileSetObj(theDir,
       
  1218                         thePatterns,
       
  1219                         recursive,
       
  1220                         numJobs, 
       
  1221                         checkExt,
       
  1222                         useDb):
       
  1223     assert(os.path.isdir(theDir))
       
  1224     assert(numJobs >= 0)
       
  1225     retObj = DitaFileSet(theDir, procDir=False, testExt=checkExt, useDbase=useDb)
       
  1226     myNumJobs = numJobs
       
  1227     if numJobs == 0:
       
  1228         myNumJobs = multiprocessing.cpu_count()
       
  1229         logging.info('Set multiprocessing number of jobs to %d' % myNumJobs)
       
  1230     myPool = multiprocessing.Pool(processes=myNumJobs)
       
  1231     for result in [
       
  1232             myPool.apply_async(retDitaFileObj, (f,))
       
  1233                 for f in genDitaPath(theDir, thePatterns, recursive)
       
  1234             ]:
       
  1235         myObj = result.get()
       
  1236         logging.debug('Got %s' % myObj.identity)
       
  1237         retObj._addDitaFileObj(myObj)
       
  1238     # Note: finalise() is a serial process
       
  1239     logging.info('retMpDitaFileSetObj(): finalising')
       
  1240     retObj.finalise()
       
  1241     return retObj
       
  1242 
       
  1243 ######################################
       
  1244 # Test code
       
  1245 ######################################
       
  1246 try:
       
  1247     import cStringIO as StringIO
       
  1248 except ImportError:
       
  1249     import StringIO
       
  1250 
       
  1251 class NullClass(unittest.TestCase):
       
  1252     pass
       
  1253 
       
  1254 class TestCountDict(unittest.TestCase):
       
  1255     def setUp(self):
       
  1256         pass
       
  1257     
       
  1258     def tearDown(self):
       
  1259         pass
       
  1260     
       
  1261     def testSetUpTearDown(self):
       
  1262         """TestCountDict: test setUp() and tearDown()."""
       
  1263         pass
       
  1264     
       
  1265     def test_basic(self):
       
  1266         """TestCountDict: test basic functionality."""
       
  1267         myMap = CountDict()
       
  1268         self.assertEqual(myMap.has_key('wtf'), False)
       
  1269         self.assertEqual(myMap['wtf'], 0)
       
  1270         self.assertEqual(myMap.has_key('wtf'), True)
       
  1271         myMap['wtf'] += 1
       
  1272         self.assertEqual(myMap['wtf'], 1)
       
  1273 
       
  1274 class TestDitaId(unittest.TestCase):
       
  1275     def setUp(self):
       
  1276         pass
       
  1277     
       
  1278     def tearDown(self):
       
  1279         pass
       
  1280     
       
  1281     def testSetUpTearDown(self):
       
  1282         """DitaId: test setUp() and tearDown()."""
       
  1283         pass
       
  1284     
       
  1285     def test_basic(self):
       
  1286         """DitaId: basic read of an node with an id"""
       
  1287         myXml = """<cxxClass id="class_big_endian"/>"""
       
  1288         myTree = etree.parse(StringIO.StringIO(myXml))
       
  1289         myObj = DitaId(myTree.getroot())
       
  1290         self.assertEqual(myObj.id, 'class_big_endian')
       
  1291         self.assertEqual(str(myObj), 'class_big_endian')
       
  1292         self.assertEqual(myObj.errStrings(True, None), [])
       
  1293         self.assertEqual(myObj.errStrings(False, None), [])
       
  1294         
       
  1295     def test_guid_00(self):
       
  1296         """DitaId: basic read of an node with an GUID id"""
       
  1297         myXml = """<cxxClass id="GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E"/>"""
       
  1298         myTree = etree.parse(StringIO.StringIO(myXml))
       
  1299         myObj = DitaId(myTree.getroot())
       
  1300         self.assertEqual(myObj.id, 'GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E')
       
  1301         myObj.checkGuid()
       
  1302         self.assertEqual(myObj.errStrings(True, None), [])
       
  1303         self.assertEqual(myObj.errStrings(False, None), [])
       
  1304 
       
  1305     def test_guid_01(self):
       
  1306         """DitaId: basic read of an node with an GUID id fails"""
       
  1307         myXml = """<cxxClass id="25825EC4-341F-3EA4-94AA-7DCE380E6D2E"/>"""
       
  1308         myTree = etree.parse(StringIO.StringIO(myXml))
       
  1309         myObj = DitaId(myTree.getroot())
       
  1310         self.assertEqual(myObj.id, '25825EC4-341F-3EA4-94AA-7DCE380E6D2E')
       
  1311         myObj.checkGuid()
       
  1312         self.assertEqual(
       
  1313             myObj.errStrings(False, None),
       
  1314             [
       
  1315              'GUID specification does not match id="25825EC4-341F-3EA4-94AA-7DCE380E6D2E"'
       
  1316             ])
       
  1317         self.assertEqual(
       
  1318             myObj.errStrings(True, None),
       
  1319             [
       
  1320              'GUID specification does not match id="%s"' % GENERIC_STRING,
       
  1321             ])
       
  1322 
       
  1323     def test_cmp_eq_00(self):
       
  1324         """DitaId: cmp(), == of two identical nodes"""
       
  1325         myXml = """<cxxClass id="class_big_endian"/>"""
       
  1326         myTree = etree.parse(StringIO.StringIO(myXml))
       
  1327         myObj_00 = DitaId(myTree.getroot())
       
  1328         myObj_01 = DitaId(myTree.getroot())
       
  1329         self.assertEqual(cmp(myObj_00, myObj_01), 0)
       
  1330         self.assertEqual((myObj_00 == myObj_01), True)
       
  1331 
       
  1332     def test_cmp_eq_01(self):
       
  1333         """DitaId: cmp(), == of two identical nodes from different elements."""
       
  1334         myXml_00 = """<cxxClass id="big_endian"/>"""
       
  1335         myTree_00 = etree.parse(StringIO.StringIO(myXml_00))
       
  1336         myObj_00 = DitaId(myTree_00.getroot())
       
  1337         myXml_01 = """<cxxStruct id="big_endian"/>"""
       
  1338         myTree_01 = etree.parse(StringIO.StringIO(myXml_01))
       
  1339         myObj_01 = DitaId(myTree_01.getroot())
       
  1340         self.assertEqual(cmp(myObj_00, myObj_01), 0)
       
  1341         self.assertEqual((myObj_00 == myObj_01), True)
       
  1342 
       
  1343     def test_set(self):
       
  1344         """DitaId: read of an node with an id several times into a set and check unique,"""
       
  1345         myXml = """<cxxClass id="class_big_endian"/>"""
       
  1346         myTree = etree.parse(StringIO.StringIO(myXml))
       
  1347         s = set()
       
  1348         i = 0
       
  1349         while i < 8:
       
  1350             s.add(DitaId(myTree.getroot()))
       
  1351             i += 1
       
  1352         self.assertEqual(len(s), 1)
       
  1353         self.assertEqual(DitaId(myTree.getroot()) in s, True)
       
  1354 
       
  1355     def test_map(self):
       
  1356         """DitaId: read of an node with an id several times into a map and check unique,"""
       
  1357         myXml = """<cxxClass id="class_big_endian"/>"""
       
  1358         myTree = etree.parse(StringIO.StringIO(myXml))
       
  1359         m = {}
       
  1360         i = 0
       
  1361         while i < 8:
       
  1362             m[DitaId(myTree.getroot())] = 1
       
  1363             i += 1
       
  1364         self.assertEqual(len(m), 1)
       
  1365         self.assertEqual(m.has_key(DitaId(myTree.getroot())), True)
       
  1366 
       
  1367     def test_error_hash(self):
       
  1368         """DitaId: error with a '#' in an id"""
       
  1369         myXml = """<cxxClass id="class_#big_endian"/>"""
       
  1370         myTree = etree.parse(StringIO.StringIO(myXml))
       
  1371         myObj = DitaId(myTree.getroot())
       
  1372         self.assertEqual(myObj.id, 'class_#big_endian')
       
  1373         self.assertEqual(str(myObj), 'class_#big_endian')
       
  1374         self.assertEqual(
       
  1375                 myObj.errStrings(True, None),
       
  1376                 [
       
  1377                     genericStringForErrorCode(100),
       
  1378                 ]
       
  1379             )
       
  1380         self.assertEqual(
       
  1381                 myObj.errStrings(False, None),
       
  1382                 [
       
  1383                  'Character \'#\' not allowed in id="class_#big_endian"',
       
  1384                  ]
       
  1385             )
       
  1386         
       
  1387 
       
  1388 
       
  1389 class TestDitaRef(unittest.TestCase):
       
  1390     def setUp(self):
       
  1391         pass
       
  1392     
       
  1393     def tearDown(self):
       
  1394         pass
       
  1395     
       
  1396     def testSetUpTearDown(self):
       
  1397         """DitaRef: test setUp() and tearDown()."""
       
  1398         pass
       
  1399     
       
  1400     def test_basic(self):
       
  1401         """DitaRef: basic read of an xref node, no fragment"""
       
  1402         myXml = """<xref href="class_big_endian"/>"""
       
  1403         myTree = etree.parse(StringIO.StringIO(myXml))
       
  1404         myObj = DitaRef(myTree.getroot())
       
  1405         self.assertEqual(myObj.href, 'class_big_endian')
       
  1406         self.assertEqual(myObj.path, 'class_big_endian')
       
  1407         self.assertEqual(myObj.elem, 'xref')
       
  1408         self.assertEqual(str(myObj), 'xref class_big_endian')
       
  1409         self.assertEqual(myObj.fragment, '')
       
  1410         self.assertEqual(myObj.scheme, '')
       
  1411         self.assertEqual(myObj.errStrings(False, None), [])
       
  1412         self.assertEqual(myObj.errStrings(True, None), [])
       
  1413 
       
  1414     def test_basic_frag(self):
       
  1415         """DitaRef: basic read of an xref node, with fragment"""
       
  1416         myXml = """<xref href="class_big_endian.xml#function"/>"""
       
  1417         myTree = etree.parse(StringIO.StringIO(myXml))
       
  1418         myObj = DitaRef(myTree.getroot())
       
  1419         self.assertEqual(myObj.href, 'class_big_endian.xml#function')
       
  1420         self.assertEqual(myObj.path, 'class_big_endian.xml')
       
  1421         self.assertEqual(myObj.fragment, 'function')
       
  1422         self.assertEqual(myObj.scheme, '')
       
  1423         self.assertEqual(myObj.errStrings(False, None), [])
       
  1424         self.assertEqual(myObj.errStrings(True, None), [])
       
  1425 
       
  1426     def test_file_frag_00(self):
       
  1427         """DitaRef: accessing an xref node, with a file and a fragment"""
       
  1428         myXml = """<xref href="class_big_endian.xml#function"/>"""
       
  1429         myTree = etree.parse(StringIO.StringIO(myXml))
       
  1430         myObj = DitaRef(myTree.getroot())
       
  1431         self.assertEqual(myObj.href, 'class_big_endian.xml#function')
       
  1432         self.assertEqual(myObj.path, 'class_big_endian.xml')
       
  1433         self.assertEqual(myObj.fragment, 'function')
       
  1434         self.assertEqual(myObj.scheme, '')
       
  1435         srcPath = normalisePath(os.path.join('C:%s' % os.sep, 'spam', 'eggs.xml'))
       
  1436         expPath = normalisePath(os.path.join('C:%s' % os.sep, 'spam', 'class_big_endian.xml'))
       
  1437         self.assertEqual(
       
  1438             myObj.fileFragment(srcPath),
       
  1439             (expPath, 'function')
       
  1440         )
       
  1441         self.assertEqual(myObj.errStrings(False, None), [])
       
  1442         self.assertEqual(myObj.errStrings(True, None), [])
       
  1443         
       
  1444     def test_file_frag_01(self):
       
  1445         """DitaRef: accessing an xref node, with a file and a fragment and relative path with '\\'."""
       
  1446         myXml = """<xref href="..\\chips\\class_big_endian.xml#function"/>"""
       
  1447         myTree = etree.parse(StringIO.StringIO(myXml))
       
  1448         myObj = DitaRef(myTree.getroot())
       
  1449         srcPath = normalisePath(os.path.join('C:%s' % os.sep, 'spam', 'eggs.xml'))
       
  1450         expPath = normalisePath(os.path.join('C:%s' % os.sep, 'chips', 'class_big_endian.xml'))
       
  1451         self.assertEqual(
       
  1452             myObj.fileFragment(srcPath),
       
  1453             (expPath, 'function')
       
  1454         )
       
  1455         self.assertEqual(myObj.errStrings(False, None), [])
       
  1456         self.assertEqual(myObj.errStrings(True, None), [])
       
  1457         
       
  1458     def test_file_frag_02(self):
       
  1459         """DitaRef: accessing an xref node, with a file and a fragment and relative path with '/'."""
       
  1460         myXml = """<xref href="../chips/class_big_endian.xml#function"/>"""
       
  1461         myTree = etree.parse(StringIO.StringIO(myXml))
       
  1462         myObj = DitaRef(myTree.getroot())
       
  1463         srcPath = normalisePath(os.path.join('C:%s' % os.sep, 'spam', 'eggs.xml'))
       
  1464         expPath = normalisePath(os.path.join('C:%s' % os.sep, 'chips', 'class_big_endian.xml'))
       
  1465         self.assertEqual(
       
  1466             myObj.fileFragment(srcPath),
       
  1467             (expPath, 'function')
       
  1468         )
       
  1469         self.assertEqual(myObj.errStrings(False, None), [])
       
  1470         self.assertEqual(myObj.errStrings(True, None), [])
       
  1471         
       
  1472     def test_file_frag_03(self):
       
  1473         """DitaRef: accessing an xref node, with a no file but with a fragment"""
       
  1474         myXml = """<xref href="#function"/>"""
       
  1475         myTree = etree.parse(StringIO.StringIO(myXml))
       
  1476         myObj = DitaRef(myTree.getroot())
       
  1477         self.assertEqual(myObj.href, '#function')
       
  1478         self.assertEqual(myObj.path, '')
       
  1479         self.assertEqual(myObj.fragment, 'function')
       
  1480         self.assertEqual(myObj.scheme, '')
       
  1481         srcPath = normalisePath(os.path.join('C:%s' % os.sep, 'spam', 'eggs.xml'))
       
  1482         expPath = normalisePath(os.path.join('C:%s' % os.sep, 'spam', 'eggs.xml'))
       
  1483         self.assertEqual(
       
  1484             myObj.fileFragment(srcPath),
       
  1485             (expPath, 'function')
       
  1486         )
       
  1487         self.assertEqual(myObj.errStrings(False, None), [])
       
  1488         self.assertEqual(myObj.errStrings(True, None), [])
       
  1489         
       
  1490     def test_basic_scheme(self):
       
  1491         """DitaRef: an xref node with a URI scheme"""
       
  1492         myXml = """<xref href="http://www.cwi.nl:80/%7Eguido/Python.html#fragment"/>"""
       
  1493         myTree = etree.parse(StringIO.StringIO(myXml))
       
  1494         myObj = DitaRef(myTree.getroot())
       
  1495         self.assertEqual(myObj.href, 'http://www.cwi.nl:80/%7Eguido/Python.html#fragment')
       
  1496         self.assertEqual(myObj.path, '/%7Eguido/Python.html')
       
  1497         self.assertEqual(myObj.fragment, 'fragment')
       
  1498         self.assertEqual(myObj.scheme, 'http')
       
  1499         self.assertEqual(myObj.errStrings(False, None), [])
       
  1500         self.assertEqual(myObj.errStrings(True, None), [])
       
  1501 
       
  1502     def test_basic_scheme_file_frag(self):
       
  1503         """DitaRef: an xref node with a URI scheme, invoking fileFragment()"""
       
  1504         myXml = """<xref href="http://www.cwi.nl:80/%7Eguido/Python.html#fragment"/>"""
       
  1505         myTree = etree.parse(StringIO.StringIO(myXml))
       
  1506         myObj = DitaRef(myTree.getroot())
       
  1507         self.assertEqual(myObj.href, 'http://www.cwi.nl:80/%7Eguido/Python.html#fragment')
       
  1508         self.assertEqual(myObj.path, '/%7Eguido/Python.html')
       
  1509         self.assertEqual(myObj.fragment, 'fragment')
       
  1510         self.assertEqual(myObj.scheme, 'http')
       
  1511         srcPath = os.path.join('C:%s' % os.sep, 'spam', 'eggs.xml')
       
  1512         self.assertEqual(
       
  1513             myObj.fileFragment(srcPath),
       
  1514             (None, None)
       
  1515         )
       
  1516         self.assertEqual(myObj.errStrings(False, None), [])
       
  1517         self.assertEqual(myObj.errStrings(True, None), [])
       
  1518 
       
  1519     def test_fail_no_href(self):
       
  1520         """DitaRef: Fails on an xref node with no href attribute"""
       
  1521         myXml = """<xref />"""
       
  1522         myTree = etree.parse(StringIO.StringIO(myXml))
       
  1523         myObj = DitaRef(myTree.getroot())
       
  1524         self.assertEqual(
       
  1525             myObj.errStrings(False, None),
       
  1526             [
       
  1527              'Reference element "xref" is missing href=... attribute',
       
  1528              ]
       
  1529         )
       
  1530         self.assertEqual(
       
  1531             myObj.errStrings(True, None),
       
  1532             [
       
  1533              'Reference element "%s" is missing href=... attribute' % GENERIC_STRING,
       
  1534              ]
       
  1535         )
       
  1536 
       
  1537     def test_fail_bad_frag(self):
       
  1538         """DitaRef: Fails on an xref node with href attribute that has multiple '#' characters"""
       
  1539         myXml = """<xref href="a#b#c" />"""
       
  1540         myTree = etree.parse(StringIO.StringIO(myXml))
       
  1541         myObj = DitaRef(myTree.getroot())
       
  1542         self.assertEqual(
       
  1543             myObj.errStrings(False, None),
       
  1544             [
       
  1545              'Multiple \'#\' not allowed in reference "a#b#c"',
       
  1546              ]
       
  1547         )
       
  1548         self.assertEqual(
       
  1549             myObj.errStrings(True, None),
       
  1550             [
       
  1551              'Multiple \'#\' not allowed in reference "%s"' % GENERIC_STRING,
       
  1552              ]
       
  1553         )
       
  1554 
       
  1555     def test_guid_00(self):
       
  1556         """DitaRef: basic read of an node with an GUID file/fragment reference"""
       
  1557         myXml = """<xref href="GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E.xml#GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E"/>"""
       
  1558         myTree = etree.parse(StringIO.StringIO(myXml))
       
  1559         myObj = DitaRef(myTree.getroot())
       
  1560         self.assertEqual(myObj.href, 'GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E.xml#GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E')
       
  1561         self.assertEqual(myObj.path, 'GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E.xml')
       
  1562         self.assertEqual(myObj.elem, 'xref')
       
  1563         self.assertEqual(str(myObj), 'xref GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E.xml#GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E')
       
  1564         self.assertEqual(myObj.fragment, 'GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E')
       
  1565         self.assertEqual(myObj.scheme, '')
       
  1566         self.assertEqual(myObj.errStrings(False, None), [])
       
  1567         self.assertEqual(myObj.errStrings(True, None), [])
       
  1568 
       
  1569     def test_guid_01(self):
       
  1570         """DitaRef: basic read of an node with an GUID file part fails"""
       
  1571         myXml = """<xref href="GUID-.xml#GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E"/>"""
       
  1572         myTree = etree.parse(StringIO.StringIO(myXml))
       
  1573         myObj = DitaRef(myTree.getroot())
       
  1574         self.assertEqual(myObj.href, 'GUID-.xml#GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E')
       
  1575         self.assertEqual(myObj.path, 'GUID-.xml')
       
  1576         self.assertEqual(myObj.elem, 'xref')
       
  1577         self.assertEqual(str(myObj), 'xref GUID-.xml#GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E')
       
  1578         self.assertEqual(myObj.fragment, 'GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E')
       
  1579         self.assertEqual(myObj.errStrings(False, None), [])
       
  1580         self.assertEqual(myObj.errStrings(True, None), [])
       
  1581         myObj.checkGuid()
       
  1582         self.assertEqual(
       
  1583             myObj.errStrings(False, None),
       
  1584             [
       
  1585              'GUID specification does not match file reference "GUID-.xml"'
       
  1586             ])
       
  1587         self.assertEqual(
       
  1588             myObj.errStrings(True, None),
       
  1589             [
       
  1590              genericStringForErrorCode(203),
       
  1591             ]
       
  1592         )
       
  1593 
       
  1594     def test_guid_02(self):
       
  1595         """DitaRef: basic read of an node with an GUID fragment part fails"""
       
  1596         myXml = """<xref href="GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E.xml#GUID-25825EC4"/>"""
       
  1597         myTree = etree.parse(StringIO.StringIO(myXml))
       
  1598         myObj = DitaRef(myTree.getroot())
       
  1599         self.assertEqual(myObj.href, 'GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E.xml#GUID-25825EC4')
       
  1600         self.assertEqual(myObj.path, 'GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E.xml')
       
  1601         self.assertEqual(myObj.elem, 'xref')
       
  1602         self.assertEqual(str(myObj), 'xref GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E.xml#GUID-25825EC4')
       
  1603         self.assertEqual(myObj.fragment, 'GUID-25825EC4')
       
  1604         self.assertEqual(myObj.errStrings(False, None), [])
       
  1605         self.assertEqual(myObj.errStrings(True, None), [])
       
  1606         myObj.checkGuid()
       
  1607         self.assertEqual(
       
  1608             myObj.errStrings(False, None),
       
  1609             [
       
  1610              'GUID specification does not match fragment reference "GUID-25825EC4"'
       
  1611             ])
       
  1612         self.assertEqual(
       
  1613             myObj.errStrings(True, None),
       
  1614             [
       
  1615              genericStringForErrorCode(204),
       
  1616             ]
       
  1617         )
       
  1618 
       
  1619 class TestDitaFile(unittest.TestCase):
       
  1620     def setUp(self):
       
  1621         pass
       
  1622     
       
  1623     def tearDown(self):
       
  1624         pass
       
  1625     
       
  1626     def testSetUpTearDown(self):
       
  1627         """DitaFile: test setUp() and tearDown()."""
       
  1628         pass
       
  1629     
       
  1630     def test_Basic(self):
       
  1631         """DitaFile: basic read of an XML file"""
       
  1632         myXml = """<?xml version='1.0' encoding='UTF-8' standalone='no'?>
       
  1633 <!DOCTYPE cxxClass PUBLIC "-//NOKIA//DTD DITA C++ API Class Reference Type v0.1.0//EN" "dtd/cxxClass.dtd" >
       
  1634 <cxxClass id="class_big_endian">
       
  1635     <apiName>BigEndian</apiName>
       
  1636     <shortdesc/>
       
  1637     <cxxClassDetail>
       
  1638         <cxxClassDefinition>
       
  1639             <cxxClassAccessSpecifier value="public"/>
       
  1640             <cxxClassAPIItemLocation>
       
  1641                 <cxxClassDeclarationFile name="filePath" value="K:/sf/os/commsfw/datacommsserver/esockserver/inc/es_sock.h"/>
       
  1642                 <cxxClassDeclarationFileLine name="lineNumber" value="1520"/>
       
  1643                 <cxxClassDefinitionFile name="filePath" value="K:/sf/os/commsfw/datacommsserver/esockserver/inc/es_sock.h"/>
       
  1644                 <cxxClassDefinitionFileLineStart name="lineNumber" value="1516"/>
       
  1645                 <cxxClassDefinitionFileLineEnd name="lineNumber" value="1526"/>
       
  1646             </cxxClassAPIItemLocation>
       
  1647         </cxxClassDefinition>
       
  1648         <apiDesc>
       
  1649             <p>Inserts and extracts integers in big-endian format.   </p>
       
  1650         </apiDesc>
       
  1651     </cxxClassDetail>
       
  1652     <cxxFunction id="class_big_endian_1a9f78fb092e713acf6ffe3e8e11f1626f">
       
  1653     </cxxFunction>
       
  1654     <cxxFunction id="class_big_endian_1aedf702f5c0118e4294d1a6d9684f8441">
       
  1655     </cxxFunction>
       
  1656     <cxxFunction id="class_big_endian_1ae266722f7bb965c971155a3315bad484">
       
  1657     </cxxFunction>
       
  1658     <cxxFunction id="class_big_endian_1a497d5248ea259f8490fb40ac4f2aafb2">
       
  1659     </cxxFunction>
       
  1660 </cxxClass>"""
       
  1661         myFile = StringIO.StringIO(myXml)
       
  1662         myObj = DitaFileObj(myFile, 'foo')
       
  1663         self.assertEqual(myObj.identity, normalisePath('foo'))
       
  1664         self.assertEqual(myObj.doctype, 'cxxClass')
       
  1665         self.assertEqual(myObj.rootId, 'class_big_endian')
       
  1666         #print myObj.idMap()
       
  1667         self.assertEqual(
       
  1668             myObj.idElemMap(),
       
  1669             {
       
  1670                 'class_big_endian_1a9f78fb092e713acf6ffe3e8e11f1626f'   : 'cxxFunction',
       
  1671                 'class_big_endian_1aedf702f5c0118e4294d1a6d9684f8441'   : 'cxxFunction',
       
  1672                 'class_big_endian'                                      : 'cxxClass',
       
  1673                 'class_big_endian_1a497d5248ea259f8490fb40ac4f2aafb2'   : 'cxxFunction',
       
  1674                 'class_big_endian_1ae266722f7bb965c971155a3315bad484'   : 'cxxFunction',
       
  1675                 }
       
  1676         )
       
  1677         self.assertEqual(myObj.errStrings(False, None), [])
       
  1678         self.assertEqual(myObj.errStrings(True, None), [])
       
  1679 
       
  1680     def test_missing_file(self):
       
  1681         """DitaFile: read an missing XML file"""
       
  1682         myObj = DitaFileObj(None, 'foo')
       
  1683         self.assertEqual(
       
  1684             myObj.errStrings(False, None),
       
  1685             [
       
  1686              'Failed to open: "%s"' % normalisePath('foo'),
       
  1687              ]
       
  1688         )
       
  1689         self.assertEqual(
       
  1690             myObj.errStrings(True, None),
       
  1691             [
       
  1692              genericStringForErrorCode(400),
       
  1693              ]
       
  1694         )
       
  1695     
       
  1696     def test_IllFormedFile(self):
       
  1697         """DitaFile: read an ill-formed XML file"""
       
  1698         myXml = """<?xml version='1.0' encoding='UTF-8' standalone='no'?>
       
  1699 <!DOCTYPE cxxClass PUBLIC "-//NOKIA//DTD DITA C++ API Class Reference Type v0.1.0//EN" "dtd/cxxClass.dtd" >
       
  1700 <cxxClass id="class_big_endian">
       
  1701 """
       
  1702         myFile = StringIO.StringIO(myXml)
       
  1703         myObj = DitaFileObj(myFile, 'foo')
       
  1704         self.assertEqual(myObj.identity, normalisePath('foo'))
       
  1705         self.assertEqual(myObj.doctype, None)
       
  1706         self.assertEqual(myObj.rootId, None)
       
  1707         #print myObj.idMap()
       
  1708         self.assertEqual(myObj.idElemMap(), {})
       
  1709         self.assertEqual(
       
  1710             myObj.errStrings(False, None),
       
  1711             [
       
  1712              'Can not parse: "no element found: line 4, column 0"',
       
  1713              ]
       
  1714         )
       
  1715         self.assertEqual(
       
  1716             myObj.errStrings(True, None),
       
  1717             [
       
  1718              genericStringForErrorCode(404),
       
  1719              ]
       
  1720         )
       
  1721 
       
  1722     def test_missing_root_id(self):
       
  1723         """DitaFile: read of an XML file with no id on root element"""
       
  1724         myXml = """<?xml version='1.0' encoding='UTF-8' standalone='no'?>
       
  1725 <!DOCTYPE cxxClass PUBLIC "-//NOKIA//DTD DITA C++ API Class Reference Type v0.1.0//EN" "dtd/cxxClass.dtd" >
       
  1726 <cxxClass>
       
  1727     <xref href="OtherClass">OtherClass</xref>
       
  1728     <cxxFunction id="class_big_endian_1a9f78fb092e713acf6ffe3e8e11f1626f"/>
       
  1729 </cxxClass>"""
       
  1730         myFile = StringIO.StringIO(myXml)
       
  1731         myObj = DitaFileObj(myFile, 'foo')
       
  1732         self.assertEqual(myObj.identity, normalisePath('foo'))
       
  1733         self.assertEqual(myObj.doctype, 'cxxClass')
       
  1734         self.assertEqual(myObj.rootId, None)
       
  1735         self.assertEqual(
       
  1736             myObj.idElemMap(),
       
  1737             {
       
  1738                 'class_big_endian_1a9f78fb092e713acf6ffe3e8e11f1626f'   : 'cxxFunction',
       
  1739                 }
       
  1740         )
       
  1741         self.assertEqual(myObj.errStrings(False, None), [genericStringForErrorCode(402)])
       
  1742         self.assertEqual(myObj.errStrings(True, None), [genericStringForErrorCode(402)])
       
  1743 
       
  1744     def test_duplicate_id(self):
       
  1745         """DitaFile: duplicate IDs"""
       
  1746         myXml = """<root id="AnID">
       
  1747 <elem id="AnID"/>
       
  1748 </root>"""
       
  1749         myFile = StringIO.StringIO(myXml)
       
  1750         myObj = DitaFileObj(myFile, 'spam.xml')
       
  1751         self.assertEqual(myObj.identity, normalisePath('spam.xml'))
       
  1752         self.assertEqual(myObj.doctype, 'root')
       
  1753         self.assertEqual(myObj.rootId, 'AnID')
       
  1754         self.assertEqual(myObj.idElemMap(), {})
       
  1755         self.assertEqual(
       
  1756             myObj.errStrings(False, None),
       
  1757             [
       
  1758                 'Multiple id="AnID"',
       
  1759             ]
       
  1760         )
       
  1761         self.assertEqual(myObj.errStrings(True, None), [genericStringForErrorCode(401)])
       
  1762 
       
  1763     def test_ismap_00(self):
       
  1764         """DitaFile: Is a map for <map>."""
       
  1765         myXml = """<map id="myMap"/>"""
       
  1766         myFile = StringIO.StringIO(myXml)
       
  1767         myObj = DitaFileObj(myFile, 'spam.xml')
       
  1768         self.assertEqual(myObj.isMap, True)
       
  1769     
       
  1770     def test_ismap_01(self):
       
  1771         """DitaFile: Is a map for <cxxAPIMap>."""
       
  1772         myXml = """<cxxAPIMap id="myMap"/>"""
       
  1773         myFile = StringIO.StringIO(myXml)
       
  1774         myObj = DitaFileObj(myFile, 'spam.xml')
       
  1775         self.assertEqual(myObj.isMap, True)
       
  1776     
       
  1777     def test_Basic_01(self):
       
  1778         """DitaFile: read of an simple XML file with id and xref"""
       
  1779         myXml = """<?xml version='1.0' encoding='UTF-8' standalone='no'?>
       
  1780 <!DOCTYPE cxxClass PUBLIC "-//NOKIA//DTD DITA C++ API Class Reference Type v0.1.0//EN" "dtd/cxxClass.dtd" >
       
  1781 <cxxClass id="class_big_endian">
       
  1782     <xref href="OtherClass">OtherClass</xref>
       
  1783     <cxxFunction id="class_big_endian_1a9f78fb092e713acf6ffe3e8e11f1626f"/>
       
  1784 </cxxClass>"""
       
  1785         myFile = StringIO.StringIO(myXml)
       
  1786         myObj = DitaFileObj(myFile, 'foo')
       
  1787         self.assertEqual(myObj.identity, normalisePath('foo'))
       
  1788         self.assertEqual(myObj.doctype, 'cxxClass')
       
  1789         self.assertEqual(myObj.rootId, 'class_big_endian')
       
  1790         self.assertEqual(myObj.isMap, False)
       
  1791         self.assertEqual(len(myObj.idS), 2)
       
  1792         self.assertEqual(len(myObj.refS), 1)
       
  1793         self.assertEqual(myObj.hasId('class_big_endian'), True)
       
  1794         self.assertEqual(myObj.hasId('class_big_endian_1a9f78fb092e713acf6ffe3e8e11f1626f'), True)
       
  1795         self.assertEqual(myObj.hasId('noID'), False)
       
  1796         self.assertEqual(myObj.idElem('class_big_endian'), 'cxxClass')
       
  1797         self.assertEqual(myObj.idElem('noID'), None)
       
  1798         self.assertEqual(
       
  1799             myObj.idElem('class_big_endian_1a9f78fb092e713acf6ffe3e8e11f1626f'),
       
  1800             'cxxFunction'
       
  1801         )
       
  1802         #print myObj.idMap()
       
  1803         self.assertEqual(
       
  1804             myObj.idElemMap(),
       
  1805             {
       
  1806                 'class_big_endian_1a9f78fb092e713acf6ffe3e8e11f1626f'   : 'cxxFunction',
       
  1807                 'class_big_endian'                                      : 'cxxClass',
       
  1808                 }
       
  1809         )
       
  1810         self.assertEqual(myObj.errStrings(False, None), [])
       
  1811         self.assertEqual(myObj.errStrings(True, None), [])
       
  1812 
       
  1813 class TestDitaFileSet(unittest.TestCase):
       
  1814     def setUp(self):
       
  1815         pass
       
  1816     
       
  1817     def tearDown(self):
       
  1818         pass
       
  1819     
       
  1820     def testSetUpTearDown(self):
       
  1821         """DitaFileSet: test setUp() and tearDown()."""
       
  1822         pass
       
  1823     
       
  1824     def test_None(self):
       
  1825         """DitaFileSet: read of None."""
       
  1826         myO = DitaFileSet(None)
       
  1827         myO.finalise()
       
  1828         self.assertEqual(myO.errStrings(False, None), ['Not a directory: None'])
       
  1829         self.assertEqual(myO.errStrings(True, None), ['Not a directory: %s' % GENERIC_STRING, ])
       
  1830         self.assertEqual(myO.errCountMap, {500 : 1})
       
  1831 
       
  1832     def test_basic(self):
       
  1833         """DitaFileSet: Test reading a map and a couple of files."""
       
  1834         myO = DitaFileSet(None, procDir=False)
       
  1835         myO._addFileObj(
       
  1836             StringIO.StringIO(
       
  1837 """<map id="map_00">
       
  1838     <topicref href="spam.dita" />
       
  1839     <topicref href="eggs.dita" />
       
  1840 </map>"""
       
  1841             ),
       
  1842             'map.ditamap'
       
  1843         )
       
  1844         myO._addFileObj(StringIO.StringIO('<topic id="spam"/>'), 'spam.dita')
       
  1845         myO._addFileObj(StringIO.StringIO('<topic id="eggs"/>'), 'eggs.dita')
       
  1846         myO.finalise()
       
  1847         #print 'HI'
       
  1848         #myO.writeErrors(False)
       
  1849         self.assertEqual(myO.allErrStrings(False, None), [])
       
  1850         self.assertEqual(myO.allErrStrings(True, None), [])
       
  1851         self.assertEqual(myO.errCountMap, {})
       
  1852 
       
  1853     def test_duplicate_paths(self):
       
  1854         """DitaFileSet: Test reading a couple of files in duplicate paths."""
       
  1855         myO = DitaFileSet(None, procDir=False)
       
  1856         myO._addFileObj(
       
  1857             StringIO.StringIO(
       
  1858 """<map id="map_00">
       
  1859     <topicref href="spam.dita" />
       
  1860 </map>"""
       
  1861             ),
       
  1862             'map.ditamap'
       
  1863         )
       
  1864         myO._addFileObj(StringIO.StringIO('<topic id="spam"/>'), 'spam.dita')
       
  1865         myO._addFileObj(StringIO.StringIO('<topic id="eggs"/>'), 'spam.dita')
       
  1866         myO.finalise()
       
  1867         self.assertEqual(
       
  1868             myO.errStrings(False, None),
       
  1869             [
       
  1870                 'Duplicate file path: "%s"' % normalisePath('spam.dita'),
       
  1871             ]
       
  1872         )
       
  1873         self.assertEqual(myO.errStrings(True, None), [genericStringForErrorCode(504),])
       
  1874         self.assertEqual(myO.errCountMap, {504 : 1})
       
  1875 
       
  1876     def test_duplicate_ids(self):
       
  1877         """DitaFileSet: Test reading a map and a couple of files with duplicate IDs."""
       
  1878         myO = DitaFileSet(None, procDir=False)
       
  1879         myO._addFileObj(
       
  1880             StringIO.StringIO(
       
  1881 """<map id="map_00">
       
  1882     <topicref href="spam.dita" />
       
  1883     <topicref href="eggs.dita" />
       
  1884     <topicref href="chips.dita" />
       
  1885 </map>"""
       
  1886             ),
       
  1887             'map.ditamap'
       
  1888         )
       
  1889         myO._addFileObj(StringIO.StringIO('<topic id="chips"/>'), 'spam.dita')
       
  1890         myO._addFileObj(StringIO.StringIO('<topic id="chips"/>'), 'eggs.dita')
       
  1891         myO._addFileObj(StringIO.StringIO('<topic id="chips"/>'), 'chips.dita')
       
  1892         myO.finalise()
       
  1893         #print 'HI'
       
  1894         #myO.writeErrors(False)
       
  1895         #pprint.pprint(myO.errStrings(False, None))
       
  1896         self.assertEqual(
       
  1897             myO.errStrings(True, None),
       
  1898             [
       
  1899              genericStringForErrorCode(505),
       
  1900              genericStringForErrorCode(501),
       
  1901              ]
       
  1902         )
       
  1903         expErrs = [
       
  1904                 """Duplicate id="chips" in files: ('%s', '%s', '%s')""" \
       
  1905                     % (normalisePath('chips.dita'), normalisePath('eggs.dita'), normalisePath('spam.dita')),
       
  1906                 """Duplicate root id="chips" in files: ('%s', '%s', '%s')""" \
       
  1907                     % (normalisePath('chips.dita'), normalisePath('eggs.dita'), normalisePath('spam.dita')),
       
  1908             ]
       
  1909         myErrs = myO.errStrings(False, None)
       
  1910 #===============================================================================
       
  1911 #        for i in range(2):
       
  1912 #            if myErrs[i] != expErrs[i]:
       
  1913 #                print myErrs[i]
       
  1914 #                print expErrs[i]
       
  1915 #                print
       
  1916 #===============================================================================
       
  1917         self.assertEqual(myErrs, expErrs)
       
  1918         self.assertEqual(myO.errCountMap, {505: 1, 501: 1})
       
  1919     
       
  1920     def test_lonely_topics(self):
       
  1921         """DitaFileSet: Test a couple of lonely topics."""
       
  1922         myO = DitaFileSet(None, procDir=False)
       
  1923         myO._addFileObj(StringIO.StringIO('<spam id="spam"/>'), 'spam')
       
  1924         myO._addFileObj(StringIO.StringIO('<eggs id="eggs"/>'), 'eggs')
       
  1925         myO.finalise()
       
  1926         self.assertEqual(
       
  1927             myO.errStrings(False, None),
       
  1928             [
       
  1929              'Topic id="%s" is not referenced by any map' % normalisePath('eggs'),
       
  1930              'Topic id="%s" is not referenced by any map' % normalisePath('spam'),
       
  1931              ]
       
  1932         )
       
  1933         self.assertEqual(
       
  1934             myO.errStrings(True, None),
       
  1935             [
       
  1936                 genericStringForErrorCode(600),
       
  1937             ]
       
  1938         )
       
  1939 
       
  1940     def test_map_cycles_00(self):
       
  1941         """DitaFileSet: Cyclic references between two maps."""
       
  1942         myO = DitaFileSet(None, procDir=False)
       
  1943         myO._addFileObj(
       
  1944             StringIO.StringIO(
       
  1945 """<map id="map_00">
       
  1946     <topicref href="map_01.ditamap" format="ditamap" />
       
  1947 </map>"""
       
  1948             ),
       
  1949             'map_00.ditamap'
       
  1950         )
       
  1951         myO._addFileObj(
       
  1952             StringIO.StringIO(
       
  1953 """<map id="map_01">
       
  1954     <topicref href="map_00.ditamap" format="ditamap" />
       
  1955 </map>"""
       
  1956             ),
       
  1957             'map_01.ditamap'
       
  1958         )
       
  1959         myO.finalise()
       
  1960         #print 'HI test_map_cycles_00()'
       
  1961         #pprint.pprint(myO._retMapAdjList())
       
  1962         self.assertEqual(
       
  1963             myO.errStrings(False, None),
       
  1964             [
       
  1965                 'Maps "%s" are in a a cycle.' % str(
       
  1966                     (
       
  1967                      normalisePath('map_00.ditamap'),
       
  1968                      normalisePath('map_01.ditamap'),
       
  1969                      )
       
  1970                 ),
       
  1971                 'Maps "%s" are in a a cycle.' % str(
       
  1972                     (
       
  1973                      normalisePath('map_01.ditamap'),
       
  1974                      normalisePath('map_00.ditamap'),
       
  1975                      )
       
  1976                 ),
       
  1977             ]
       
  1978         )
       
  1979         #print
       
  1980         #pprint.pprint(myO.allErrStrings(False, None))
       
  1981         self.assertEqual(myO.allErrStrings(True, None), [genericStringForErrorCode(701)])
       
  1982         self.assertEqual(myO.errCountMap, {701 : 4})
       
  1983 
       
  1984     def test_map_cycles_01(self):
       
  1985         """DitaFileSet: Cyclic references between three maps."""
       
  1986         myO = DitaFileSet(None, procDir=False)
       
  1987         myO._addFileObj(
       
  1988             StringIO.StringIO(
       
  1989 """<map id="map_00">
       
  1990     <topicref href="map_01.ditamap" format="ditamap" />
       
  1991 </map>"""
       
  1992             ),
       
  1993             'map_00.ditamap'
       
  1994         )
       
  1995         myO._addFileObj(
       
  1996             StringIO.StringIO(
       
  1997 """<map id="map_01">
       
  1998     <topicref href="map_02.ditamap" format="ditamap" />
       
  1999 </map>"""
       
  2000             ),
       
  2001             'map_01.ditamap'
       
  2002         )
       
  2003         myO._addFileObj(
       
  2004             StringIO.StringIO(
       
  2005 """<map id="map_02">
       
  2006     <topicref href="map_00.ditamap" format="ditamap" />
       
  2007 </map>"""
       
  2008             ),
       
  2009             'map_02.ditamap'
       
  2010         )
       
  2011         myO.finalise()
       
  2012         #print 'HI test_map_cycles_00()'
       
  2013         #pprint.pprint(myO._retMapAdjList())
       
  2014         self.assertEqual(
       
  2015             myO.errStrings(False, None),
       
  2016             [
       
  2017                 'Maps "%s" are in a a cycle.' % str(
       
  2018                     (
       
  2019                      normalisePath('map_00.ditamap'),
       
  2020                      normalisePath('map_01.ditamap'),
       
  2021                      normalisePath('map_02.ditamap'),
       
  2022                      )
       
  2023                 ),
       
  2024                 'Maps "%s" are in a a cycle.' % str(
       
  2025                     (
       
  2026                      normalisePath('map_01.ditamap'),
       
  2027                      normalisePath('map_02.ditamap'),
       
  2028                      normalisePath('map_00.ditamap'),
       
  2029                      )
       
  2030                 ),
       
  2031                 'Maps "%s" are in a a cycle.' % str(
       
  2032                     (
       
  2033                      normalisePath('map_02.ditamap'),
       
  2034                      normalisePath('map_00.ditamap'),
       
  2035                      normalisePath('map_01.ditamap'),
       
  2036                      )
       
  2037                 ),
       
  2038             ]
       
  2039         )
       
  2040         self.assertEqual(myO.errStrings(True, None), [genericStringForErrorCode(701)])
       
  2041         self.assertEqual(myO.errCountMap, {701 : 6})
       
  2042 
       
  2043     def test_refarc_00(self):
       
  2044         """DitaFileSet: Test ref arcing - all resolve."""
       
  2045         myO = DitaFileSet(None, procDir=False)
       
  2046         myO._addFileObj(
       
  2047             StringIO.StringIO(
       
  2048 """<map id="map_00">
       
  2049     <topicref href="spam.dita#spam" />
       
  2050     <topicref href="eggs.dita#eggs" />
       
  2051 </map>"""
       
  2052             ),
       
  2053             'map.ditamap'
       
  2054         )
       
  2055         myO._addFileObj(StringIO.StringIO('<topic id="spam"/>'), 'spam.dita')
       
  2056         myO._addFileObj(StringIO.StringIO('<topic id="eggs"/>'), 'eggs.dita')
       
  2057         myO.finalise()
       
  2058         self.assertEqual(myO.errCountMap, {})
       
  2059         self.assertEqual(myO.allErrStrings(False, None), [])
       
  2060         self.assertEqual(myO.allErrStrings(True, None), [])
       
  2061         self.assertEqual(myO.errStrings(False, None), [])
       
  2062         self.assertEqual(myO.errStrings(True, None), [])
       
  2063 
       
  2064     def test_refarc_fail_00(self):
       
  2065         """DitaFileSet: Test ref arcing - can't find file."""
       
  2066         myO = DitaFileSet(None, procDir=False)
       
  2067         myO._addFileObj(
       
  2068             StringIO.StringIO(
       
  2069 """<map id="map_00">
       
  2070     <topicref href="spam_.dita" />
       
  2071     <topicref href="eggs_for_tea.dita" />
       
  2072 </map>"""
       
  2073             ),
       
  2074             'map.ditamap'
       
  2075         )
       
  2076         myO.finalise()
       
  2077         self.assertEqual(myO.errCountMap, {410: 2})
       
  2078         #print 'HI'
       
  2079         #pprint.pprint(myO.allErrStrings(False, None))
       
  2080         self.assertEqual(
       
  2081             myO.allErrStrings(False, None),
       
  2082             [
       
  2083                 'Can not resolve reference to file "%s"' % normalisePath('eggs_for_tea.dita'),
       
  2084                 'Can not resolve reference to file "%s"' % normalisePath('spam_.dita'),
       
  2085             ]
       
  2086         )
       
  2087         self.assertEqual(
       
  2088             myO.allErrStrings(True, None),
       
  2089             [
       
  2090                 'Can not resolve reference to file "..."',
       
  2091             ]
       
  2092         )
       
  2093         self.assertEqual(myO.errStrings(False, None), [])
       
  2094         self.assertEqual(myO.errStrings(True, None), [])
       
  2095 
       
  2096     def test_refarc_fail_01(self):
       
  2097         """DitaFileSet: Test ref arcing - can't find fragment."""
       
  2098         myO = DitaFileSet(None, procDir=False)
       
  2099         myO._addFileObj(
       
  2100             StringIO.StringIO(
       
  2101 """<map id="map_00">
       
  2102     <topicref href="spam.dita#spam_" />
       
  2103     <topicref href="eggs.dita#eggs_" />
       
  2104 </map>"""
       
  2105             ),
       
  2106             'map.ditamap'
       
  2107         )
       
  2108         myO._addFileObj(StringIO.StringIO('<spam id="spam"/>'), 'spam.dita')
       
  2109         myO._addFileObj(StringIO.StringIO('<eggs id="eggs"/>'), 'eggs.dita')
       
  2110         myO.finalise()
       
  2111         self.assertEqual(myO.errCountMap, {411: 2})
       
  2112         #print 'HI'
       
  2113         #pprint.pprint(myO.allErrStrings(False, None))
       
  2114         self.assertEqual(
       
  2115             myO.allErrStrings(False, None),
       
  2116             [
       
  2117                 'Can resolve reference to file "%s" but not to fragment "eggs_"' % normalisePath('eggs.dita'),
       
  2118                 'Can resolve reference to file "%s" but not to fragment "spam_"' % normalisePath('spam.dita'),
       
  2119             ]
       
  2120         )
       
  2121         self.assertEqual(
       
  2122             myO.allErrStrings(True, None),
       
  2123             [
       
  2124                 'Can resolve reference to file "%s" but not to fragment "%s"' % (GENERIC_STRING, GENERIC_STRING),
       
  2125             ]
       
  2126         )
       
  2127         self.assertEqual(myO.errStrings(False, None), [])
       
  2128         self.assertEqual(myO.errStrings(True, None), [])
       
  2129 
       
  2130     def test_refarc_url_00(self):
       
  2131         """DitaFileSet: Test ref arcing - URL."""
       
  2132         myO = DitaFileSet(None, procDir=False, testExt=True)
       
  2133         myO._addFileObj(
       
  2134             StringIO.StringIO(
       
  2135 """<map id="map_00">
       
  2136     <topicref href="spam.dita#spam" />
       
  2137     <topicref href="eggs.dita#eggs" />
       
  2138 </map>"""
       
  2139             ),
       
  2140             'map.ditamap'
       
  2141         )
       
  2142         myO._addFileObj(StringIO.StringIO("""<topic id="spam">
       
  2143         <xref href="http://www.nokia.com">Nokia</xref>
       
  2144 </topic>"""), 'spam.dita')
       
  2145         myO._addFileObj(StringIO.StringIO("""<topic id="eggs">
       
  2146         <xref href="http://www.google.com">Google</xref>
       
  2147 </topic>"""), 'eggs.dita')
       
  2148         myO.finalise()
       
  2149         #print 'HI'
       
  2150         #pprint.pprint(myO.allErrStrings(False, None))
       
  2151         self.assertEqual(myO.errCountMap, {})
       
  2152         self.assertEqual(
       
  2153             myO.allErrStrings(False, None),
       
  2154             [
       
  2155             ]
       
  2156         )
       
  2157         self.assertEqual(
       
  2158             myO.allErrStrings(True, None),
       
  2159             [
       
  2160             ]
       
  2161         )
       
  2162         self.assertEqual(myO.errStrings(False, None), [])
       
  2163         self.assertEqual(myO.errStrings(True, None), [])
       
  2164 
       
  2165 class TestDitaBookmapFileSet(unittest.TestCase):
       
  2166     def setUp(self):
       
  2167         pass
       
  2168     
       
  2169     def tearDown(self):
       
  2170         pass
       
  2171     
       
  2172     def testSetUpTearDown(self):
       
  2173         """TestDitaBookmapFileSet: test setUp() and tearDown()."""
       
  2174         pass
       
  2175     
       
  2176     def test_basic(self):
       
  2177         """TestDitaBookmapFileSet: Test reading a bookmap and a topic."""
       
  2178         myO = DitaFileSet(None, procDir=False)
       
  2179         myO._addFileObj(
       
  2180             StringIO.StringIO(
       
  2181 """<?xml version="1.0" encoding="utf-8"?>
       
  2182 <!DOCTYPE bookmap PUBLIC "-//OASIS//DTD DITA BookMap//EN"
       
  2183 "bookmap.dtd">
       
  2184 <bookmap id="GUID-5BDFDB6B-7801-4804-9F41-2BDC5BE53DDF">
       
  2185   <booktitle>
       
  2186     <mainbooktitle>My Bookmap</mainbooktitle>
       
  2187     <booktitlealt>Alternate title</booktitlealt>
       
  2188   </booktitle>
       
  2189   <frontmatter id="GUID-DA857913-F826-4CF7-A135-93F2AEB48353">
       
  2190     <topicref href="GUID-00025EAD-C4B6-5408-96A3-FFDBBBDC7CAB.dita" id="GUID-994B1764-393F-401F-8571-CE0955AB6CA6" />
       
  2191   </frontmatter>
       
  2192 </bookmap>
       
  2193 """
       
  2194             ),
       
  2195             'bookmap.ditamap'
       
  2196         )
       
  2197         myO._addFileObj(StringIO.StringIO("""<?xml version="1.0" encoding="utf-8"?>
       
  2198 <!DOCTYPE concept  PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
       
  2199 <concept id="GUID-00025EAD-C4B6-5408-96A3-FFDBBBDC7CAB" xml:lang="en">
       
  2200     <title>How to read and write a file</title>
       
  2201 </concept>
       
  2202 """), 'GUID-00025EAD-C4B6-5408-96A3-FFDBBBDC7CAB.dita')
       
  2203         myO.finalise()
       
  2204         #print
       
  2205         #myO.debugDump()
       
  2206         #print 'HI'
       
  2207         #myO.writeErrors(False)
       
  2208         self.assertEqual(myO.allErrStrings(False, None), [])
       
  2209         self.assertEqual(myO.allErrStrings(True, None), [])
       
  2210         self.assertEqual(myO.errCountMap, {})
       
  2211 
       
  2212 class Special(unittest.TestCase):
       
  2213     pass
       
  2214 
       
  2215 def unitTest(theVerbosity=2):
       
  2216     suite = unittest.TestLoader().loadTestsFromTestCase(NullClass)
       
  2217     suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestCountDict))
       
  2218     suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestDitaId))
       
  2219     suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestDitaRef))
       
  2220     suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestDitaFile))
       
  2221     suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestDitaFileSet))
       
  2222     suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestDitaBookmapFileSet))
       
  2223     suite.addTests(unittest.TestLoader().loadTestsFromTestCase(Special))
       
  2224     myResult = unittest.TextTestRunner(verbosity=theVerbosity).run(suite)
       
  2225     return (myResult.testsRun, len(myResult.errors), len(myResult.failures))
       
  2226 
       
  2227 ######################################
       
  2228 # main() stuff
       
  2229 ######################################
       
  2230 def main():
       
  2231     print 'CMD: %s' % ' '.join(sys.argv)
       
  2232     usage = "usage: %prog [options] <Directory of XML content>"
       
  2233     parser = OptionParser(usage, version='%prog ' + __version__)
       
  2234     parser.add_option("-d", action="store_true", dest="dump", default=False, 
       
  2235                       help="Dump internal representation. [default: %default]")
       
  2236     parser.add_option(
       
  2237             "-e", "--errors",
       
  2238             type="str",
       
  2239             dest="error_codes",
       
  2240             default='All',
       
  2241             help="Only report on certain error codes (space seperated list). [default: \"%default\"]"
       
  2242         )      
       
  2243     parser.add_option("-f", "--file", dest="file", type="str", default='None', 
       
  2244                       help="Report of errors by file either 'None', 'generic', 'specific'. [default: %default]")
       
  2245     parser.add_option("-g", action="store_true", dest="guid", default=False, 
       
  2246                       help="Enforce GUID specification. [default: %default]")
       
  2247     parser.add_option(
       
  2248             "-j", "--jobs",
       
  2249             type="int",
       
  2250             dest="jobs",
       
  2251             default=-1,
       
  2252             help="Max processes when multiprocessing. 0 takes CPUs, -1 no MP. [default: %default]"
       
  2253         )      
       
  2254     parser.add_option(
       
  2255             "-l", "--loglevel",
       
  2256             type="int",
       
  2257             dest="loglevel",
       
  2258             default=20,
       
  2259             help="Log Level (debug=10, info=20, warning=30, [error=40], critical=50) [default: %default]"
       
  2260         )      
       
  2261     parser.add_option(
       
  2262             "-p", "--pattern",
       
  2263             type="str",
       
  2264             dest="pattern",
       
  2265             default=FNMATCH_STRING,
       
  2266             help="Pattern match. [default: \"%default\"]"
       
  2267         )      
       
  2268     parser.add_option("-r", action="store_true", dest="recursive", default=False, 
       
  2269                       help="Recursive. [default: %default]")
       
  2270     parser.add_option("-s", action="store_true", dest="shelve", default=False, 
       
  2271                       help="Use the shelve dBase rather than storing the internal representation in memory. This is slower but is useful for large data sets where a memory error might occur. [default: %default]")
       
  2272     parser.add_option("-u", action="store_true", dest="unit_test", default=False, 
       
  2273                       help="Execute unit tests and exit. [default: %default]")
       
  2274     parser.add_option("-x", action="store_true", dest="ext_url", default=False, 
       
  2275                       help="Test external |URLs. [default: %default]")
       
  2276     parser.add_option("-?", action="store_true", dest="query_errors", default=False, 
       
  2277                       help="Display the error types that are detected. [default: %default]")
       
  2278     (options, args) = parser.parse_args()
       
  2279     logging.basicConfig(
       
  2280         level=options.loglevel,
       
  2281         format='%(asctime)s %(levelname)-8s %(message)s',
       
  2282         stream=sys.stdout,
       
  2283     )
       
  2284     if options.file not in ('None', 'generic', 'specific'):
       
  2285         parser.error("--file option must be: 'None' | 'generic' | 'specific'")
       
  2286         return 1
       
  2287     if options.unit_test:
       
  2288         unitTest()
       
  2289     if options.query_errors:
       
  2290         writeGenericStringsForErrorCodes()
       
  2291     if len(args) < 1 and not options.unit_test:
       
  2292         parser.print_help()
       
  2293         parser.error("I can't do much without a path to the XML content.")
       
  2294         return 1
       
  2295     elif len(args) == 1:
       
  2296         if options.jobs > -1:
       
  2297             myObj = retMpDitaFileSetObj(
       
  2298                         args[0],
       
  2299                         options.pattern.split(' '),
       
  2300                         options.recursive,
       
  2301                         options.jobs,
       
  2302                         options.ext_url,
       
  2303                         options.shelve,
       
  2304                         )
       
  2305         else:
       
  2306             myObj = DitaFileSet(args[0],
       
  2307                                 procDir=True,
       
  2308                                 thePatterns=options.pattern.split(' '),
       
  2309                                 recursive=options.recursive,
       
  2310                                 testExt=options.ext_url,
       
  2311                                 useDbase=options.shelve,
       
  2312                                 )
       
  2313             #print 'MyObj:', myObj
       
  2314         if options.dump:
       
  2315             myObj.debugDump()
       
  2316         myObj.writeStatistics()
       
  2317         myObj.writeErrorSummary()
       
  2318         #pprint.pprint(myObj.statsMap)
       
  2319         # TODO: Write out the results in different ways
       
  2320         errFilter = set(PROBLEM_CODE_FORMAT.keys())
       
  2321         if options.error_codes != 'All':
       
  2322             errFilter = set([int(i) for i in options.error_codes.split()])
       
  2323         if options.file == 'generic':
       
  2324             print 'Generic problems:'
       
  2325             myObj.writeErrors(True, errFilter)
       
  2326         elif options.file == 'specific':
       
  2327             print 'Specific problems:'
       
  2328             myObj.writeErrors(False, errFilter)
       
  2329     elif len(args) > 1:
       
  2330         parser.error("Too many arguments, I need only one.")
       
  2331         return 1
       
  2332     return 0
       
  2333 
       
  2334 if __name__ == '__main__':
       
  2335     multiprocessing.freeze_support()
       
  2336     sys.exit(main())