mpdot/linkcheck.py
changeset 4 468f4c8d3d5b
parent 3 d8fccb2cd802
--- a/mpdot/linkcheck.py	Fri Apr 23 20:47:58 2010 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,2336 +0,0 @@
-# Copyright (c) 2007-2010 Nokia Corporation and/or its subsidiary(-ies) All rights reserved.
-# This component and the accompanying materials are made available under the terms of the License 
-# "Eclipse Public License v1.0" which accompanies this distribution, 
-# and is available at the URL "http://www.eclipse.org/legal/epl-v10.html".
-#
-# Initial Contributors:
-# Nokia Corporation - initial contribution.
-#
-# Contributors:
-#
-# Description:
-# Checks links in DITA XML and reports issues.
-"""
-Created on 12 Feb 2010
-
-@author: p2ross
-
-Definitions
-===========
-Doctype
--------
-See: http://www.w3.org/TR/2008/REC-xml-20081126/#dt-root
-Note: this is sometimes called the Doctype because of http://www.w3.org/TR/2008/REC-xml-20081126/#vc-roottype
-
-ID
---
-The value of the 'id' attribute of an element.
-
-Root ID
--------
-The value of the 'id' attribute of the root element.
-Note: A development would allow differently named attributes provided that they
-were ID types. See http://www.w3.org/TR/2008/REC-xml-20081126/#sec-attribute-types
-for validity constraints for ID types.
-
-Reference
----------
-The value of the href attribute of an element.
-
-Map
----
-An XML file whose root element name is 'map' or ends with 'Map'.   
-
-Topic
------
-An XML file that is not a Map.
-
-Lonely topic
-------------
-A topic whose root ID is not referenced by any map. 
-
-Lonely map
-----------
-A map whose root ID is not referenced by any map. 
-
-Map Cycle
----------
-A sequence of map references whose members are not unique.
-
-"""
-
-import os
-import unittest
-import sys
-import logging
-import pprint
-import fnmatch
-import re
-import urllib
-import time
-from optparse import OptionParser, check_choice
-try:
-    from xml.etree import cElementTree as etree
-except ImportError:
-    from xml.etree import ElementTree as etree
-import urlparse
-import multiprocessing
-# used for DitaFileObj persistence
-import shelve
-
-__version__ = '0.1.5'
-
-class ExceptionLinkCheck(Exception):
-    pass
-
-class CountDict(dict):
-    """Dictionary with a default value of 0 for unknown keys."""
-    def __getitem__(self, key):
-        if key not in self: 
-            self[key] = 0
-        return self.get(key)
-
-# Matches stuff like: GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E
-RE_GUID = re.compile(r'GUID-[0-9A-F]{8}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{12}', re.IGNORECASE)
-
-# Of the form {integer_error_code : (format_string, num_args), ...}
-PROBLEM_CODE_FORMAT = {
-    # 'id_syntax'
-    100 : ('Character \'#\' not allowed in id="%s"', 1),
-    101 : ('NMTOKEN character \'%s\' not allowed in id="%s"', 2),
-    102 : ('GUID specification does not match id="%s"', 1),
-    # 'ref_syntax'
-    200 : ('Multiple \'#\' not allowed in reference "%s"', 1),
-    201 : ('Reference element "%s" is missing href=... attribute', 1),
-    202 : ('URL has missing type/format in reference "%s"', 1),
-    203 : ('GUID specification does not match file reference "%s"', 1),
-    204 : ('GUID specification does not match fragment reference "%s"', 1),
-    # 'ref'
-    300 : ('Can not resolve URI "%s"', 1),
-    # 'file'
-    400 : ('Failed to open: "%s"', 1),
-    401 : ('Multiple id="%s"', 1),
-    402 : ('No id attribute on root element', 0),
-    403 : ('Root ID in cycle: %s', 1),
-    404 : ('Can not parse: "%s"', 1),
-    410 : ('Can not resolve reference to file "%s"', 1),
-    411 : ('Can resolve reference to file "%s" but not to fragment "%s"', 2),
-    412 : ('Referencing element "%s" does not match target root element "%s"', 2),
-    413 : ('Referencing element "%s" does not match target element "%s" for id="%s"', 3),
-    414 : ('topicref element with format="ditamap" does not match target root element "%s"', 1),
-    415 : ('topicref to <map> does not have format="ditamap" but format="%s"', 1),
-    416 : ('topicref element type="%s" does not match target root element "%s"', 2),
-    417 : ('topicref element type="%s" does not match target element "%s" for id="%s"', 3),
-    418 : ('Unknown referencing element "%s" does not match target root element "%s"', 2),
-    419 : ('Unknown referencing element "%s" does not match target element "%s" for id="%s"', 3),
-    # 'file_set'
-    500 : ('Not a directory: %s', 1),
-    501 : ('Duplicate root id="%s" in files: %s', 2), 
-    #502 : ('Can not resolve reference to "%s"', 1),
-    #503 : ('Reference type "%s" does not match target type "%s" for id="%s"', 3),
-    504 : ('Duplicate file path: "%s"', 1),
-    505 : ('Duplicate id="%s" in files: %s', 2),
-    # 'topic_set'
-    600 : ('Topic id="%s" is not referenced by any map', 1), 
-    # 'map_set'
-    700 : ('More than one top level map exists: %s', 1),  
-    701 : ('Maps "%s" are in a a cycle.', 1),
-}
-
-GENERIC_STRING = '...'
-PRINT_WIDTH = 75
-
-def genericStringForErrorCode(ec):
-    assert(PROBLEM_CODE_FORMAT.has_key(ec))
-    f, c = PROBLEM_CODE_FORMAT[ec]
-    if c == 0:
-        return f
-    return f % ((GENERIC_STRING,) * c)
-
-def writeGenericStringsForErrorCodes(s=sys.stdout):
-    s.write(' All Error Codes '.center(PRINT_WIDTH, '='))
-    s.write('\n')
-    s.write('%4s  %s\n' % ('Code', 'Error'))
-    s.write('%4s  %s\n' % ('----', '-----'))
-    ecS = PROBLEM_CODE_FORMAT.keys()
-    ecS.sort()
-    for ec in ecS:
-        s.write('%4d  %s\n' % (ec, genericStringForErrorCode(ec)))
-    s.write('='*PRINT_WIDTH)
-    s.write('\n\n')
-
-def normalisePath(thePath):
-    # TODO: How come this does not work?
-    #return os.path.abspath(thePath)
-    return os.path.abspath(thePath).replace('\\', '/')
-
-FNMATCH_PATTERNS = ['*.xml', '*.dita', '*.ditamap']
-FNMATCH_STRING = ' '.join(FNMATCH_PATTERNS)
-
-# These elements descend from topic/xref so can be treated as referencing elements
-XREF_DESCENDENTS = set(
-    (
-        # From the api specialisation
-        'apiRelation',
-        'apiBaseClassifier',
-        'apiOtherClassifier',
-        'apiOperationClassifier',
-        'apiValueClassifier',
-        # From the C++ specialisation
-        'cxxfile',
-        'cxxclass',
-        'cxxstruct',
-        'cxxunion',
-        'cxxfunction',
-        'cxxdefine',
-        'cxxtypedef',
-        'cxxvariable',
-        'cxxenumeration',
-        'cxxClassBaseClass',
-        'cxxClassBaseStruct',
-        'cxxClassBaseUnion',
-        'cxxClassNestedClass',
-        'cxxClassNestedStruct',
-        'cxxClassNestedUnion',
-        'cxxClassEnumerationInherited',
-        'cxxClassEnumeratorInherited',
-        'cxxClassFunctionInherited',
-        'cxxClassVariableInherited',
-        'cxxDefineReimplemented',
-        'cxxEnumerationReimplemented',
-        'cxxFunctionReimplemented',
-        'cxxStructBaseClass',
-        'cxxStructBaseStruct',
-        'cxxStructBaseUnion',
-        'cxxStructNestedClass',
-        'cxxStructNestedStruct',
-        'cxxStructNestedUnion',
-        'cxxStructEnumerationInherited',
-        'cxxStructEnumeratorInherited',
-        'cxxStructFunctionInherited',
-        'cxxStructVariableInherited',
-        'cxxTypedefReimplemented',
-        'cxxUnionBaseClass',
-        'cxxUnionBaseStruct',
-        'cxxUnionBaseUnion',
-        'cxxUnionNestedClass',
-        'cxxUnionNestedStruct',
-        'cxxUnionNestedUnion',
-        'cxxUnionEnumerationInherited',
-        'cxxUnionFunctionInherited',
-        'cxxUnionVariableInherited',
-        'cxxVariableReimplemented',
-    )
-)
-
-class UrlAccessCache(object):
-    def __init__(self):
-        # {URL : True/False, ...}
-        self._cache = {}
-        
-    def clear(self):
-        self._cache = {}
-        
-    def canAccess(self, theUrl):
-        if not self._cache.has_key(theUrl):
-            try:
-                u = urllib.urlopen(theUrl)#, data, proxies)
-                u.read()
-                self._cache[theUrl] = True
-                logging.debug('URL: %s  for %s' % (True, theUrl))
-            except IOError:
-                self._cache[theUrl] = False
-                logging.debug('URL: %s for %s' % (False, theUrl))
-        return self._cache[theUrl]
-
-GlobalUrlCache = UrlAccessCache()
- 
-class DitaLinkCheckBase(object):
-    """Base class that holds some common functionality."""
-    def __init__(self, theIdentity):#=None):
-        self.__identity = theIdentity
-        # Set of error strings, lazily evaluated
-        self._errS = None
-    
-    @property
-    def identity(self):
-        return self.__identity
-    
-    def __cmp__(self, other):
-        assert(self.identity is not None)
-        assert(other.identity is not None)
-        return cmp(self.identity, other.identity)
-
-    def __eq__(self, other):
-        assert(self.identity is not None)
-        assert(other.identity is not None)
-        return self.identity == other.identity
-
-    def __hash__(self):
-        assert(self.identity is not None)
-        return hash(self.identity)
-    
-    def __str__(self):
-        return str(self.__identity)
-
-    def debugDump(self, s=sys.stdout, prefix=''):
-        """Dump of IR for debug purposes."""
-        raise NotImplementedError
-    
-    def addError(self, errCode, argTuple):
-        assert(errCode in PROBLEM_CODE_FORMAT.keys()), 'No error code: %s' % errCode
-        assert(PROBLEM_CODE_FORMAT[errCode][1] == len(argTuple)), \
-            'Length missmatch for error code %d: %d != %d for %s' \
-            % (errCode, PROBLEM_CODE_FORMAT[errCode][1], len(argTuple), str(argTuple))
-        if self._errS is None:
-            self._errS = {}
-        try:
-            self._errS[errCode].add(argTuple)
-        except KeyError:
-            self._errS[errCode] = set((argTuple,))
-
-    def errStrings(self, generic, theFilter):
-        """Return a sorted list of error messages without duplicates."""
-        if self._errS is not None:
-            mySet = set()
-            for ec in self._errS.keys():
-                if theFilter is None or ec in theFilter:
-                    assert(ec in PROBLEM_CODE_FORMAT.keys())
-                    for tu in self._errS[ec]:
-                        if generic:
-                            mySet.add(genericStringForErrorCode(ec))
-                        else:
-                            f, c = PROBLEM_CODE_FORMAT[ec]
-                            assert(len(tu) == c)
-                            mySet.add(f % tu)
-            l = list(mySet)
-            l.sort()
-            return l
-        return []
-    
-    def updateErrorCount(self, theMap):
-        """Updates a map of {error_code, : count, ...}.
-        Overridden for file and file set."""
-        if self._errS is not None:
-            for e in self._errS.keys():
-                theMap[e] += len(self._errS[e])
-    
-    def writeErrors(self, isGeneric, theFilter, theStream=sys.stdout):
-        """Can be overridden in child classes to recurse into
-        their data structures."""
-        theStream.write('\n'.join(self.errStrings(isGeneric, theFilter)))
-    
-class DitaId(DitaLinkCheckBase):
-    """Represents a node with an id."""
-    def __init__(self, theN):
-        assert(theN.get('id', None) is not None)
-        super(DitaId, self).__init__(theN.get('id', None))
-        self._elem = theN.tag
-        if '#' in self.id:
-            self.addError(100, (self.id,))
-        # TODO: NMTOKENS
-    
-    @property
-    def elem(self):
-        return self._elem
-
-    @property
-    def id(self):
-        return self.identity
-
-    def checkGuid(self):
-        """optionally applies additional checks for GUID requirements."""
-        if RE_GUID.match(self.id) is None:
-            self.addError(102, (self.id,))
-
-    def debugDump(self, s=sys.stdout, prefix=''):
-        """Dump of IR for debug purposes."""
-        s.write('%sID:  <%s id="%s" />\n' % (prefix, self.elem, self.id))
-        
-class DitaRef(DitaLinkCheckBase):
-    """Represents a reference node."""
-    def __init__(self, theN):
-        self._elem = theN.tag
-        self._href = theN.get('href', None)
-        super(DitaRef, self).__init__('%s %s' % (self._elem, self._href))
-        # This is used when figuring out of the target is the correct element
-        # e.g. in Vanilla DITA
-        # <topicref href="batcaring.dita" type="task"></topicref>
-        self._refType = theN.get('type', None)
-        # Format attribute, this can be format="ditamap"
-        self._format = theN.get('format', None)
-        if self._href is None:
-            self.addError(201, (self._elem,))
-            self._url = None
-        else:
-            self._url = urlparse.urlparse(self._href)
-            if '#' in self._url.fragment:
-                self.addError(200, (self._href,))
-
-    @property
-    def elem(self):
-        return self._elem
-
-    @property
-    def href(self):
-        """The value of the href attribute."""
-        return self._href
-    
-    @property
-    def refType(self):
-        """The value of the type attribute."""
-        return self._refType
-    
-    @property
-    def format(self):
-        """The value of the format attribute."""
-        return self._format
-    
-    @property
-    def path(self):
-        """The value of the path part of the href attribute."""
-        return self._url.path
-        
-    @property
-    def fragment(self):
-        """The value of the fragment part of the href attribute."""
-        return self._url.fragment
-        
-    @property
-    def scheme(self):
-        """The URI scheme e.g. 'http' or '' if no scheme."""
-        return self._url.scheme
-    
-    def fileFragment(self, theRefFile):                               
-        """The absolute path of the file and the fragment identifier or (None, None)."""
-        if self.scheme not in ('', 'file'):
-            return (None, None)
-        if len(self.path) == 0:
-            myPath = theRefFile
-        else:
-            myPath = os.path.join(os.path.dirname(theRefFile), self.path)
-        return normalisePath(myPath), self.fragment
-    
-    def checkGuid(self):
-        """optionally applies additional checks for GUID requirements."""
-        if RE_GUID.match(self.path) is None:
-            self.addError(203, (self.path,))
-        if RE_GUID.match(self.fragment) is None:
-            self.addError(204, (self.fragment,))                
-
-    def checkUrl(self):
-        if self.scheme:
-            myU = urlparse.urlunparse(self._url)
-            if not GlobalUrlCache.canAccess(myU):
-                self.addError(300, (myU,))
-
-    def debugDump(self, s=sys.stdout, prefix=''):
-        """Dump of IR for debug purposes."""
-        s.write('%sREF: <%s href="%s" />\n' % (prefix, self.elem, self._href))
-
-class DitaFileObj(DitaLinkCheckBase):
-    """Base class for a DITA topic or map."""
-    def __init__(self, theFileObj, theFileName=None):
-        """Initialiser with a file object and a file path"""
-        #print '\nDitaFileObj(%s, %s)' % (theFileObj, theFileName)
-        if theFileName is not None:
-            super(DitaFileObj, self).__init__(normalisePath(theFileName))
-        elif theFileObj is not None:
-            super(DitaFileObj, self).__init__(theFileObj.name)
-        else:
-            super(DitaFileObj, self).__init__(None)
-        self._rootId = None
-        self._doctype = None
-        # Sets of class DitaId
-        self._idS = set()
-        self._dupeIdS = set()
-        # Set of class DitaRef
-        self._xrefS = set()
-        # Ouptut control
-        self._hasWritten = False
-        # Size of input
-        try:
-            self._bytes = os.path.getsize(theFileName)
-        except Exception:
-            # Try as if a StringIO
-            try:
-                self._bytes = theFileObj.len
-            except AttributeError:
-                # Give up
-                self._bytes = 0
-        # Process the file object
-        if theFileObj is not None:
-            try:
-                # TODO: use iterparse?
-                theTree = etree.parse(theFileObj)
-            except SyntaxError, err:
-                self.addError(404, (str(err),))
-            else:
-                # Walk the tree
-                for i, e in enumerate(theTree.getiterator()):
-                    #print 'TRACE: e', e
-                    # Element [0] is the root element
-                    if i == 0:
-                        assert(self._rootId is None)
-                        assert(self._doctype is None)
-                        self._doctype = e.tag
-                        if e.get('id', None) is not None:
-                            self._rootId = DitaId(e)
-                            self._addId(self._rootId)
-                        else:
-                            self.addError(402, ())
-                    else:
-                        # NOTE: Elements with id attributes can also have href
-                        # attributes. For example a <topicref> in a <bookmap>
-                        # Thus these tests are not exclusive
-                        if e.get('id', None) is not None:
-                            self._addId(DitaId(e))
-                        if e.get('href', None) is not None:
-                            # TODO: Do we limit ourselves to only a certain set of elements?
-                            self._xrefS.add(DitaRef(e))
-        else:
-            self.addError(400, (self.identity,))
-    
-    def _addId(self, theId):
-        #print 'TRACE: adding %s' % theId
-        #print 'TRACE: self._idS %s' % self._idS
-        if theId in self._idS:
-            # Remove from self._idS
-            #print 'TRACE: removing %s' % theId
-            self._idS.remove(theId)
-            self._dupeIdS.add(theId)
-            self.addError(401, (theId.identity,))
-        elif theId not in self._dupeIdS:
-            self._idS.add(theId)
-    
-    @property
-    def bytes(self):
-        return self._bytes
-    
-    @property
-    def doctype(self):
-        return self._doctype
-    
-    @property
-    def rootId(self):
-        if self._rootId is not None:
-            return self._rootId.id
-    
-    @property
-    def isMap(self):
-        return self.doctype == "map" \
-        or self.doctype == 'bookmap' \
-        or (self.doctype is not None and self.doctype.endswith('Map'))
-    
-    @property
-    def idS(self):
-        """The set of IDs."""
-        return self._idS
-    
-    @property
-    def refS(self):
-        """The set of DitaRef objects."""
-        return self._xrefS
-    
-    def idElemMap(self):
-        """Returns a map {id : elem name, ...}."""
-        retVal = {}
-        for anId in self._idS:
-            retVal[anId.id] = anId.elem
-        return retVal
-    
-    def hasId(self, theString):
-        for anId in self._idS:
-            if theString == anId.id:
-                return True
-        return False
-
-    def idElem(self, theString):
-        for anId in self._idS:
-            if theString == anId.id:
-                return anId.elem
-        return None
-
-    def idObj(self, theString):
-        for anId in self._idS:
-            if theString == anId.id:
-                return anId
-        return None
-
-    def updateErrorCount(self, theMap):
-        """Updates a map of {error_code, : count, ...}."""
-        if self._errS is not None:
-            for e in self._errS.keys():
-                theMap[e] += len(self._errS[e])
-        for idObj in self.idS:
-            idObj.updateErrorCount(theMap)
-        for refObj in self.refS:
-            refObj.updateErrorCount(theMap)
-    
-    def writeErrorList(self, theList, theSubHead='', theS=sys.stdout):
-        if len(theList) > 0:
-            theList.sort()
-            if not self._hasWritten:
-                theS.write('File: %s\n' % self.identity)
-            self._hasWritten = True
-            if len(theSubHead) > 0:
-                theS.write('%s [%d]:\n' % (theSubHead, len(theList)))
-            theS.write('\n'.join(theList))
-            theS.write('\n')
-    
-    def writeErrors(self, isGeneric, theFilter, theStream=sys.stdout):
-        """Writes out errors for me, my IDs and my Refs."""
-        self._hasWritten = False
-        self.writeErrorList(self.errStrings(isGeneric, theFilter), 'File errors:', theStream)
-#===============================================================================
-#        # Duplicate IDs
-#        myList = (list(self._dupeIdS))
-#        if len(myList):
-#            self.writeErrorList(
-#                    [i.identity for i in myList],
-#                    'Duplicate ID',
-#                    theStream)
-#===============================================================================
-        # Now IDs
-        myList = (list(self.idS))
-        myList.sort()
-        for anId in myList:
-            self.writeErrorList(anId.errStrings(isGeneric, theFilter), 'ID=%s' % anId.identity, theStream)
-        # Now Refs
-        myList = (list(self._xrefS))
-        myList.sort()
-        for anId in myList:
-            self.writeErrorList(anId.errStrings(isGeneric, theFilter), 'Ref=%s' % anId.identity, theStream)
-        if self._hasWritten:
-            theStream.write('\n')
-    
-    def debugDump(self, s=sys.stdout, prefix=''):
-        """Dump of IR for debug purposes."""
-        s.write('%sFile: %s\n' % (prefix, self.identity))
-        for anId in self._idS:
-            anId.debugDump(s, prefix=prefix+'  ')
-        for aRef in self._xrefS:
-            aRef.debugDump(s, prefix=prefix+'  ')
-    
-class DitaFilePath(DitaFileObj):
-    """Base class for a DITA topic or map from the file system."""
-    def __init__(self, theFilePath):
-        """Initialiser with a file path"""
-        try:
-            f = open(theFilePath)
-        except IOError:
-            f = None
-        #print 'DitaFilePath(%s)' % theFilePath
-        super(DitaFilePath, self).__init__(f, theFilePath)
-        if f is None:
-            self.addError(400, (theFilePath,))
-            
-            
-class DitaFileMapBase(object):
-    """Base class for holding a map of {file path : class DitaFile, ...}
-    Actual implementation can be in-memory or via a database e.g. the
-    shelve module."""
-    def keys(self):
-        """Returns an unsorted list of keys in the map."""
-        raise NotImplementedError()
-    
-    def has_key(self, thePath):
-        """Return True if the key exists."""
-        raise NotImplementedError()
-    
-    def remove(self, thePath):
-        """Remove the entry corresponding to thePath, may raise KeyError."""
-        raise NotImplementedError()
-    
-    def getDitaFileObj(self, thePath):
-        """Return a DitaFileObj that corresponds to thePath, may raise KeyError."""
-        raise NotImplementedError()
-        
-    def setDitaFileObj(self, thePath, theObj):
-        """Load a DitaFileObj or update a mutated DitaFileObj."""
-        raise NotImplementedError()
-        
-class DitaFileMapInMemory(DitaFileMapBase):
-    """Holds map of {file path : class DitaFile, ...} in memory."""
-    def __init__(self):
-        # Map of {file path : class DitaFile, ...}
-        self._fileMap = {}
-    
-    def keys(self):
-        """Returns an unsorted list of keys in the map."""
-        return self._fileMap.keys()
-    
-    def has_key(self, thePath):
-        """Return True if the key exists."""
-        return self._fileMap.has_key(thePath)
-        
-    def remove(self, thePath):
-        """Remove the entry corresponding to thePath, may raise KeyError."""
-        del self._fileMap[thePath]
-    
-    def getDitaFileObj(self, thePath):
-        """Return a DitaFileObj that corresponds to thePath, may raise KeyError."""
-        return self._fileMap[thePath]
-        
-    def setDitaFileObj(self, thePath, theObj):
-        """Load a DitaFileObj or update a mutated DitaFileObj."""
-        self._fileMap[thePath] = theObj
-        
-class DitaFileMapShelve(DitaFileMapBase):
-    """Holds map of {file path : class DitaFile, ...} in a shelve database."""
-    DBASE_FILENAME = 'linkchecker.dbase'
-    def __init__(self):
-        if os.path.exists(self.DBASE_FILENAME):
-            os.remove(self.DBASE_FILENAME)
-        self._db = shelve.open(self.DBASE_FILENAME)
-        # Use this as a 'cache' as shelf.keys() is slow
-        self._keys = set()
-    
-    def keys(self):
-        """Returns an unsorted list of keys in the map."""
-        return list(self._keys)
-    
-    def has_key(self, thePath):
-        """Return True if the key exists."""
-        return thePath in self._keys
-        
-    def remove(self, thePath):
-        """Remove the entry corresponding to thePath, may raise KeyError."""
-        del self._db[thePath]
-        self._keys.remove(thePath)
-    
-    def getDitaFileObj(self, thePath):
-        """Return a DitaFileObj that corresponds to thePath, may raise KeyError."""
-        return self._db[thePath]
-        
-    def setDitaFileObj(self, thePath, theObj):
-        """Load a DitaFileObj or update a mutated DitaFileObj."""
-        self._db[thePath] = theObj
-        self._keys.add(thePath)
-        
-class DitaFileSet(DitaLinkCheckBase):
-    """Holds information about a set of DITA files."""
-    STATS_KEYS = ('Maps', 'Non-maps', 'Files', 'Bytes', 'IDs', 'Refs')
-    def __init__(self,
-                 theDir,
-                 procDir=True,
-                 thePatterns=None,
-                 recursive=False,
-                 testExt=False,
-                 useDbase=False):
-        """Constructor. theDir is the root directory of DITA XML.
-        procDir - If True then process this directory immediately, otherwise
-                    the directory can be processed independently and
-                    _addFileObj() or _addDitaFileObj() invoked.
-        thePatterns - If supplied this should be a space separated string of
-                        fnmatch extensions.
-        recursive - If True and procDir True the directory is processed recursively.
-        testExt - If True then test external URLs.
-        useDbase - If True then store all DitaFile objects in an external dbase
-                    (slower but less memory issues).
-        """
-        if thePatterns is None:
-            thePatterns = FNMATCH_STRING.split(' ')
-        if theDir is not None:
-            theDir = normalisePath(theDir)
-        super(DitaFileSet, self).__init__(theDir)
-        logging.info('DitaFileSet starting to read...')
-        GlobalUrlCache.clear()
-        self._testExt = testExt
-        # Set up how we store the DitaFile objects
-        if useDbase:
-            self._fileMap = DitaFileMapShelve()
-        else:
-            self._fileMap = DitaFileMapInMemory()
-        # Map of (str(rootId) : filepath, ...) with no duplicates
-        # Keys will be in self._uniqueRootIds
-        self._rootIdToFilePathMap = {}
-        # Path to the unique DITA map
-        self._uniqueMapPath = None
-        # Count of {error_code : count, ...}
-        self._errCountMap = CountDict()
-        # Statistics
-        self._statsMap = CountDict()
-        ## and initialise
-        #for k in self.STATS_KEYS:
-        #    self._statsMap[k]
-        # Finalisation control (weak)
-        self._hasFinalised = False
-        # Timers
-        self._timeRead = time.clock()
-        self._timeAnalyse = 0.0
-        if procDir:
-            if theDir is not None and os.path.isdir(theDir):
-                self._readDir(theDir, thePatterns, recursive)
-            else:
-                self.addError(500, (theDir,))
-            # Finalise and run all the tests
-            self.finalise()
-    
-    @property
-    def errCountMap(self):
-        return self._errCountMap
-    
-    @property
-    def statsMap(self):
-        return self._statsMap
-    
-    def writeStatistics(self, s=sys.stdout):
-        """Writes out read statistics."""
-        s.write(' Statistics '.center(PRINT_WIDTH, '='))
-        s.write('\n')
-        if len(self._statsMap) > 0:
-            o = self.STATS_KEYS
-            #assert(set(o) == set(self._statsMap.keys())), \
-            #    '%s != %s' % (o, self._statsMap.keys())
-            for k in o:
-                try:
-                    m = self._statsMap[k] / (1024.0*1024.0)
-                    s.write('%20s: %10d [%10.3f M]\n' % (k, self._statsMap[k], m))
-                except KeyError:
-                    s.write('%20s: %10s \n' % (k, 'Not seen'))
-            s.write('%20s: %10.3f (s)\n' % ('Read time', self._timeRead))
-            s.write('%20s: %10.3f (s)\n' % ('Analysis time', self._timeAnalyse))
-            s.write('='*PRINT_WIDTH)
-        else:
-            s.write('Nothing processed.')
-        s.write('\n')
-        
-    def writeErrorSummary(self, s=sys.stdout):
-        s.write(' Error Summary '.center(PRINT_WIDTH, '='))
-        s.write('\n')
-        if len(self._errCountMap):
-            s.write('%4s %10s %s\n' % ('Code', 'Count', 'Error'))
-            s.write('%4s %10s %s\n' % ('----', '-----', '-----'))
-            errCodeS = self._errCountMap.keys()
-            errCodeS.sort()
-            for c in errCodeS:
-                s.write('%4d %10d %s\n' \
-                        % (c, self._errCountMap[c], genericStringForErrorCode(c)))
-        else:
-            s.write('No errors\n')            
-        s.write('='*PRINT_WIDTH)
-        s.write('\n')
-        
-    def writeErrors(self, isGeneric, theFilter, theStream=sys.stdout):
-        """Writes out errors for me and my files."""
-        theStream.write('\n'.join(self.errStrings(isGeneric, theFilter)))
-        fileS = self._fileMap.keys()
-        fileS.sort()
-        for aFile in fileS:
-            # Immutable call so just use get
-            self._fileMap.getDitaFileObj(aFile).writeErrors(isGeneric, theFilter, theStream)
-        
-    def allErrStrings(self, isGeneric, theFilter):
-        """Return a sorted list of error messages without duplicates including
-        files."""
-        retSet = set(self.errStrings(isGeneric, theFilter))
-        fileS = self._fileMap.keys()
-        fileS.sort()
-        for aFilePath in self._fileMap.keys():
-            # Immutable call so just use get
-            for anErr in self._fileMap.getDitaFileObj(aFilePath).errStrings(isGeneric, theFilter): 
-                retSet.add(anErr)
-        retList = list(retSet)
-        retList.sort()
-        return retList
-            
-    def _readDir(self, theDir, thePatS, recursive):    
-        assert(os.path.isdir(theDir))
-        for aName in os.listdir(theDir):
-            aPath = os.path.join(theDir, aName)
-            if os.path.isdir(aPath) and recursive:
-                self._readDir(aPath, thePatS, recursive)
-            elif os.path.isfile(aPath):
-                for aPat in thePatS:
-                    if fnmatch.fnmatch(aName, aPat):
-                        assert(not self._fileMap.has_key(aPath))
-                        logging.debug(' Reading %s' % aPath)
-                        try:
-                            f = open(aPath)
-                        except IOError:
-                            f = None
-                        self._addFileObj(f, aPath)
-                        break
-
-    def _addFileObj(self, theFileObj, theFilePath):
-        myObj = DitaFileObj(theFileObj, theFilePath)
-        self._addDitaFileObj(myObj)
-
-    def _addDitaFileObj(self, theDitaFileObj):
-        """Store theDitaFileObj under its identity, or record error 504 if
-        that identity is already present. The statistics are updated in
-        both cases."""
-        ident = theDitaFileObj.identity
-        if not self._fileMap.has_key(ident):
-            # Mutable call so use set
-            self._fileMap.setDitaFileObj(ident, theDitaFileObj)
-        else:
-            self.addError(504, (ident,))
-        # Update statistics (files, bytes, ids, refs) etc.
-        stats = self._statsMap
-        stats['Files'] += 1
-        stats['Bytes'] += theDitaFileObj.bytes
-        stats['IDs'] += len(theDitaFileObj.idS)
-        stats['Refs'] += len(theDitaFileObj.refS)
-        kindKey = 'Maps' if theDitaFileObj.isMap else 'Non-maps'
-        stats[kindKey] += 1
-    
-    def finalise(self):
-        """Creates the environment for all checks and then runs them.
-
-        The work is done on the first call only; later calls just log the
-        start and done messages."""
-        logging.info('DitaFileSet.finalise() start...')
-        if not self._hasFinalised:
-            # Presumably _timeRead holds the time reading started, it is
-            # turned into a duration here.
-            self._timeRead = time.clock() - self._timeRead
-            self._timeAnalyse = time.clock()
-            # The checks are run strictly in this order.
-            for aCheck in (
-                    self._initRootIdToFilePathMap,
-                    self._checkDupeIdS,
-                    self._setMapCycles,
-                    self._checkLonely,
-                    self._checkRefArcs,
-                ):
-                aCheck()
-            self._errCountMap = CountDict()
-            self.updateErrorCount(self._errCountMap)
-            self._hasFinalised = True
-            self._timeAnalyse = time.clock() - self._timeAnalyse
-        logging.info('DitaFileSet.finalise() done.')
-        
-    def _initRootIdToFilePathMap(self):
-        """Build self._rootIdToFilePathMap and report duplicated root IDs.
-
-        Only root IDs seen in exactly one file are kept in the map. A root ID
-        seen in two or more files is removed from it and reported once as
-        error 501 with the sorted tuple of files that share it. Files whose
-        rootId is None are ignored."""
-        # Map of (str(rootId) : filepath, ...) with no duplicates
-        self._rootIdToFilePathMap = {}
-        # Temporary map of (str(rootId) : [filepath, ...], ...)
-        myDupeIdFiles = {}
-        for fPath in self._fileMap.keys():
-            # fObj is not written to so we don't need to use set
-            fObj = self._fileMap.getDitaFileObj(fPath)
-            rId = fObj.rootId
-            if rId is None:
-                continue
-            if rId in myDupeIdFiles:
-                # Third or later file with this root ID
-                myDupeIdFiles[rId].append(fObj.identity)
-            elif rId in self._rootIdToFilePathMap:
-                # Second file with this root ID: it is no longer unique so
-                # move the first file out of the map. rId can not be in
-                # myDupeIdFiles here (tested above) so the list is new.
-                myFile = self._rootIdToFilePathMap.pop(rId)
-                myDupeIdFiles[rId] = [myFile, fPath]
-            else:
-                self._rootIdToFilePathMap[rId] = fObj.identity
-        # Set duplicate errors
-        for rId, fileS in myDupeIdFiles.items():
-            self.addError(501, (rId, tuple(sorted(fileS))))
-    
-    def _checkDupeIdS(self):
-        """Checks if there are any duplicate IDs anywhere.
-
-        Every ID that is seen more than once is reported as one error 505
-        with the sorted tuple of the files it was seen in, starting with the
-        file it was first seen in."""
-        # {ID : [fileS, ...], ...}
-        myDupeIdMap = {}
-        # Temporary data structure
-        # {ID : first file ID is seen in, ...}
-        seenIdMap = {}
-        for f in self._fileMap.keys():
-            # o is not written to so we don't need set...
-            o = self._fileMap.getDitaFileObj(f)
-            for anId in o.idS:
-                if anId in seenIdMap:
-                    # The list is seeded with the first file on the first
-                    # duplicate only.
-                    myDupeIdMap.setdefault(anId, [seenIdMap[anId]]).append(f)
-                else:
-                    seenIdMap[anId] = f
-        # Now add to errs as a 505 error message with the files sorted
-        for anId, fileS in myDupeIdMap.items():
-            self.addError(505, (anId, tuple(sorted(fileS))))
-                    
-    def _retMapAdjList(self):
-        """Create an adjacency list {file_path : set(refs), ...} (all strings)
-
-        Only files that are maps get an entry. Each reference contributes the
-        file part of fileFragment() resolved against the map's own identity.
-        Note that fileFragment() returns (None, None) for a reference with a
-        URI scheme so None can be a member of a set."""
-        adjList = {}
-        for f in self._fileMap.keys():
-            fObj = self._fileMap.getDitaFileObj(f)
-            if fObj.isMap:# and fObj.rootId is not None:
-                myId = fObj.identity
-                # Test the dict directly rather than building a list of keys
-                assert(myId not in adjList)
-                adjList[myId] = set(r.fileFragment(myId)[0] for r in fObj.refS)
-        return adjList
-
-    def _setMapCycles(self):
-        """Sets any cyclic references seen in DITA maps.
-
-        A walk is started from every map in the adjacency list and all the
-        cycles found are collected into one set that is then handed to
-        _setCycleErrors()."""
-        adjList = self._retMapAdjList()
-        myCycles = set()
-        for aPath in adjList:
-            # Each walk starts from a fresh branch holding just the start map
-            self._recurseCycles(adjList, [aPath], myCycles)
-        self._setCycleErrors(myCycles)
-            
-    def _recurseCycles(self, a, b, c):
-        """Depth first search for cycles starting from the end of a branch.
-
-        a - The adjacency list {file_path : set(refs), ...}.
-        b - The current branch, a non-empty list of paths. It is extended
-            and restored during the search so it is unchanged on return.
-        c - The set that each cycle found is added to as a tuple of paths.
-
-        A reference back to a path that is already on the branch closes a
-        cycle, this is recorded as the part of the branch from that path to
-        the end."""
-        assert(len(b) > 0)
-        # Only maps have an entry in the adjacency list, anything else is
-        # the end of the branch.
-        for r in a.get(b[-1], ()):
-            if r in b:
-                c.add(tuple(b[b.index(r):]))
-            else:
-                b.append(r)
-                self._recurseCycles(a, b, c)
-                b.pop()
-        
-    def _setCycleErrors(self, theC):
-        """Report every cycle in theC (an iterable of non-empty tuples of
-        map paths) as error 701.
-
-        The cycle is reported once on this set as str() of the tuple and
-        once on every map in the cycle as str() of the list of paths rotated
-        to start with that map and closed with that map."""
-        for aCycle in theC:
-            self.addError(701, (str(aCycle),))
-            members = list(aCycle)
-            assert(len(members) > 0)
-            for i in range(len(members)):
-                # The cycle as seen from member i
-                rotated = members[i:] + members[:i]
-                startPath = rotated[0]
-                # Should this be in the file thus, or in the files set?
-                # As we are mutating the file object we need to use both
-                # getDitaFileObj() and setDitaFileObj()
-                fObj = self._fileMap.getDitaFileObj(startPath)
-                fObj.addError(701, (str(rotated + [startPath]),))
-                self._fileMap.setDitaFileObj(startPath, fObj)
-
-    def _checkLonely(self):
-        """Checks for lonely maps and then for lonely topics."""
-        self._checkLonelyMaps()
-        self._checkLonelyTopics()
-        
-    def _checkLonelyMaps(self):
-        """Checks for lonely maps.
-
-        A map is lonely if no map references it. If more than one map is
-        left unreferenced then each of them is reported as error 700. If
-        exactly one is left it is not an error and its path is stored in
-        self._uniqueMapPath."""
-        mapPathSet = set()
-        pathSetRemain = set()
-        for f in self._fileMap.keys():
-            if self._fileMap.getDitaFileObj(f).isMap:
-                mapPathSet.add(f)
-                pathSetRemain.add(f)
-        for aPath in mapPathSet:
-            myMapObj = self._fileMap.getDitaFileObj(aPath)
-            for r in myMapObj.refS:
-                # The reference must be resolved against the map that holds
-                # it (aPath), not against whichever file the loop above
-                # happened to finish on.
-                refFile = r.fileFragment(aPath)[0]
-                # No-op if refFile is a topic or an already seen map
-                pathSetRemain.discard(refFile)
-        if len(pathSetRemain) > 1:
-            for aPath in pathSetRemain:
-                self.addError(700, (aPath,))
-        elif len(pathSetRemain) == 1:
-            self._uniqueMapPath = pathSetRemain.pop()
-
-    def _checkLonelyTopics(self):
-        """Checks for topics that are not referenced by any map.
-
-        Every non-map file that no map refers to is reported as error 600."""
-        mapPathSet = set()
-        pathSetRemain = set()
-        for f in self._fileMap.keys():
-            # Maps go in one set, everything else is a candidate lonely topic
-            if self._fileMap.getDitaFileObj(f).isMap:
-                bucket = mapPathSet
-            else:
-                bucket = pathSetRemain
-            bucket.add(f)
-        for aMapPath in mapPathSet:
-            for r in self._fileMap.getDitaFileObj(aMapPath).refS:
-                refFile, frag = r.fileFragment(aMapPath)
-                # No-op if the topic has already been seen in another map
-                pathSetRemain.discard(refFile)
-        for aPath in pathSetRemain:
-            self.addError(600, (aPath,))
-            
-    def _checkRefArcs(self):
-        """Checks all references are reachable.
-
-        For every reference in every file:
-        - If the reference has a URI scheme then checkUrl() is called on it,
-          but only if external testing is switched on (self._testExt).
-        - Otherwise the reference is resolved against the source file and:
-          error 410 is added to the source file if the target is neither in
-          the file map nor a file on the file system, error 411 is added if
-          the target file is known but does not have the fragment ID, and
-          finally the element names are checked by _checkRefArcElemName().
-        A source file that has had errors added is written back to the file
-        map."""
-        for fPath in self._fileMap.keys():
-            fObjSrc = self._fileMap.getDitaFileObj(fPath)
-            hasMutated = False
-            for rObjSrc in fObjSrc.refS:
-                if rObjSrc.scheme:
-                    # Decide whether to test an external URL
-                    # NOTE(review): hasMutated is not set after checkUrl().
-                    # If checkUrl() records its result on the reference then
-                    # that may not be written back - TODO confirm.
-                    if self._testExt:
-                        rObjSrc.checkUrl()
-                else:
-                    fi, fr = rObjSrc.fileFragment(fPath)
-                    assert(fi is not None), 'fi is None for rObjSrc: %s in file: %s' % (rObjSrc, fPath)
-                    assert(fr is not None), 'fr is None for rObjSrc: %s in file: %s' % (rObjSrc, fPath)
-                    # If a url then fileFragment() returns (None, None) but
-                    # references with a scheme are handled above.
-                    try:
-                        fObjTgt = self._fileMap.getDitaFileObj(fi)
-                    except KeyError:
-                        # Target file can not be found in the IR
-                        # check the file system to see if it is a non-DITA resource
-                        if not os.path.isfile(fi):
-                            fObjSrc.addError(410, (fi,))
-                            hasMutated = True
-                    else:
-                        if len(fr) > 0:
-                            # Target file is found, test fragment
-                            if not fObjTgt.hasId(fr):
-                                # Fragment not found
-                                fObjSrc.addError(411, (fi, fr))
-                                hasMutated = True
-                        if self._checkRefArcElemName(fObjSrc, rObjSrc, fObjTgt, fr):
-                            hasMutated = True
-            if hasMutated:
-                # Mutable call so use set
-                self._fileMap.setDitaFileObj(fPath, fObjSrc)
-
-    def _checkRefArcElemName(self, fObjSrc, rObjSrc, fObjTgt, frag):
-        """Test source and target element names
-        e.g. Source <cxxClassRef> should match target <cxxClass>
-        And in vanilla DITA:
-        <topicref href="batcaring.dita" type="task"></topicref>
-        or:
-        <topicref href="batcaring.dita" format="ditamap"></topicref>
-        Should match target element <task>.
-
-        fObjSrc - The file object that holds the reference, any error is
-                  added to this.
-        rObjSrc - The reference object.
-        fObjTgt - The file object that the reference resolves to.
-        frag    - The fragment part of the reference, an empty string means
-                  that the target is the root element of fObjTgt.
-
-        Returns True if an error (412 to 419) was added to fObjSrc, False
-        otherwise. False is also returned when the target element can not be
-        found as that is reported elsewhere."""
-        isRootTgt = False
-        if len(frag) == 0:
-            # iObjTgt is the root element of fObjTgt
-            if fObjTgt.rootId is None or fObjTgt.idElem(fObjTgt.rootId) is None:
-                # Covered by other error codes
-                return False
-            iObjTgt = fObjTgt.idObj(fObjTgt.rootId)
-            isRootTgt = True
-        elif fObjTgt.hasId(frag):
-            iObjTgt = fObjTgt.idObj(frag)
-        else:
-            # frag not found that will be a 411 error (handled by caller).
-            return False
-        hasMutated = False
-        # Have an rObjSrc + iObjTgt so check elements
-        # First case:
-        if rObjSrc.elem.endswith('Ref'):
-            if rObjSrc.elem[:-3] != iObjTgt.elem:
-                if isRootTgt:
-                    fObjSrc.addError(412, (rObjSrc.elem, iObjTgt.elem))
-                else:
-                    fObjSrc.addError(413, (fObjTgt.idElem(frag), rObjSrc.elem, frag))
-                hasMutated = True
-        # Second case(s) for vanilla DITA
-        elif rObjSrc.elem == 'topicref':
-            # Check DITA map links
-            if rObjSrc.format == 'ditamap' and iObjTgt.elem != 'map':
-                # Target must be a root element (actually we don't care)
-                fObjSrc.addError(414, (iObjTgt.elem,))
-                hasMutated = True
-            elif iObjTgt.elem == 'map' and rObjSrc.format != 'ditamap':
-                fObjSrc.addError(415, (rObjSrc.format,))
-                hasMutated = True
-            elif not (rObjSrc.format == 'ditamap' and iObjTgt.elem == 'map'):
-                # Treat refType None as type="topic", see DITA standard for <topicref>
-                # Well, also look at the type attribute in chapter 25
-                # "When the type attribute is unspecified, it should be
-                # determined by inspecting the target if possible. If the
-                # target cannot be inspected for some reason, the value
-                # should default to "topic".
-                # Note: DITA 1.2 takes a different view...
-                # So an unspecified type is not treated as an error here.
-                if rObjSrc.refType is not None and rObjSrc.refType != iObjTgt.elem:
-                    if isRootTgt:
-                        fObjSrc.addError(416, (rObjSrc.refType, iObjTgt.elem,))
-                    else:
-                        fObjSrc.addError(417, (rObjSrc.refType, iObjTgt.elem, frag,))
-                    hasMutated = True
-                # Otherwise topicref looks OK
-        elif rObjSrc.elem != 'xref' and rObjSrc.elem not in XREF_DESCENDENTS:
-            # Unknown referencing element
-            if isRootTgt:
-                fObjSrc.addError(418, (rObjSrc.elem, fObjTgt.doctype))
-            else:
-                fObjSrc.addError(419, (rObjSrc.elem, fObjTgt.idElem(frag), frag))
-            hasMutated = True
-        return hasMutated
-                                        
-    def updateErrorCount(self, theMap):
-        """Updates a map of {error_code : count, ...}.
-
-        The number of errors held by this set is added for each error code
-        and then every file object is asked to add its own counts."""
-        ownErrS = self._errS
-        if ownErrS is not None:
-            for aCode in ownErrS.keys():
-                theMap[aCode] += len(ownErrS[aCode])
-        fileMap = self._fileMap
-        for fPath in fileMap.keys():
-            fObj = fileMap.getDitaFileObj(fPath)
-            # Mutable call so need to update
-            fObj.updateErrorCount(theMap)
-            fileMap.setDitaFileObj(fPath, fObj)
-
-    def debugDump(self, s=sys.stdout, prefix=''):
-        """Dump of IR for debug purposes.
-
-        Writes a banner to the stream s, then the debugDump() of every file
-        object in sorted path order (prefix is passed through to each), then
-        a closing banner."""
-        s.write(' Debug Dump '.center(PRINT_WIDTH, '+'))
-        s.write('\n')
-        for aPath in sorted(self._fileMap.keys()):
-            self._fileMap.getDitaFileObj(aPath).debugDump(s, prefix)
-        s.write(' END Debug Dump '.center(PRINT_WIDTH, '+'))
-        s.write('\n\n')
-    
-#####################################
-# Multiprocessing code
-#####################################
-def retDitaFileObj(thePath):
-    """Returns a DitaFilePath created from thePath. This is the function
-    that retMpDitaFileSetObj() hands to Pool.apply_async() for each file."""
-    # NOTE(review): the result is used as a DitaFileObj by the caller
-    # (.identity, _addDitaFileObj()); confirm that DitaFilePath is the
-    # intended class and not a typo for DitaFileObj.
-    return DitaFilePath(thePath)
- 
-def genDitaPath(theDir, thePatS, recursive):
-    """Generator of the paths of the files in theDir whose name matches any
-    of the fnmatch patterns in thePatS. Sub-directories are searched only if
-    recursive is true. A path is yielded once however many patterns it
-    matches."""
-    assert(os.path.isdir(theDir))
-    for entry in os.listdir(theDir):
-        entryPath = os.path.join(theDir, entry)
-        if recursive and os.path.isdir(entryPath):
-            for subPath in genDitaPath(entryPath, thePatS, recursive):
-                yield subPath
-        elif os.path.isfile(entryPath):
-            if any(fnmatch.fnmatch(entry, p) for p in thePatS):
-                yield entryPath
-    
-def retMpDitaFileSetObj(theDir,
-                        thePatterns,
-                        recursive,
-                        numJobs, 
-                        checkExt,
-                        useDb):
-    """Returns a finalised DitaFileSet of the files in theDir where the
-    files are read by a pool of worker processes.
-
-    theDir      - The directory to read, this must exist.
-    thePatterns - The fnmatch patterns that select the files.
-    recursive   - If true then sub-directories are read as well.
-    numJobs     - The number of worker processes, 0 means one per CPU.
-    checkExt    - Passed to DitaFileSet as testExt.
-    useDb       - Passed to DitaFileSet as useDbase.
-
-    The results are added to the set in the order that the files were
-    found. The pool is always shut down before this returns or raises."""
-    assert(os.path.isdir(theDir))
-    assert(numJobs >= 0)
-    retObj = DitaFileSet(theDir, procDir=False, testExt=checkExt, useDbase=useDb)
-    myNumJobs = numJobs
-    if numJobs == 0:
-        myNumJobs = multiprocessing.cpu_count()
-        logging.info('Set multiprocessing number of jobs to %d' % myNumJobs)
-    myPool = multiprocessing.Pool(processes=myNumJobs)
-    try:
-        # Submit everything first so that the workers are kept busy while
-        # the results are collected.
-        myResults = [
-            myPool.apply_async(retDitaFileObj, (f,))
-                for f in genDitaPath(theDir, thePatterns, recursive)
-            ]
-        for result in myResults:
-            myObj = result.get()
-            logging.debug('Got %s' % myObj.identity)
-            retObj._addDitaFileObj(myObj)
-    finally:
-        # Do not leave worker processes behind. On the normal path all the
-        # results have been collected so there is no outstanding work to
-        # lose, on failure the remaining work is abandoned.
-        myPool.terminate()
-        myPool.join()
-    # Note: finalise() is a serial process
-    logging.info('retMpDitaFileSetObj(): finalising')
-    retObj.finalise()
-    return retObj
-
-######################################
-# Test code
-######################################
-try:
-    import cStringIO as StringIO
-except ImportError:
-    import StringIO
-
-class NullClass(unittest.TestCase):
-    """A test case that has no tests."""
-    pass
-
-class TestCountDict(unittest.TestCase):
-    """Tests CountDict."""
-    def setUp(self):
-        pass
-    
-    def tearDown(self):
-        pass
-    
-    def testSetUpTearDown(self):
-        """TestCountDict: test setUp() and tearDown()."""
-        pass
-    
-    def test_basic(self):
-        """TestCountDict: test basic functionality."""
-        myMap = CountDict()
-        self.assertEqual(myMap.has_key('wtf'), False)
-        # Reading a missing key gives 0 and, as the next line checks, also
-        # creates the key.
-        self.assertEqual(myMap['wtf'], 0)
-        self.assertEqual(myMap.has_key('wtf'), True)
-        myMap['wtf'] += 1
-        self.assertEqual(myMap['wtf'], 1)
-
-class TestDitaId(unittest.TestCase):
-    """Tests DitaId: reading the id attribute, GUID checking, equality and
-    hashing, and the error for a '#' in an id."""
-    def setUp(self):
-        pass
-    
-    def tearDown(self):
-        pass
-    
-    def testSetUpTearDown(self):
-        """DitaId: test setUp() and tearDown()."""
-        pass
-    
-    def test_basic(self):
-        """DitaId: basic read of a node with an id."""
-        myXml = """<cxxClass id="class_big_endian"/>"""
-        myTree = etree.parse(StringIO.StringIO(myXml))
-        myObj = DitaId(myTree.getroot())
-        self.assertEqual(myObj.id, 'class_big_endian')
-        self.assertEqual(str(myObj), 'class_big_endian')
-        self.assertEqual(myObj.errStrings(True, None), [])
-        self.assertEqual(myObj.errStrings(False, None), [])
-        
-    def test_guid_00(self):
-        """DitaId: basic read of a node with a GUID id."""
-        myXml = """<cxxClass id="GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E"/>"""
-        myTree = etree.parse(StringIO.StringIO(myXml))
-        myObj = DitaId(myTree.getroot())
-        self.assertEqual(myObj.id, 'GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E')
-        myObj.checkGuid()
-        self.assertEqual(myObj.errStrings(True, None), [])
-        self.assertEqual(myObj.errStrings(False, None), [])
-
-    def test_guid_01(self):
-        """DitaId: checkGuid() fails on an id without the 'GUID-' prefix."""
-        myXml = """<cxxClass id="25825EC4-341F-3EA4-94AA-7DCE380E6D2E"/>"""
-        myTree = etree.parse(StringIO.StringIO(myXml))
-        myObj = DitaId(myTree.getroot())
-        self.assertEqual(myObj.id, '25825EC4-341F-3EA4-94AA-7DCE380E6D2E')
-        myObj.checkGuid()
-        self.assertEqual(
-            myObj.errStrings(False, None),
-            [
-             'GUID specification does not match id="25825EC4-341F-3EA4-94AA-7DCE380E6D2E"'
-            ])
-        self.assertEqual(
-            myObj.errStrings(True, None),
-            [
-             'GUID specification does not match id="%s"' % GENERIC_STRING,
-            ])
-
-    def test_cmp_eq_00(self):
-        """DitaId: cmp(), == of two identical nodes."""
-        myXml = """<cxxClass id="class_big_endian"/>"""
-        myTree = etree.parse(StringIO.StringIO(myXml))
-        myObj_00 = DitaId(myTree.getroot())
-        myObj_01 = DitaId(myTree.getroot())
-        self.assertEqual(cmp(myObj_00, myObj_01), 0)
-        self.assertEqual((myObj_00 == myObj_01), True)
-
-    def test_cmp_eq_01(self):
-        """DitaId: cmp(), == of the same id on two different elements."""
-        myXml_00 = """<cxxClass id="big_endian"/>"""
-        myTree_00 = etree.parse(StringIO.StringIO(myXml_00))
-        myObj_00 = DitaId(myTree_00.getroot())
-        myXml_01 = """<cxxStruct id="big_endian"/>"""
-        myTree_01 = etree.parse(StringIO.StringIO(myXml_01))
-        myObj_01 = DitaId(myTree_01.getroot())
-        self.assertEqual(cmp(myObj_00, myObj_01), 0)
-        self.assertEqual((myObj_00 == myObj_01), True)
-
-    def test_set(self):
-        """DitaId: read of a node with an id several times into a set and check unique."""
-        myXml = """<cxxClass id="class_big_endian"/>"""
-        myTree = etree.parse(StringIO.StringIO(myXml))
-        s = set()
-        i = 0
-        while i < 8:
-            s.add(DitaId(myTree.getroot()))
-            i += 1
-        self.assertEqual(len(s), 1)
-        self.assertEqual(DitaId(myTree.getroot()) in s, True)
-
-    def test_map(self):
-        """DitaId: read of a node with an id several times into a map and check unique."""
-        myXml = """<cxxClass id="class_big_endian"/>"""
-        myTree = etree.parse(StringIO.StringIO(myXml))
-        m = {}
-        i = 0
-        while i < 8:
-            m[DitaId(myTree.getroot())] = 1
-            i += 1
-        self.assertEqual(len(m), 1)
-        self.assertEqual(m.has_key(DitaId(myTree.getroot())), True)
-
-    def test_error_hash(self):
-        """DitaId: error with a '#' in an id."""
-        myXml = """<cxxClass id="class_#big_endian"/>"""
-        myTree = etree.parse(StringIO.StringIO(myXml))
-        myObj = DitaId(myTree.getroot())
-        self.assertEqual(myObj.id, 'class_#big_endian')
-        self.assertEqual(str(myObj), 'class_#big_endian')
-        self.assertEqual(
-                myObj.errStrings(True, None),
-                [
-                    genericStringForErrorCode(100),
-                ]
-            )
-        self.assertEqual(
-                myObj.errStrings(False, None),
-                [
-                 'Character \'#\' not allowed in id="class_#big_endian"',
-                 ]
-            )
-        
-
-
-class TestDitaRef(unittest.TestCase):
-    """Tests DitaRef: splitting an href into scheme, path and fragment,
-    resolving it against a source file with fileFragment(), the errors for a
-    missing or malformed href and GUID checking."""
-    def setUp(self):
-        pass
-    
-    def tearDown(self):
-        pass
-    
-    def testSetUpTearDown(self):
-        """DitaRef: test setUp() and tearDown()."""
-        pass
-    
-    def test_basic(self):
-        """DitaRef: basic read of an xref node, no fragment."""
-        myXml = """<xref href="class_big_endian"/>"""
-        myTree = etree.parse(StringIO.StringIO(myXml))
-        myObj = DitaRef(myTree.getroot())
-        self.assertEqual(myObj.href, 'class_big_endian')
-        self.assertEqual(myObj.path, 'class_big_endian')
-        self.assertEqual(myObj.elem, 'xref')
-        self.assertEqual(str(myObj), 'xref class_big_endian')
-        self.assertEqual(myObj.fragment, '')
-        self.assertEqual(myObj.scheme, '')
-        self.assertEqual(myObj.errStrings(False, None), [])
-        self.assertEqual(myObj.errStrings(True, None), [])
-
-    def test_basic_frag(self):
-        """DitaRef: basic read of an xref node, with fragment."""
-        myXml = """<xref href="class_big_endian.xml#function"/>"""
-        myTree = etree.parse(StringIO.StringIO(myXml))
-        myObj = DitaRef(myTree.getroot())
-        self.assertEqual(myObj.href, 'class_big_endian.xml#function')
-        self.assertEqual(myObj.path, 'class_big_endian.xml')
-        self.assertEqual(myObj.fragment, 'function')
-        self.assertEqual(myObj.scheme, '')
-        self.assertEqual(myObj.errStrings(False, None), [])
-        self.assertEqual(myObj.errStrings(True, None), [])
-
-    def test_file_frag_00(self):
-        """DitaRef: accessing an xref node, with a file and a fragment."""
-        myXml = """<xref href="class_big_endian.xml#function"/>"""
-        myTree = etree.parse(StringIO.StringIO(myXml))
-        myObj = DitaRef(myTree.getroot())
-        self.assertEqual(myObj.href, 'class_big_endian.xml#function')
-        self.assertEqual(myObj.path, 'class_big_endian.xml')
-        self.assertEqual(myObj.fragment, 'function')
-        self.assertEqual(myObj.scheme, '')
-        srcPath = normalisePath(os.path.join('C:%s' % os.sep, 'spam', 'eggs.xml'))
-        expPath = normalisePath(os.path.join('C:%s' % os.sep, 'spam', 'class_big_endian.xml'))
-        self.assertEqual(
-            myObj.fileFragment(srcPath),
-            (expPath, 'function')
-        )
-        self.assertEqual(myObj.errStrings(False, None), [])
-        self.assertEqual(myObj.errStrings(True, None), [])
-        
-    def test_file_frag_01(self):
-        """DitaRef: accessing an xref node, with a file and a fragment and relative path with '\\'."""
-        myXml = """<xref href="..\\chips\\class_big_endian.xml#function"/>"""
-        myTree = etree.parse(StringIO.StringIO(myXml))
-        myObj = DitaRef(myTree.getroot())
-        srcPath = normalisePath(os.path.join('C:%s' % os.sep, 'spam', 'eggs.xml'))
-        expPath = normalisePath(os.path.join('C:%s' % os.sep, 'chips', 'class_big_endian.xml'))
-        self.assertEqual(
-            myObj.fileFragment(srcPath),
-            (expPath, 'function')
-        )
-        self.assertEqual(myObj.errStrings(False, None), [])
-        self.assertEqual(myObj.errStrings(True, None), [])
-        
-    def test_file_frag_02(self):
-        """DitaRef: accessing an xref node, with a file and a fragment and relative path with '/'."""
-        myXml = """<xref href="../chips/class_big_endian.xml#function"/>"""
-        myTree = etree.parse(StringIO.StringIO(myXml))
-        myObj = DitaRef(myTree.getroot())
-        srcPath = normalisePath(os.path.join('C:%s' % os.sep, 'spam', 'eggs.xml'))
-        expPath = normalisePath(os.path.join('C:%s' % os.sep, 'chips', 'class_big_endian.xml'))
-        self.assertEqual(
-            myObj.fileFragment(srcPath),
-            (expPath, 'function')
-        )
-        self.assertEqual(myObj.errStrings(False, None), [])
-        self.assertEqual(myObj.errStrings(True, None), [])
-        
-    def test_file_frag_03(self):
-        """DitaRef: accessing an xref node, with no file but with a fragment."""
-        myXml = """<xref href="#function"/>"""
-        myTree = etree.parse(StringIO.StringIO(myXml))
-        myObj = DitaRef(myTree.getroot())
-        self.assertEqual(myObj.href, '#function')
-        self.assertEqual(myObj.path, '')
-        self.assertEqual(myObj.fragment, 'function')
-        self.assertEqual(myObj.scheme, '')
-        # With no file part the reference resolves to the source file itself
-        srcPath = normalisePath(os.path.join('C:%s' % os.sep, 'spam', 'eggs.xml'))
-        expPath = normalisePath(os.path.join('C:%s' % os.sep, 'spam', 'eggs.xml'))
-        self.assertEqual(
-            myObj.fileFragment(srcPath),
-            (expPath, 'function')
-        )
-        self.assertEqual(myObj.errStrings(False, None), [])
-        self.assertEqual(myObj.errStrings(True, None), [])
-        
-    def test_basic_scheme(self):
-        """DitaRef: an xref node with a URI scheme."""
-        myXml = """<xref href="http://www.cwi.nl:80/%7Eguido/Python.html#fragment"/>"""
-        myTree = etree.parse(StringIO.StringIO(myXml))
-        myObj = DitaRef(myTree.getroot())
-        self.assertEqual(myObj.href, 'http://www.cwi.nl:80/%7Eguido/Python.html#fragment')
-        self.assertEqual(myObj.path, '/%7Eguido/Python.html')
-        self.assertEqual(myObj.fragment, 'fragment')
-        self.assertEqual(myObj.scheme, 'http')
-        self.assertEqual(myObj.errStrings(False, None), [])
-        self.assertEqual(myObj.errStrings(True, None), [])
-
-    def test_basic_scheme_file_frag(self):
-        """DitaRef: an xref node with a URI scheme, invoking fileFragment()."""
-        myXml = """<xref href="http://www.cwi.nl:80/%7Eguido/Python.html#fragment"/>"""
-        myTree = etree.parse(StringIO.StringIO(myXml))
-        myObj = DitaRef(myTree.getroot())
-        self.assertEqual(myObj.href, 'http://www.cwi.nl:80/%7Eguido/Python.html#fragment')
-        self.assertEqual(myObj.path, '/%7Eguido/Python.html')
-        self.assertEqual(myObj.fragment, 'fragment')
-        self.assertEqual(myObj.scheme, 'http')
-        srcPath = os.path.join('C:%s' % os.sep, 'spam', 'eggs.xml')
-        # A reference with a scheme has no file/fragment on the file system
-        self.assertEqual(
-            myObj.fileFragment(srcPath),
-            (None, None)
-        )
-        self.assertEqual(myObj.errStrings(False, None), [])
-        self.assertEqual(myObj.errStrings(True, None), [])
-
-    def test_fail_no_href(self):
-        """DitaRef: Fails on an xref node with no href attribute."""
-        myXml = """<xref />"""
-        myTree = etree.parse(StringIO.StringIO(myXml))
-        myObj = DitaRef(myTree.getroot())
-        self.assertEqual(
-            myObj.errStrings(False, None),
-            [
-             'Reference element "xref" is missing href=... attribute',
-             ]
-        )
-        self.assertEqual(
-            myObj.errStrings(True, None),
-            [
-             'Reference element "%s" is missing href=... attribute' % GENERIC_STRING,
-             ]
-        )
-
-    def test_fail_bad_frag(self):
-        """DitaRef: Fails on an xref node with href attribute that has multiple '#' characters."""
-        myXml = """<xref href="a#b#c" />"""
-        myTree = etree.parse(StringIO.StringIO(myXml))
-        myObj = DitaRef(myTree.getroot())
-        self.assertEqual(
-            myObj.errStrings(False, None),
-            [
-             'Multiple \'#\' not allowed in reference "a#b#c"',
-             ]
-        )
-        self.assertEqual(
-            myObj.errStrings(True, None),
-            [
-             'Multiple \'#\' not allowed in reference "%s"' % GENERIC_STRING,
-             ]
-        )
-
-    def test_guid_00(self):
-        """DitaRef: basic read of a node with a GUID file/fragment reference."""
-        myXml = """<xref href="GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E.xml#GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E"/>"""
-        myTree = etree.parse(StringIO.StringIO(myXml))
-        myObj = DitaRef(myTree.getroot())
-        self.assertEqual(myObj.href, 'GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E.xml#GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E')
-        self.assertEqual(myObj.path, 'GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E.xml')
-        self.assertEqual(myObj.elem, 'xref')
-        self.assertEqual(str(myObj), 'xref GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E.xml#GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E')
-        self.assertEqual(myObj.fragment, 'GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E')
-        self.assertEqual(myObj.scheme, '')
-        self.assertEqual(myObj.errStrings(False, None), [])
-        self.assertEqual(myObj.errStrings(True, None), [])
-
-    def test_guid_01(self):
-        """DitaRef: checkGuid() fails on a reference with a bad GUID file part."""
-        myXml = """<xref href="GUID-.xml#GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E"/>"""
-        myTree = etree.parse(StringIO.StringIO(myXml))
-        myObj = DitaRef(myTree.getroot())
-        self.assertEqual(myObj.href, 'GUID-.xml#GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E')
-        self.assertEqual(myObj.path, 'GUID-.xml')
-        self.assertEqual(myObj.elem, 'xref')
-        self.assertEqual(str(myObj), 'xref GUID-.xml#GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E')
-        self.assertEqual(myObj.fragment, 'GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E')
-        # No errors until checkGuid() is called
-        self.assertEqual(myObj.errStrings(False, None), [])
-        self.assertEqual(myObj.errStrings(True, None), [])
-        myObj.checkGuid()
-        self.assertEqual(
-            myObj.errStrings(False, None),
-            [
-             'GUID specification does not match file reference "GUID-.xml"'
-            ])
-        self.assertEqual(
-            myObj.errStrings(True, None),
-            [
-             genericStringForErrorCode(203),
-            ]
-        )
-
-    def test_guid_02(self):
-        """DitaRef: checkGuid() fails on a reference with a bad GUID fragment part."""
-        myXml = """<xref href="GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E.xml#GUID-25825EC4"/>"""
-        myTree = etree.parse(StringIO.StringIO(myXml))
-        myObj = DitaRef(myTree.getroot())
-        self.assertEqual(myObj.href, 'GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E.xml#GUID-25825EC4')
-        self.assertEqual(myObj.path, 'GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E.xml')
-        self.assertEqual(myObj.elem, 'xref')
-        self.assertEqual(str(myObj), 'xref GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E.xml#GUID-25825EC4')
-        self.assertEqual(myObj.fragment, 'GUID-25825EC4')
-        # No errors until checkGuid() is called
-        self.assertEqual(myObj.errStrings(False, None), [])
-        self.assertEqual(myObj.errStrings(True, None), [])
-        myObj.checkGuid()
-        self.assertEqual(
-            myObj.errStrings(False, None),
-            [
-             'GUID specification does not match fragment reference "GUID-25825EC4"'
-            ])
-        self.assertEqual(
-            myObj.errStrings(True, None),
-            [
-             genericStringForErrorCode(204),
-            ]
-        )
-
-class TestDitaFile(unittest.TestCase):
-    """Tests DitaFileObj against small XML documents held in memory."""
-
-    # XML declaration and DOCTYPE lines shared by the cxxClass documents below.
-    _PROLOG = (
-        "<?xml version='1.0' encoding='UTF-8' standalone='no'?>\n"
-        '<!DOCTYPE cxxClass PUBLIC "-//NOKIA//DTD DITA C++ API Class Reference Type v0.1.0//EN" "dtd/cxxClass.dtd" >\n'
-    )
-    # Child elements shared by test_missing_root_id() and test_Basic_01().
-    _XREF_AND_FUNCTION = (
-        '    <xref href="OtherClass">OtherClass</xref>\n'
-        '    <cxxFunction id="class_big_endian_1a9f78fb092e713acf6ffe3e8e11f1626f"/>\n'
-    )
-
-    def setUp(self):
-        """Nothing to prepare."""
-
-    def tearDown(self):
-        """Nothing to clean up."""
-
-    def _read(self, theXml, thePath):
-        """Returns a DitaFileObj read from the XML text under the given path."""
-        return DitaFileObj(StringIO.StringIO(theXml), thePath)
-
-    def _assertNoErrors(self, theObj):
-        """Asserts that neither errStrings(False, ...) nor errStrings(True, ...)
-        reports anything."""
-        for genericFlag in (False, True):
-            self.assertEqual(theObj.errStrings(genericFlag, None), [])
-
-    def testSetUpTearDown(self):
-        """DitaFile: test setUp() and tearDown()."""
-
-    def test_Basic(self):
-        """DitaFile: basic read of an XML file"""
-        xmlText = self._PROLOG + """<cxxClass id="class_big_endian">
-    <apiName>BigEndian</apiName>
-    <shortdesc/>
-    <cxxClassDetail>
-        <cxxClassDefinition>
-            <cxxClassAccessSpecifier value="public"/>
-            <cxxClassAPIItemLocation>
-                <cxxClassDeclarationFile name="filePath" value="K:/sf/os/commsfw/datacommsserver/esockserver/inc/es_sock.h"/>
-                <cxxClassDeclarationFileLine name="lineNumber" value="1520"/>
-                <cxxClassDefinitionFile name="filePath" value="K:/sf/os/commsfw/datacommsserver/esockserver/inc/es_sock.h"/>
-                <cxxClassDefinitionFileLineStart name="lineNumber" value="1516"/>
-                <cxxClassDefinitionFileLineEnd name="lineNumber" value="1526"/>
-            </cxxClassAPIItemLocation>
-        </cxxClassDefinition>
-        <apiDesc>
-            <p>Inserts and extracts integers in big-endian format.   </p>
-        </apiDesc>
-    </cxxClassDetail>
-    <cxxFunction id="class_big_endian_1a9f78fb092e713acf6ffe3e8e11f1626f">
-    </cxxFunction>
-    <cxxFunction id="class_big_endian_1aedf702f5c0118e4294d1a6d9684f8441">
-    </cxxFunction>
-    <cxxFunction id="class_big_endian_1ae266722f7bb965c971155a3315bad484">
-    </cxxFunction>
-    <cxxFunction id="class_big_endian_1a497d5248ea259f8490fb40ac4f2aafb2">
-    </cxxFunction>
-</cxxClass>"""
-        ditaFile = self._read(xmlText, 'foo')
-        self.assertEqual(ditaFile.identity, normalisePath('foo'))
-        self.assertEqual(ditaFile.doctype, 'cxxClass')
-        self.assertEqual(ditaFile.rootId, 'class_big_endian')
-        # Every id in the document maps to the name of the element carrying it.
-        expIdElems = {'class_big_endian' : 'cxxClass'}
-        for anId in (
-                'class_big_endian_1a9f78fb092e713acf6ffe3e8e11f1626f',
-                'class_big_endian_1aedf702f5c0118e4294d1a6d9684f8441',
-                'class_big_endian_1ae266722f7bb965c971155a3315bad484',
-                'class_big_endian_1a497d5248ea259f8490fb40ac4f2aafb2',
-            ):
-            expIdElems[anId] = 'cxxFunction'
-        self.assertEqual(ditaFile.idElemMap(), expIdElems)
-        self._assertNoErrors(ditaFile)
-
-    def test_missing_file(self):
-        """DitaFile: read an missing XML file"""
-        # None in place of a file object stands for a file that is not there.
-        ditaFile = DitaFileObj(None, 'foo')
-        self.assertEqual(
-            ditaFile.errStrings(False, None),
-            ['Failed to open: "%s"' % normalisePath('foo')]
-        )
-        self.assertEqual(
-            ditaFile.errStrings(True, None),
-            [genericStringForErrorCode(400)]
-        )
-
-    def test_IllFormedFile(self):
-        """DitaFile: read an ill-formed XML file"""
-        # The root element is opened but never closed.
-        xmlText = self._PROLOG + '<cxxClass id="class_big_endian">\n'
-        ditaFile = self._read(xmlText, 'foo')
-        self.assertEqual(ditaFile.identity, normalisePath('foo'))
-        self.assertEqual(ditaFile.doctype, None)
-        self.assertEqual(ditaFile.rootId, None)
-        self.assertEqual(ditaFile.idElemMap(), {})
-        self.assertEqual(
-            ditaFile.errStrings(False, None),
-            ['Can not parse: "no element found: line 4, column 0"']
-        )
-        self.assertEqual(
-            ditaFile.errStrings(True, None),
-            [genericStringForErrorCode(404)]
-        )
-
-    def test_missing_root_id(self):
-        """DitaFile: read of an XML file with no id on root element"""
-        xmlText = ''.join(
-            [self._PROLOG, '<cxxClass>\n', self._XREF_AND_FUNCTION, '</cxxClass>']
-        )
-        ditaFile = self._read(xmlText, 'foo')
-        self.assertEqual(ditaFile.identity, normalisePath('foo'))
-        self.assertEqual(ditaFile.doctype, 'cxxClass')
-        self.assertEqual(ditaFile.rootId, None)
-        self.assertEqual(
-            ditaFile.idElemMap(),
-            {'class_big_endian_1a9f78fb092e713acf6ffe3e8e11f1626f' : 'cxxFunction'}
-        )
-        # The same string is expected from both calls for error code 402.
-        for genericFlag in (False, True):
-            self.assertEqual(
-                ditaFile.errStrings(genericFlag, None),
-                [genericStringForErrorCode(402)]
-            )
-
-    def test_duplicate_id(self):
-        """DitaFile: duplicate IDs"""
-        xmlText = '<root id="AnID">\n<elem id="AnID"/>\n</root>'
-        ditaFile = self._read(xmlText, 'spam.xml')
-        self.assertEqual(ditaFile.identity, normalisePath('spam.xml'))
-        self.assertEqual(ditaFile.doctype, 'root')
-        self.assertEqual(ditaFile.rootId, 'AnID')
-        self.assertEqual(ditaFile.idElemMap(), {})
-        self.assertEqual(ditaFile.errStrings(False, None), ['Multiple id="AnID"'])
-        self.assertEqual(
-            ditaFile.errStrings(True, None),
-            [genericStringForErrorCode(401)]
-        )
-
-    def test_ismap_00(self):
-        """DitaFile: Is a map for <map>."""
-        ditaFile = self._read('<map id="myMap"/>', 'spam.xml')
-        self.assertEqual(ditaFile.isMap, True)
-
-    def test_ismap_01(self):
-        """DitaFile: Is a map for <cxxAPIMap>."""
-        ditaFile = self._read('<cxxAPIMap id="myMap"/>', 'spam.xml')
-        self.assertEqual(ditaFile.isMap, True)
-
-    def test_Basic_01(self):
-        """DitaFile: read of an simple XML file with id and xref"""
-        funcId = 'class_big_endian_1a9f78fb092e713acf6ffe3e8e11f1626f'
-        xmlText = ''.join(
-            [
-                self._PROLOG,
-                '<cxxClass id="class_big_endian">\n',
-                self._XREF_AND_FUNCTION,
-                '</cxxClass>',
-            ]
-        )
-        ditaFile = self._read(xmlText, 'foo')
-        self.assertEqual(ditaFile.identity, normalisePath('foo'))
-        self.assertEqual(ditaFile.doctype, 'cxxClass')
-        self.assertEqual(ditaFile.rootId, 'class_big_endian')
-        self.assertEqual(ditaFile.isMap, False)
-        self.assertEqual(len(ditaFile.idS), 2)
-        self.assertEqual(len(ditaFile.refS), 1)
-        self.assertEqual(ditaFile.hasId('class_big_endian'), True)
-        self.assertEqual(ditaFile.hasId(funcId), True)
-        self.assertEqual(ditaFile.hasId('noID'), False)
-        self.assertEqual(ditaFile.idElem('class_big_endian'), 'cxxClass')
-        self.assertEqual(ditaFile.idElem('noID'), None)
-        self.assertEqual(ditaFile.idElem(funcId), 'cxxFunction')
-        self.assertEqual(
-            ditaFile.idElemMap(),
-            {
-                'class_big_endian'  : 'cxxClass',
-                funcId              : 'cxxFunction',
-            }
-        )
-        self._assertNoErrors(ditaFile)
-
-class TestDitaFileSet(unittest.TestCase):
-    """Tests DitaFileSet with maps and topics added from in-memory XML."""
-
-    def setUp(self):
-        """Nothing to prepare."""
-
-    def tearDown(self):
-        """Nothing to clean up."""
-
-    def _mapXml(self, theHrefs, theId='map_00', isMapRef=False):
-        """Returns the text of a <map> holding one <topicref> per href.
-        If isMapRef is True each <topicref> also carries format="ditamap"."""
-        if isMapRef:
-            refFormat = '    <topicref href="%s" format="ditamap" />'
-        else:
-            refFormat = '    <topicref href="%s" />'
-        xmlLines = ['<map id="%s">' % theId]
-        xmlLines.extend([refFormat % anHref for anHref in theHrefs])
-        xmlLines.append('</map>')
-        return '\n'.join(xmlLines)
-
-    def _makeSet(self, theFiles, **theKwargs):
-        """Returns a finalised DitaFileSet that has not read any directory.
-        theFiles is a sequence of (path, XML text) pairs that are added in
-        order, theKwargs are passed on to the DitaFileSet constructor."""
-        fileSet = DitaFileSet(None, procDir=False, **theKwargs)
-        for aPath, anXml in theFiles:
-            fileSet._addFileObj(StringIO.StringIO(anXml), aPath)
-        fileSet.finalise()
-        return fileSet
-
-    def _cycleMsg(self, *theNames):
-        """Returns the specific map cycle message for the map paths, in order."""
-        return 'Maps "%s" are in a a cycle.' % str(
-            tuple([normalisePath(aName) for aName in theNames])
-        )
-
-    def testSetUpTearDown(self):
-        """DitaFileSet: test setUp() and tearDown()."""
-
-    def test_None(self):
-        """DitaFileSet: read of None."""
-        # Here the directory is processed (procDir is left at its default).
-        fileSet = DitaFileSet(None)
-        fileSet.finalise()
-        self.assertEqual(fileSet.errStrings(False, None), ['Not a directory: None'])
-        self.assertEqual(
-            fileSet.errStrings(True, None),
-            ['Not a directory: %s' % GENERIC_STRING]
-        )
-        self.assertEqual(fileSet.errCountMap, {500 : 1})
-
-    def test_basic(self):
-        """DitaFileSet: Test reading a map and a couple of files."""
-        fileSet = self._makeSet(
-            [
-                ('map.ditamap', self._mapXml(['spam.dita', 'eggs.dita'])),
-                ('spam.dita', '<topic id="spam"/>'),
-                ('eggs.dita', '<topic id="eggs"/>'),
-            ]
-        )
-        self.assertEqual(fileSet.allErrStrings(False, None), [])
-        self.assertEqual(fileSet.allErrStrings(True, None), [])
-        self.assertEqual(fileSet.errCountMap, {})
-
-    def test_duplicate_paths(self):
-        """DitaFileSet: Test reading a couple of files in duplicate paths."""
-        # Two different topics are added under the same path.
-        fileSet = self._makeSet(
-            [
-                ('map.ditamap', self._mapXml(['spam.dita'])),
-                ('spam.dita', '<topic id="spam"/>'),
-                ('spam.dita', '<topic id="eggs"/>'),
-            ]
-        )
-        self.assertEqual(
-            fileSet.errStrings(False, None),
-            ['Duplicate file path: "%s"' % normalisePath('spam.dita')]
-        )
-        self.assertEqual(
-            fileSet.errStrings(True, None),
-            [genericStringForErrorCode(504)]
-        )
-        self.assertEqual(fileSet.errCountMap, {504 : 1})
-
-    def test_duplicate_ids(self):
-        """DitaFileSet: Test reading a map and a couple of files with duplicate IDs."""
-        # All three topics carry the same root id.
-        fileSet = self._makeSet(
-            [
-                (
-                    'map.ditamap',
-                    self._mapXml(['spam.dita', 'eggs.dita', 'chips.dita']),
-                ),
-                ('spam.dita', '<topic id="chips"/>'),
-                ('eggs.dita', '<topic id="chips"/>'),
-                ('chips.dita', '<topic id="chips"/>'),
-            ]
-        )
-        self.assertEqual(
-            fileSet.errStrings(True, None),
-            [
-                genericStringForErrorCode(505),
-                genericStringForErrorCode(501),
-            ]
-        )
-        # The file paths are expected in this order in the specific messages.
-        pathTuple = (
-            normalisePath('chips.dita'),
-            normalisePath('eggs.dita'),
-            normalisePath('spam.dita'),
-        )
-        expErrs = [
-            """Duplicate id="chips" in files: ('%s', '%s', '%s')""" % pathTuple,
-            """Duplicate root id="chips" in files: ('%s', '%s', '%s')""" % pathTuple,
-        ]
-        self.assertEqual(fileSet.errStrings(False, None), expErrs)
-        self.assertEqual(fileSet.errCountMap, {505: 1, 501: 1})
-
-    def test_lonely_topics(self):
-        """DitaFileSet: Test a couple of lonely topics."""
-        # Two topics and no map at all.
-        fileSet = self._makeSet(
-            [
-                ('spam', '<spam id="spam"/>'),
-                ('eggs', '<eggs id="eggs"/>'),
-            ]
-        )
-        self.assertEqual(
-            fileSet.errStrings(False, None),
-            [
-                'Topic id="%s" is not referenced by any map' % normalisePath(aName)
-                for aName in ('eggs', 'spam')
-            ]
-        )
-        self.assertEqual(
-            fileSet.errStrings(True, None),
-            [genericStringForErrorCode(600)]
-        )
-
-    def test_map_cycles_00(self):
-        """DitaFileSet: Cyclic references between two maps."""
-        # map_00 -> map_01 -> map_00
-        fileSet = self._makeSet(
-            [
-                (
-                    'map_00.ditamap',
-                    self._mapXml(['map_01.ditamap'], 'map_00', True),
-                ),
-                (
-                    'map_01.ditamap',
-                    self._mapXml(['map_00.ditamap'], 'map_01', True),
-                ),
-            ]
-        )
-        self.assertEqual(
-            fileSet.errStrings(False, None),
-            [
-                self._cycleMsg('map_00.ditamap', 'map_01.ditamap'),
-                self._cycleMsg('map_01.ditamap', 'map_00.ditamap'),
-            ]
-        )
-        self.assertEqual(
-            fileSet.allErrStrings(True, None),
-            [genericStringForErrorCode(701)]
-        )
-        self.assertEqual(fileSet.errCountMap, {701 : 4})
-
-    def test_map_cycles_01(self):
-        """DitaFileSet: Cyclic references between three maps."""
-        # map_00 -> map_01 -> map_02 -> map_00
-        fileSet = self._makeSet(
-            [
-                (
-                    'map_00.ditamap',
-                    self._mapXml(['map_01.ditamap'], 'map_00', True),
-                ),
-                (
-                    'map_01.ditamap',
-                    self._mapXml(['map_02.ditamap'], 'map_01', True),
-                ),
-                (
-                    'map_02.ditamap',
-                    self._mapXml(['map_00.ditamap'], 'map_02', True),
-                ),
-            ]
-        )
-        self.assertEqual(
-            fileSet.errStrings(False, None),
-            [
-                self._cycleMsg('map_00.ditamap', 'map_01.ditamap', 'map_02.ditamap'),
-                self._cycleMsg('map_01.ditamap', 'map_02.ditamap', 'map_00.ditamap'),
-                self._cycleMsg('map_02.ditamap', 'map_00.ditamap', 'map_01.ditamap'),
-            ]
-        )
-        self.assertEqual(
-            fileSet.errStrings(True, None),
-            [genericStringForErrorCode(701)]
-        )
-        self.assertEqual(fileSet.errCountMap, {701 : 6})
-
-    def test_refarc_00(self):
-        """DitaFileSet: Test ref arcing - all resolve."""
-        fileSet = self._makeSet(
-            [
-                (
-                    'map.ditamap',
-                    self._mapXml(['spam.dita#spam', 'eggs.dita#eggs']),
-                ),
-                ('spam.dita', '<topic id="spam"/>'),
-                ('eggs.dita', '<topic id="eggs"/>'),
-            ]
-        )
-        self.assertEqual(fileSet.errCountMap, {})
-        self.assertEqual(fileSet.allErrStrings(False, None), [])
-        self.assertEqual(fileSet.allErrStrings(True, None), [])
-        self.assertEqual(fileSet.errStrings(False, None), [])
-        self.assertEqual(fileSet.errStrings(True, None), [])
-
-    def test_refarc_fail_00(self):
-        """DitaFileSet: Test ref arcing - can't find file."""
-        # Only the map is added, neither of the files that it references.
-        fileSet = self._makeSet(
-            [
-                (
-                    'map.ditamap',
-                    self._mapXml(['spam_.dita', 'eggs_for_tea.dita']),
-                ),
-            ]
-        )
-        self.assertEqual(fileSet.errCountMap, {410: 2})
-        self.assertEqual(
-            fileSet.allErrStrings(False, None),
-            [
-                'Can not resolve reference to file "%s"' % normalisePath(aName)
-                for aName in ('eggs_for_tea.dita', 'spam_.dita')
-            ]
-        )
-        self.assertEqual(
-            fileSet.allErrStrings(True, None),
-            ['Can not resolve reference to file "..."']
-        )
-        self.assertEqual(fileSet.errStrings(False, None), [])
-        self.assertEqual(fileSet.errStrings(True, None), [])
-
-    def test_refarc_fail_01(self):
-        """DitaFileSet: Test ref arcing - can't find fragment."""
-        # The files are present but the fragments have a trailing underscore.
-        fileSet = self._makeSet(
-            [
-                (
-                    'map.ditamap',
-                    self._mapXml(['spam.dita#spam_', 'eggs.dita#eggs_']),
-                ),
-                ('spam.dita', '<spam id="spam"/>'),
-                ('eggs.dita', '<eggs id="eggs"/>'),
-            ]
-        )
-        self.assertEqual(fileSet.errCountMap, {411: 2})
-        specificFormat = 'Can resolve reference to file "%s" but not to fragment "' 
-        self.assertEqual(
-            fileSet.allErrStrings(False, None),
-            [
-                specificFormat % normalisePath('eggs.dita') + 'eggs_"',
-                specificFormat % normalisePath('spam.dita') + 'spam_"',
-            ]
-        )
-        self.assertEqual(
-            fileSet.allErrStrings(True, None),
-            [
-                'Can resolve reference to file "%s" but not to fragment "%s"' \
-                    % (GENERIC_STRING, GENERIC_STRING),
-            ]
-        )
-        self.assertEqual(fileSet.errStrings(False, None), [])
-        self.assertEqual(fileSet.errStrings(True, None), [])
-
-    def test_refarc_url_00(self):
-        """DitaFileSet: Test ref arcing - URL."""
-        topicFormat = '<topic id="%s">\n        <xref href="%s">%s</xref>\n</topic>'
-        # testExt=True is the only difference in construction to the tests above.
-        fileSet = self._makeSet(
-            [
-                (
-                    'map.ditamap',
-                    self._mapXml(['spam.dita#spam', 'eggs.dita#eggs']),
-                ),
-                (
-                    'spam.dita',
-                    topicFormat % ('spam', 'http://www.nokia.com', 'Nokia'),
-                ),
-                (
-                    'eggs.dita',
-                    topicFormat % ('eggs', 'http://www.google.com', 'Google'),
-                ),
-            ],
-            testExt=True
-        )
-        self.assertEqual(fileSet.errCountMap, {})
-        self.assertEqual(fileSet.allErrStrings(False, None), [])
-        self.assertEqual(fileSet.allErrStrings(True, None), [])
-        self.assertEqual(fileSet.errStrings(False, None), [])
-        self.assertEqual(fileSet.errStrings(True, None), [])
-
-class TestDitaBookmapFileSet(unittest.TestCase):
-    """Tests DitaFileSet with a bookmap in place of a plain map."""
-
-    # The topic that the bookmap refers to; its file is named after its id.
-    _TOPIC_ID = 'GUID-00025EAD-C4B6-5408-96A3-FFDBBBDC7CAB'
-
-    _BOOKMAP_XML = """<?xml version="1.0" encoding="utf-8"?>
-<!DOCTYPE bookmap PUBLIC "-//OASIS//DTD DITA BookMap//EN"
-"bookmap.dtd">
-<bookmap id="GUID-5BDFDB6B-7801-4804-9F41-2BDC5BE53DDF">
-  <booktitle>
-    <mainbooktitle>My Bookmap</mainbooktitle>
-    <booktitlealt>Alternate title</booktitlealt>
-  </booktitle>
-  <frontmatter id="GUID-DA857913-F826-4CF7-A135-93F2AEB48353">
-    <topicref href="GUID-00025EAD-C4B6-5408-96A3-FFDBBBDC7CAB.dita" id="GUID-994B1764-393F-401F-8571-CE0955AB6CA6" />
-  </frontmatter>
-</bookmap>
-"""
-
-    _CONCEPT_XML = """<?xml version="1.0" encoding="utf-8"?>
-<!DOCTYPE concept  PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
-<concept id="GUID-00025EAD-C4B6-5408-96A3-FFDBBBDC7CAB" xml:lang="en">
-    <title>How to read and write a file</title>
-</concept>
-"""
-
-    def setUp(self):
-        """Nothing to prepare."""
-
-    def tearDown(self):
-        """Nothing to clean up."""
-
-    def testSetUpTearDown(self):
-        """TestDitaBookmapFileSet: test setUp() and tearDown()."""
-
-    def test_basic(self):
-        """TestDitaBookmapFileSet: Test reading a bookmap and a topic."""
-        fileSet = DitaFileSet(None, procDir=False)
-        for aPath, anXml in (
-                ('bookmap.ditamap', self._BOOKMAP_XML),
-                (self._TOPIC_ID + '.dita', self._CONCEPT_XML),
-            ):
-            fileSet._addFileObj(StringIO.StringIO(anXml), aPath)
-        fileSet.finalise()
-        # The reference from the bookmap resolves so nothing is reported.
-        for genericFlag in (False, True):
-            self.assertEqual(fileSet.allErrStrings(genericFlag, None), [])
-        self.assertEqual(fileSet.errCountMap, {})
-
-class Special(unittest.TestCase):
-    """Placeholder test case that defines no tests of its own."""
-
-def unitTest(theVerbosity=2):
-    """Runs the test case classes listed below with a unittest.TextTestRunner.
-
-    theVerbosity is passed to the runner as its verbosity.
-    Returns a tuple of (number of tests run, number of errors, number of failures).
-    """
-    testCases = (
-        NullClass,
-        TestCountDict,
-        TestDitaId,
-        TestDitaRef,
-        TestDitaFile,
-        TestDitaFileSet,
-        TestDitaBookmapFileSet,
-        Special,
-    )
-    loader = unittest.TestLoader()
-    caseSuites = [loader.loadTestsFromTestCase(aCase) for aCase in testCases]
-    # The suite of the first class collects the tests of all the others.
-    fullSuite = caseSuites[0]
-    for aSuite in caseSuites[1:]:
-        fullSuite.addTests(aSuite)
-    runner = unittest.TextTestRunner(verbosity=theVerbosity)
-    outcome = runner.run(fullSuite)
-    return (outcome.testsRun, len(outcome.errors), len(outcome.failures))
-
-######################################
-# main() stuff
-######################################
-def main():
-    """Command line entry point, returns the exit status for sys.exit().
-
-    Parses the command line, optionally runs the unit tests (-u) and lists
-    the detectable error codes (-?), then reads the single directory argument
-    into a DitaFileSet (or, if -j is greater than -1, into whatever
-    retMpDitaFileSetObj() returns) and writes the statistics, the error
-    summary and, if asked for with -f, the errors themselves.
-
-    Returns 0 on success or 1 if -u was given and any unit test failed or
-    raised an error. Usage problems are reported with parser.error() which
-    writes the message to stderr and exits the process with status 2; it
-    never returns so no code is needed after it.
-    """
-    print 'CMD: %s' % ' '.join(sys.argv)
-    usage = "usage: %prog [options] <Directory of XML content>"
-    parser = OptionParser(usage, version='%prog ' + __version__)
-    parser.add_option("-d", action="store_true", dest="dump", default=False, 
-                      help="Dump internal representation. [default: %default]")
-    parser.add_option(
-            "-e", "--errors",
-            type="str",
-            dest="error_codes",
-            default='All',
-            help="Only report on certain error codes (space seperated list). [default: \"%default\"]"
-        )
-    parser.add_option("-f", "--file", dest="file", type="str", default='None', 
-                      help="Report of errors by file either 'None', 'generic', 'specific'. [default: %default]")
-    parser.add_option("-g", action="store_true", dest="guid", default=False, 
-                      help="Enforce GUID specification. [default: %default]")
-    parser.add_option(
-            "-j", "--jobs",
-            type="int",
-            dest="jobs",
-            default=-1,
-            help="Max processes when multiprocessing. 0 takes CPUs, -1 no MP. [default: %default]"
-        )
-    parser.add_option(
-            "-l", "--loglevel",
-            type="int",
-            dest="loglevel",
-            default=20,
-            help="Log Level (debug=10, info=20, warning=30, [error=40], critical=50) [default: %default]"
-        )
-    parser.add_option(
-            "-p", "--pattern",
-            type="str",
-            dest="pattern",
-            default=FNMATCH_STRING,
-            help="Pattern match. [default: \"%default\"]"
-        )
-    parser.add_option("-r", action="store_true", dest="recursive", default=False, 
-                      help="Recursive. [default: %default]")
-    parser.add_option("-s", action="store_true", dest="shelve", default=False, 
-                      help="Use the shelve dBase rather than storing the internal representation in memory. This is slower but is useful for large data sets where a memory error might occur. [default: %default]")
-    parser.add_option("-u", action="store_true", dest="unit_test", default=False, 
-                      help="Execute unit tests and exit. [default: %default]")
-    parser.add_option("-x", action="store_true", dest="ext_url", default=False, 
-                      help="Test external |URLs. [default: %default]")
-    parser.add_option("-?", action="store_true", dest="query_errors", default=False, 
-                      help="Display the error types that are detected. [default: %default]")
-    (options, args) = parser.parse_args()
-    logging.basicConfig(
-        level=options.loglevel,
-        format='%(asctime)s %(levelname)-8s %(message)s',
-        stream=sys.stdout,
-    )
-    if options.file not in ('None', 'generic', 'specific'):
-        parser.error("--file option must be: 'None' | 'generic' | 'specific'")
-    retCode = 0
-    if options.unit_test:
-        # Reflect a failing test run in the exit status rather than dropping
-        # the result on the floor.
-        (numRun, numErrors, numFailures) = unitTest()
-        if numErrors or numFailures:
-            retCode = 1
-    if options.query_errors:
-        writeGenericStringsForErrorCodes()
-    if len(args) > 1:
-        parser.error("Too many arguments, I need only one.")
-    if len(args) < 1:
-        if options.unit_test or options.query_errors:
-            # -u and -? are complete requests in themselves, a missing path
-            # is not an error for them.
-            return retCode
-        parser.print_help()
-        parser.error("I can't do much without a path to the XML content.")
-    # Check the -e codes before the (possibly lengthy) read of the directory
-    # so that a typing mistake is reported straight away as a usage error.
-    userCodes = None
-    if options.error_codes != 'All':
-        try:
-            userCodes = set([int(i) for i in options.error_codes.split()])
-        except ValueError:
-            parser.error(
-                '--errors option must be "All" or a space separated list of integers, not "%s"' \
-                    % options.error_codes
-            )
-    if options.jobs > -1:
-        myObj = retMpDitaFileSetObj(
-                    args[0],
-                    options.pattern.split(' '),
-                    options.recursive,
-                    options.jobs,
-                    options.ext_url,
-                    options.shelve,
-                    )
-    else:
-        myObj = DitaFileSet(args[0],
-                            procDir=True,
-                            thePatterns=options.pattern.split(' '),
-                            recursive=options.recursive,
-                            testExt=options.ext_url,
-                            useDbase=options.shelve,
-                            )
-    if options.dump:
-        myObj.debugDump()
-    myObj.writeStatistics()
-    myObj.writeErrorSummary()
-    # TODO: Write out the results in different ways
-    if userCodes is None:
-        # No -e restriction: report on every known code.
-        errFilter = set(PROBLEM_CODE_FORMAT.keys())
-    else:
-        errFilter = userCodes
-    if options.file == 'generic':
-        print 'Generic problems:'
-        myObj.writeErrors(True, errFilter)
-    elif options.file == 'specific':
-        print 'Specific problems:'
-        myObj.writeErrors(False, errFilter)
-    return retCode
-
-if __name__ == '__main__':
-    # freeze_support() has to be called first thing in the main module guard.
-    multiprocessing.freeze_support()
-    exitStatus = main()
-    sys.exit(exitStatus)