--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpdot/linkcheck.py Fri Apr 23 20:45:58 2010 +0100
@@ -0,0 +1,2336 @@
+# Copyright (c) 2007-2010 Nokia Corporation and/or its subsidiary(-ies) All rights reserved.
+# This component and the accompanying materials are made available under the terms of the License
+# "Eclipse Public License v1.0" which accompanies this distribution,
+# and is available at the URL "http://www.eclipse.org/legal/epl-v10.html".
+#
+# Initial Contributors:
+# Nokia Corporation - initial contribution.
+#
+# Contributors:
+#
+# Description:
+# Checks links in DITA XML and reports issues.
+"""
+Created on 12 Feb 2010
+
+@author: p2ross
+
+Definitions
+===========
+Doctype
+-------
+See: http://www.w3.org/TR/2008/REC-xml-20081126/#dt-root
+Note: this is sometimes called the Doctype because of http://www.w3.org/TR/2008/REC-xml-20081126/#vc-roottype
+
+ID
+--
+The value of the 'id' attribute of an element.
+
+Root ID
+-------
+The value of the 'id' attribute of the root element.
+Note: A future development could allow differently named attributes provided
+that they are of type ID. See http://www.w3.org/TR/2008/REC-xml-20081126/#sec-attribute-types
+for validity constraints for ID types.
+
+Reference
+---------
+The value of the href attribute of an element.
+
+Map
+---
+An XML file whose root element name is 'map', 'bookmap' or ends with 'Map'.
+
+Topic
+-----
+An XML file that is not a Map.
+
+Lonely topic
+------------
+A topic whose root ID is not referenced by any map.
+
+Lonely map
+----------
+A map whose root ID is not referenced by any map.
+
+Map Cycle
+---------
+A sequence of map references whose members are not unique.
+
+"""
+
+import os
+import unittest
+import sys
+import logging
+import pprint
+import fnmatch
+import re
+import urllib
+import time
+from optparse import OptionParser, check_choice
+try:
+ from xml.etree import cElementTree as etree
+except ImportError:
+ from xml.etree import ElementTree as etree
+import urlparse
+import multiprocessing
+# used for DitaFileObj persistence
+import shelve
+
+__version__ = '0.1.5'
+
+class ExceptionLinkCheck(Exception):
+    """Exception class declared by this module; it adds nothing to Exception."""
+
+class CountDict(dict):
+    """A dict whose item lookup never fails: reading an unknown key stores 0
+    under that key and returns it."""
+    def __getitem__(self, key):
+        # setdefault() inserts the 0 only when the key is missing
+        return self.setdefault(key, 0)
+
+# Matches stuff like: GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E
+# (hex digits in either case because of re.IGNORECASE).
+# The pattern has no '^' or '$' anchors; the checkGuid() methods in this file
+# apply it with match(), which anchors at the start of the string only, so
+# text following a well formed GUID is accepted.
+RE_GUID = re.compile(r'GUID-[0-9A-F]{8}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{12}', re.IGNORECASE)
+
+# Of the form {integer_error_code : (format_string, num_args), ...}
+# num_args is the number of arguments that format_string takes:
+# DitaLinkCheckBase.addError() asserts that the argument tuple it is given has
+# exactly num_args members, and errStrings() later applies that tuple to
+# format_string with the % operator.
+# The quoted names in the comments below label each group of codes.
+PROBLEM_CODE_FORMAT = {
+ # 'id_syntax'
+ 100 : ('Character \'#\' not allowed in id="%s"', 1),
+ 101 : ('NMTOKEN character \'%s\' not allowed in id="%s"', 2),
+ 102 : ('GUID specification does not match id="%s"', 1),
+ # 'ref_syntax'
+ 200 : ('Multiple \'#\' not allowed in reference "%s"', 1),
+ 201 : ('Reference element "%s" is missing href=... attribute', 1),
+ 202 : ('URL has missing type/format in reference "%s"', 1),
+ 203 : ('GUID specification does not match file reference "%s"', 1),
+ 204 : ('GUID specification does not match fragment reference "%s"', 1),
+ # 'ref'
+ 300 : ('Can not resolve URI "%s"', 1),
+ # 'file'
+ 400 : ('Failed to open: "%s"', 1),
+ 401 : ('Multiple id="%s"', 1),
+ 402 : ('No id attribute on root element', 0),
+ 403 : ('Root ID in cycle: %s', 1),
+ 404 : ('Can not parse: "%s"', 1),
+ 410 : ('Can not resolve reference to file "%s"', 1),
+ 411 : ('Can resolve reference to file "%s" but not to fragment "%s"', 2),
+ 412 : ('Referencing element "%s" does not match target root element "%s"', 2),
+ 413 : ('Referencing element "%s" does not match target element "%s" for id="%s"', 3),
+ 414 : ('topicref element with format="ditamap" does not match target root element "%s"', 1),
+ 415 : ('topicref to <map> does not have format="ditamap" but format="%s"', 1),
+ 416 : ('topicref element type="%s" does not match target root element "%s"', 2),
+ 417 : ('topicref element type="%s" does not match target element "%s" for id="%s"', 3),
+ 418 : ('Unknown referencing element "%s" does not match target root element "%s"', 2),
+ 419 : ('Unknown referencing element "%s" does not match target element "%s" for id="%s"', 3),
+ # 'file_set'
+ 500 : ('Not a directory: %s', 1),
+ 501 : ('Duplicate root id="%s" in files: %s', 2),
+ # Codes 502 and 503 are currently disabled
+ #502 : ('Can not resolve reference to "%s"', 1),
+ #503 : ('Reference type "%s" does not match target type "%s" for id="%s"', 3),
+ 504 : ('Duplicate file path: "%s"', 1),
+ 505 : ('Duplicate id="%s" in files: %s', 2),
+ # 'topic_set'
+ 600 : ('Topic id="%s" is not referenced by any map', 1),
+ # 'map_set'
+ 700 : ('More than one top level map exists: %s', 1),
+ 701 : ('Maps "%s" are in a a cycle.', 1),
+}
+
+# Placeholder that genericStringForErrorCode() substitutes for every argument
+# of an error message
+GENERIC_STRING = '...'
+# Width in characters of the '=' rules and centred headings written by the
+# report functions
+PRINT_WIDTH = 75
+
+def genericStringForErrorCode(ec):
+    """Return the message for error code ec with each of its arguments
+    replaced by GENERIC_STRING.
+
+    ec must be a key of PROBLEM_CODE_FORMAT; this is asserted."""
+    # The 'in' operator replaces the deprecated dict.has_key()
+    assert ec in PROBLEM_CODE_FORMAT
+    myFormat, myCount = PROBLEM_CODE_FORMAT[ec]
+    if myCount == 0:
+        # Nothing to substitute
+        return myFormat
+    return myFormat % ((GENERIC_STRING,) * myCount)
+
+def writeGenericStringsForErrorCodes(s=sys.stdout):
+    """Write to the stream s a table of every error code, in ascending order,
+    with its generic message. The table has a centred title rule above it and
+    a plain rule plus an empty line below it."""
+    s.write(' All Error Codes '.center(PRINT_WIDTH, '='))
+    s.write('\n')
+    for aHeadPair in (('Code', 'Error'), ('----', '-----')):
+        s.write('%4s %s\n' % aHeadPair)
+    for anErrCode in sorted(PROBLEM_CODE_FORMAT.keys()):
+        s.write('%4d %s\n' % (anErrCode, genericStringForErrorCode(anErrCode)))
+    s.write('='*PRINT_WIDTH)
+    s.write('\n\n')
+
+def normalisePath(thePath):
+    """Return thePath as an absolute path in which every backslash has been
+    turned into a forward slash."""
+    # TODO (carried over): the original author noted that plain
+    # os.path.abspath(thePath) "does not work" here, reason not recorded.
+    myAbsPath = os.path.abspath(thePath)
+    return '/'.join(myAbsPath.split('\\'))
+
+# Default fnmatch patterns for the file names that are read
+FNMATCH_PATTERNS = ['*.xml', '*.dita', '*.ditamap']
+# The same patterns as a single space separated string
+FNMATCH_STRING = ' '.join(FNMATCH_PATTERNS)
+
+# These elements descend from topic/xref so can be treated as referencing elements.
+# The members are element (tag) names held in a set.
+XREF_DESCENDENTS = set(
+ (
+ # From the api specialisation
+ 'apiRelation',
+ 'apiBaseClassifier',
+ 'apiOtherClassifier',
+ 'apiOperationClassifier',
+ 'apiValueClassifier',
+ # From the C++ specialisation
+ 'cxxfile',
+ 'cxxclass',
+ 'cxxstruct',
+ 'cxxunion',
+ 'cxxfunction',
+ 'cxxdefine',
+ 'cxxtypedef',
+ 'cxxvariable',
+ 'cxxenumeration',
+ 'cxxClassBaseClass',
+ 'cxxClassBaseStruct',
+ 'cxxClassBaseUnion',
+ 'cxxClassNestedClass',
+ 'cxxClassNestedStruct',
+ 'cxxClassNestedUnion',
+ 'cxxClassEnumerationInherited',
+ 'cxxClassEnumeratorInherited',
+ 'cxxClassFunctionInherited',
+ 'cxxClassVariableInherited',
+ 'cxxDefineReimplemented',
+ 'cxxEnumerationReimplemented',
+ 'cxxFunctionReimplemented',
+ 'cxxStructBaseClass',
+ 'cxxStructBaseStruct',
+ 'cxxStructBaseUnion',
+ 'cxxStructNestedClass',
+ 'cxxStructNestedStruct',
+ 'cxxStructNestedUnion',
+ 'cxxStructEnumerationInherited',
+ 'cxxStructEnumeratorInherited',
+ 'cxxStructFunctionInherited',
+ 'cxxStructVariableInherited',
+ 'cxxTypedefReimplemented',
+ 'cxxUnionBaseClass',
+ 'cxxUnionBaseStruct',
+ 'cxxUnionBaseUnion',
+ 'cxxUnionNestedClass',
+ 'cxxUnionNestedStruct',
+ 'cxxUnionNestedUnion',
+ 'cxxUnionEnumerationInherited',
+ 'cxxUnionFunctionInherited',
+ 'cxxUnionVariableInherited',
+ 'cxxVariableReimplemented',
+ )
+)
+
+class UrlAccessCache(object):
+    """Remembers, per URL, whether the URL could be opened and read.
+
+    A URL is fetched at most once between calls to clear(); later queries for
+    the same URL are answered from the cache."""
+    def __init__(self):
+        # {URL : True/False, ...}
+        self._cache = {}
+
+    def clear(self):
+        """Forget every cached result."""
+        self._cache = {}
+
+    def canAccess(self, theUrl):
+        """Return True if theUrl can be opened and read, False if doing so
+        raises IOError. The result is cached."""
+        if theUrl not in self._cache:
+            try:
+                u = urllib.urlopen(theUrl)#, data, proxies)
+                try:
+                    u.read()
+                finally:
+                    # Release the handle whether or not the read succeeds
+                    u.close()
+            except IOError:
+                isOk = False
+            else:
+                isOk = True
+            self._cache[theUrl] = isOk
+            logging.debug('URL: %s for %s' % (isOk, theUrl))
+        return self._cache[theUrl]
+
+# Module wide cache shared by all references
+GlobalUrlCache = UrlAccessCache()
+
+class DitaLinkCheckBase(object):
+    """Base class that holds some common functionality.
+
+    Every object has an identity, fixed at construction, that drives ordering,
+    equality, hashing and str(). Every object can also accumulate errors as
+    (error code, argument tuple) pairs; the codes are the keys of
+    PROBLEM_CODE_FORMAT."""
+    def __init__(self, theIdentity):#=None):
+        self.__identity = theIdentity
+        # {error_code : set(argument tuples), ...}
+        # None until the first call to addError()
+        self._errS = None
+
+    @property
+    def identity(self):
+        """The identity given to the constructor."""
+        return self.__identity
+
+    def __cmp__(self, other):
+        assert(self.identity is not None)
+        assert(other.identity is not None)
+        return cmp(self.identity, other.identity)
+
+    def __eq__(self, other):
+        assert(self.identity is not None)
+        assert(other.identity is not None)
+        return self.identity == other.identity
+
+    def __hash__(self):
+        assert(self.identity is not None)
+        return hash(self.identity)
+
+    def __str__(self):
+        return str(self.__identity)
+
+    def debugDump(self, s=sys.stdout, prefix=''):
+        """Dump of IR for debug purposes."""
+        raise NotImplementedError
+
+    def addError(self, errCode, argTuple):
+        """Record an error. errCode must be a key of PROBLEM_CODE_FORMAT and
+        argTuple must have as many members as that code's format takes; both
+        are asserted. Recording the same pair twice has no further effect."""
+        # Test membership on the dict itself rather than on a list of its keys
+        assert errCode in PROBLEM_CODE_FORMAT, 'No error code: %s' % errCode
+        assert(PROBLEM_CODE_FORMAT[errCode][1] == len(argTuple)), \
+            'Length missmatch for error code %d: %d != %d for %s' \
+            % (errCode, PROBLEM_CODE_FORMAT[errCode][1], len(argTuple), str(argTuple))
+        if self._errS is None:
+            self._errS = {}
+        self._errS.setdefault(errCode, set()).add(argTuple)
+
+    def errStrings(self, generic, theFilter):
+        """Return a sorted list of error messages without duplicates.
+        generic - If True every argument is shown as GENERIC_STRING.
+        theFilter - None, or a container of the error codes to include."""
+        if self._errS is None:
+            return []
+        mySet = set()
+        for ec, argTupleS in self._errS.items():
+            if theFilter is not None and ec not in theFilter:
+                continue
+            assert ec in PROBLEM_CODE_FORMAT
+            if generic:
+                # One generic message per code however many tuples there are
+                if len(argTupleS) > 0:
+                    mySet.add(genericStringForErrorCode(ec))
+            else:
+                f, c = PROBLEM_CODE_FORMAT[ec]
+                for tu in argTupleS:
+                    assert(len(tu) == c)
+                    mySet.add(f % tu)
+        return sorted(mySet)
+
+    def updateErrorCount(self, theMap):
+        """Updates a map of {error_code, : count, ...}.
+        Overridden for file and file set.
+        theMap must supply a value for codes it has not seen yet, CountDict
+        does this."""
+        if self._errS is not None:
+            for e, argTupleS in self._errS.items():
+                theMap[e] += len(argTupleS)
+
+    def writeErrors(self, isGeneric, theFilter, theStream=sys.stdout):
+        """Can be overridden in child classes to recurse into
+        their data structures."""
+        theStream.write('\n'.join(self.errStrings(isGeneric, theFilter)))
+
+class DitaId(DitaLinkCheckBase):
+    """An element that carries an id attribute. The identity of the object is
+    the value of that attribute."""
+    def __init__(self, theN):
+        """theN is an element; it must have an id attribute (asserted)."""
+        myId = theN.get('id', None)
+        assert(myId is not None)
+        super(DitaId, self).__init__(myId)
+        self._elem = theN.tag
+        if '#' in myId:
+            self.addError(100, (myId,))
+        # TODO: NMTOKENS
+
+    @property
+    def elem(self):
+        """The tag of the element."""
+        return self._elem
+
+    @property
+    def id(self):
+        """The value of the id attribute."""
+        return self.identity
+
+    def checkGuid(self):
+        """Optional extra check: records error 102 unless the id starts with
+        text that RE_GUID matches."""
+        if RE_GUID.match(self.id) is None:
+            self.addError(102, (self.id,))
+
+    def debugDump(self, s=sys.stdout, prefix=''):
+        """Dump of IR for debug purposes."""
+        s.write('%sID: <%s id="%s" />\n' % (prefix, self.elem, self.id))
+
+class DitaRef(DitaLinkCheckBase):
+    """Represents a reference node.
+
+    The identity is '<element tag> <href value>'. An element without an href
+    attribute is accepted: error 201 is recorded and the URL parts (path,
+    fragment, scheme) are then reported as empty strings."""
+    def __init__(self, theN):
+        self._elem = theN.tag
+        self._href = theN.get('href', None)
+        super(DitaRef, self).__init__('%s %s' % (self._elem, self._href))
+        # This is used when figuring out of the target is the correct element
+        # e.g. in Vanilla DITA
+        # <topicref href="batcaring.dita" type="task"></topicref>
+        self._refType = theN.get('type', None)
+        # Format attribute, this can be format="ditamap"
+        self._format = theN.get('format', None)
+        if self._href is None:
+            self.addError(201, (self._elem,))
+            self._url = None
+        else:
+            self._url = urlparse.urlparse(self._href)
+            # urlparse splits at the first '#' so any '#' left in the fragment
+            # means that the href had more than one
+            if '#' in self._url.fragment:
+                self.addError(200, (self._href,))
+
+    @property
+    def elem(self):
+        """The tag of the referencing element."""
+        return self._elem
+
+    @property
+    def href(self):
+        """The value of the href attribute."""
+        return self._href
+
+    @property
+    def refType(self):
+        """The value of the type attribute."""
+        return self._refType
+
+    @property
+    def format(self):
+        """The value of the format attribute."""
+        return self._format
+
+    @property
+    def path(self):
+        """The value of the path part of the href attribute,
+        '' if there is no href attribute."""
+        if self._url is None:
+            return ''
+        return self._url.path
+
+    @property
+    def fragment(self):
+        """The value of the fragment part of the href attribute,
+        '' if there is no href attribute."""
+        if self._url is None:
+            return ''
+        return self._url.fragment
+
+    @property
+    def scheme(self):
+        """The URI scheme e.g. 'http' or '' if no scheme or no href attribute."""
+        if self._url is None:
+            return ''
+        return self._url.scheme
+
+    def fileFragment(self, theRefFile):
+        """The absolute path of the file and the fragment identifier or (None, None).
+        theRefFile is the path of the file that holds this reference; a path
+        in the href is taken relative to its directory and an empty path
+        means theRefFile itself.
+        (None, None) is returned when there is no href attribute or when the
+        scheme is other than '' or 'file'."""
+        if self._url is None or self.scheme not in ('', 'file'):
+            return (None, None)
+        if len(self.path) == 0:
+            myPath = theRefFile
+        else:
+            myPath = os.path.join(os.path.dirname(theRefFile), self.path)
+        return normalisePath(myPath), self.fragment
+
+    def checkGuid(self):
+        """optionally applies additional checks for GUID requirements.
+        Records error 203 for the path and 204 for the fragment when they do
+        not start with text that RE_GUID matches. Does nothing when there is
+        no href attribute, error 201 already covers that."""
+        if self._url is None:
+            return
+        if RE_GUID.match(self.path) is None:
+            self.addError(203, (self.path,))
+        if RE_GUID.match(self.fragment) is None:
+            self.addError(204, (self.fragment,))
+
+    def checkUrl(self):
+        """If the reference has a scheme then record error 300 when
+        GlobalUrlCache reports that the URL can not be accessed."""
+        if self.scheme:
+            myU = urlparse.urlunparse(self._url)
+            if not GlobalUrlCache.canAccess(myU):
+                self.addError(300, (myU,))
+
+    def debugDump(self, s=sys.stdout, prefix=''):
+        """Dump of IR for debug purposes."""
+        s.write('%sREF: <%s href="%s" />\n' % (prefix, self.elem, self._href))
+
+class DitaFileObj(DitaLinkCheckBase):
+    """Base class for a DITA topic or map.
+
+    The XML is parsed in the constructor. What is kept is the root element
+    name (doctype), the root ID, the IDs that occur exactly once, the IDs that
+    occur more than once and the references (elements with an href)."""
+    def __init__(self, theFileObj, theFileName=None):
+        """Initialiser with a file object and a file path.
+        The identity is the normalised theFileName if that is given, otherwise
+        theFileObj.name, otherwise None.
+        If theFileObj is None then error 400 is recorded. theFileObj is read
+        here but it is not closed."""
+        #print '\nDitaFileObj(%s, %s)' % (theFileObj, theFileName)
+        if theFileName is not None:
+            myIdentity = normalisePath(theFileName)
+        elif theFileObj is not None:
+            myIdentity = theFileObj.name
+        else:
+            myIdentity = None
+        super(DitaFileObj, self).__init__(myIdentity)
+        self._rootId = None
+        self._doctype = None
+        # Sets of class DitaId
+        self._idS = set()
+        self._dupeIdS = set()
+        # Set of class DitaRef
+        self._xrefS = set()
+        # Output control
+        self._hasWritten = False
+        # Size of input
+        self._bytes = self._retSize(theFileObj, theFileName)
+        # Process the file object
+        if theFileObj is None:
+            self.addError(400, (self.identity,))
+        else:
+            self._readTree(theFileObj)
+
+    def _retSize(self, theFileObj, theFileName):
+        """Best effort size of the input: the file size of theFileName, else
+        the len attribute of theFileObj (as if a StringIO), else 0."""
+        try:
+            return os.path.getsize(theFileName)
+        except Exception:
+            # Try as if a StringIO
+            try:
+                return theFileObj.len
+            except AttributeError:
+                # Give up
+                return 0
+
+    def _readTree(self, theFileObj):
+        """Parse theFileObj and load the doctype, IDs and references.
+        Records error 404 if the XML can not be parsed."""
+        try:
+            # TODO: use iterparse?
+            theTree = etree.parse(theFileObj)
+        except SyntaxError as err:
+            self.addError(404, (str(err),))
+            return
+        # Walk the tree
+        for i, e in enumerate(theTree.getiterator()):
+            # Element [0] is the root element
+            if i == 0:
+                assert(self._rootId is None)
+                assert(self._doctype is None)
+                self._doctype = e.tag
+                if e.get('id', None) is not None:
+                    self._rootId = DitaId(e)
+                    self._addId(self._rootId)
+                else:
+                    self.addError(402, ())
+            else:
+                # NOTE: Elements with id attributes can also have href
+                # attributes. For example a <topicref> in a <bookmap>
+                # Thus these tests are not exclusive
+                if e.get('id', None) is not None:
+                    self._addId(DitaId(e))
+                if e.get('href', None) is not None:
+                    # TODO: Do we limit ourselves to only a certain set of elements?
+                    self._xrefS.add(DitaRef(e))
+
+    def _addId(self, theId):
+        """Add a DitaId. The second sighting of an ID moves it from the set of
+        IDs to the set of duplicates and records error 401, further sightings
+        are ignored."""
+        if theId in self._idS:
+            self._idS.remove(theId)
+            self._dupeIdS.add(theId)
+            self.addError(401, (theId.identity,))
+        elif theId not in self._dupeIdS:
+            self._idS.add(theId)
+
+    @property
+    def bytes(self):
+        """Size of the input, 0 if it could not be found."""
+        return self._bytes
+
+    @property
+    def doctype(self):
+        """Name of the root element, None if nothing was parsed."""
+        return self._doctype
+
+    @property
+    def rootId(self):
+        """The id of the root element as a string, None if there is not one."""
+        if self._rootId is not None:
+            return self._rootId.id
+
+    @property
+    def isMap(self):
+        """True if the root element is 'map', 'bookmap' or ends with 'Map'."""
+        myDoctype = self.doctype
+        return myDoctype == "map" \
+            or myDoctype == 'bookmap' \
+            or (myDoctype is not None and myDoctype.endswith('Map'))
+
+    @property
+    def idS(self):
+        """The set of IDs."""
+        return self._idS
+
+    @property
+    def refS(self):
+        """The set of DitaRef objects."""
+        return self._xrefS
+
+    def idElemMap(self):
+        """Returns a map {id : elem name, ...}."""
+        return dict((anId.id, anId.elem) for anId in self._idS)
+
+    def hasId(self, theString):
+        """True if theString is the id of a member of the set of IDs."""
+        return self.idObj(theString) is not None
+
+    def idElem(self, theString):
+        """The element name for the id theString, or None."""
+        myObj = self.idObj(theString)
+        if myObj is not None:
+            return myObj.elem
+        return None
+
+    def idObj(self, theString):
+        """The DitaId whose id is theString, or None."""
+        for anId in self._idS:
+            if theString == anId.id:
+                return anId
+        return None
+
+    def updateErrorCount(self, theMap):
+        """Updates a map of {error_code, : count, ...} with the errors of this
+        file, of its IDs and of its references."""
+        super(DitaFileObj, self).updateErrorCount(theMap)
+        for idObj in self.idS:
+            idObj.updateErrorCount(theMap)
+        for refObj in self.refS:
+            refObj.updateErrorCount(theMap)
+
+    def writeErrorList(self, theList, theSubHead='', theS=sys.stdout):
+        """Write a list of error strings, nothing is written for an empty
+        list. theList is sorted in place. The 'File: ...' heading is written
+        ahead of the first non-empty list only."""
+        if len(theList) > 0:
+            theList.sort()
+            if not self._hasWritten:
+                theS.write('File: %s\n' % self.identity)
+                self._hasWritten = True
+            if len(theSubHead) > 0:
+                theS.write('%s [%d]:\n' % (theSubHead, len(theList)))
+            theS.write('\n'.join(theList))
+            theS.write('\n')
+
+    def writeErrors(self, isGeneric, theFilter, theStream=sys.stdout):
+        """Writes out errors for me, my IDs and my Refs."""
+        self._hasWritten = False
+        self.writeErrorList(self.errStrings(isGeneric, theFilter), 'File errors:', theStream)
+        # Now IDs
+        for anId in sorted(self.idS):
+            self.writeErrorList(anId.errStrings(isGeneric, theFilter), 'ID=%s' % anId.identity, theStream)
+        # Now Refs
+        for aRef in sorted(self._xrefS):
+            self.writeErrorList(aRef.errStrings(isGeneric, theFilter), 'Ref=%s' % aRef.identity, theStream)
+        if self._hasWritten:
+            theStream.write('\n')
+
+    def debugDump(self, s=sys.stdout, prefix=''):
+        """Dump of IR for debug purposes."""
+        s.write('%sFile: %s\n' % (prefix, self.identity))
+        for anId in self._idS:
+            anId.debugDump(s, prefix=prefix+' ')
+        for aRef in self._xrefS:
+            aRef.debugDump(s, prefix=prefix+' ')
+
+class DitaFilePath(DitaFileObj):
+    """Base class for a DITA topic or map from the file system."""
+    def __init__(self, theFilePath):
+        """Initialiser with a file path. The file is opened, read by the
+        parent initialiser and closed again. Error 400 is recorded if the
+        file can not be opened."""
+        try:
+            f = open(theFilePath)
+        except IOError:
+            f = None
+        try:
+            super(DitaFilePath, self).__init__(f, theFilePath)
+        finally:
+            # The parent has finished reading so release the handle
+            if f is not None:
+                f.close()
+        if f is None:
+            # NOTE(review): DitaFileObj records 400 against the normalised
+            # path when it is given no file object, so this may report the
+            # same failure a second time under the raw path - confirm.
+            self.addError(400, (theFilePath,))
+
+
+class DitaFileMapBase(object):
+    """Interface of a map of {file path : class DitaFile, ...}.
+    Every method here raises NotImplementedError; a concrete class can keep
+    the map in memory or in a database e.g. with the shelve module."""
+    def keys(self):
+        """The keys of the map as an unsorted list."""
+        raise NotImplementedError()
+
+    def has_key(self, thePath):
+        """True if thePath is a key of the map."""
+        raise NotImplementedError()
+
+    def remove(self, thePath):
+        """Delete the entry for thePath, may raise KeyError."""
+        raise NotImplementedError()
+
+    def getDitaFileObj(self, thePath):
+        """The DitaFileObj stored for thePath, may raise KeyError."""
+        raise NotImplementedError()
+
+    def setDitaFileObj(self, thePath, theObj):
+        """Store a new DitaFileObj, or store again one that has been mutated."""
+        raise NotImplementedError()
+
+class DitaFileMapInMemory(DitaFileMapBase):
+    """Holds map of {file path : class DitaFile, ...} in memory."""
+    def __init__(self):
+        # Map of {file path : class DitaFile, ...}
+        self._fileMap = {}
+
+    def keys(self):
+        """Returns an unsorted list of keys in the map."""
+        # Always a new list, callers sort the result in place
+        return list(self._fileMap)
+
+    def has_key(self, thePath):
+        """Return True if the key exists."""
+        # The 'in' operator replaces the deprecated dict.has_key()
+        return thePath in self._fileMap
+
+    def remove(self, thePath):
+        """Remove the entry corresponding to thePath, may raise KeyError."""
+        del self._fileMap[thePath]
+
+    def getDitaFileObj(self, thePath):
+        """Return a DitaFileObj that corresponds to thePath, may raise KeyError."""
+        return self._fileMap[thePath]
+
+    def setDitaFileObj(self, thePath, theObj):
+        """Load a DitaFileObj or update a mutated DitaFileObj."""
+        self._fileMap[thePath] = theObj
+
+class DitaFileMapShelve(DitaFileMapBase):
+    """Holds map of {file path : class DitaFile, ...} in a shelve database.
+    getDitaFileObj() hands back a copy read from the database, so an object
+    that has been changed must be stored again with setDitaFileObj()."""
+    DBASE_FILENAME = 'linkchecker.dbase'
+    def __init__(self):
+        if os.path.exists(self.DBASE_FILENAME):
+            os.remove(self.DBASE_FILENAME)
+        # flag='n' asks for a new, empty database. The removal above only
+        # finds a file with exactly this name but the underlying database
+        # module may add an extension to it, so a stale database could
+        # otherwise be picked up.
+        self._db = shelve.open(self.DBASE_FILENAME, flag='n')
+        # Use this as a 'cache' as shelf.keys() is slow
+        self._keys = set()
+
+    def keys(self):
+        """Returns an unsorted list of keys in the map."""
+        return list(self._keys)
+
+    def has_key(self, thePath):
+        """Return True if the key exists."""
+        return thePath in self._keys
+
+    def remove(self, thePath):
+        """Remove the entry corresponding to thePath, may raise KeyError."""
+        del self._db[thePath]
+        self._keys.remove(thePath)
+
+    def getDitaFileObj(self, thePath):
+        """Return a DitaFileObj that corresponds to thePath, may raise KeyError."""
+        return self._db[thePath]
+
+    def setDitaFileObj(self, thePath, theObj):
+        """Load a DitaFileObj or update a mutated DitaFileObj."""
+        self._db[thePath] = theObj
+        self._keys.add(thePath)
+
+class DitaFileSet(DitaLinkCheckBase):
+ """Holds information about a set of DITA files."""
+ STATS_KEYS = ('Maps', 'Non-maps', 'Files', 'Bytes', 'IDs', 'Refs')
+    def __init__(self,
+                 theDir,
+                 procDir=True,
+                 thePatterns=None,
+                 recursive=False,
+                 testExt=False,
+                 useDbase=False):
+        """Constructor. theDir is the root directory of DITA XML.
+        procDir - If True then process this directory immediately, otherwise
+                    the directory can be processed independently and
+                    _addFileObj() or _addDitaFileObj() invoked.
+        thePatterns - If supplied this should be a space separated string of
+                    fnmatch extensions, a sequence of patterns is also
+                    accepted. The default is FNMATCH_STRING.
+        recursive - If True and procDir True the directory is processed recursively.
+        testExt - If True then test external URLs.
+        useDbase - If True then store all DitaFile objects in an external dbase
+                    (slower but less memory issues).
+        """
+        if thePatterns is None:
+            thePatterns = FNMATCH_STRING.split(' ')
+        elif hasattr(thePatterns, 'split'):
+            # A string, as documented. Without the split the patterns would
+            # be taken one character at a time by _readDir().
+            thePatterns = thePatterns.split()
+        if theDir is not None:
+            theDir = normalisePath(theDir)
+        super(DitaFileSet, self).__init__(theDir)
+        logging.info('DitaFileSet starting to read...')
+        GlobalUrlCache.clear()
+        self._testExt = testExt
+        # Set up how we store the DitaFile objects
+        if useDbase:
+            self._fileMap = DitaFileMapShelve()
+        else:
+            self._fileMap = DitaFileMapInMemory()
+        # Map of (str(rootId) : filepath, ...) with no duplicates
+        # Keys will be in self._uniqueRootIds
+        self._rootIdToFilePathMap = {}
+        # Path to the unique DITA map
+        self._uniqueMapPath = None
+        # Count of {error_code : count, ...}
+        self._errCountMap = CountDict()
+        # Statistics
+        self._statsMap = CountDict()
+        # Finalisation control (weak)
+        self._hasFinalised = False
+        # Timers
+        self._timeRead = time.clock()
+        self._timeAnalyse = 0.0
+        if procDir:
+            if theDir is not None and os.path.isdir(theDir):
+                self._readDir(theDir, thePatterns, recursive)
+            else:
+                self.addError(500, (theDir,))
+            # Finalise and run all the tests
+            self.finalise()
+
+    @property
+    def errCountMap(self):
+        """The map of {error_code : count, ...} held by this file set."""
+        return self._errCountMap
+
+    @property
+    def statsMap(self):
+        """The map of statistics {name : count, ...} held by this file set."""
+        return self._statsMap
+
+ def writeStatistics(self, s=sys.stdout):
+ """Writes out read statistics to the stream s.
+ If the statistics map has any entries there is a line for each member of
+ STATS_KEYS and then the read time and the analysis time, otherwise
+ 'Nothing processed.' is written."""
+ s.write(' Statistics '.center(PRINT_WIDTH, '='))
+ s.write('\n')
+ if len(self._statsMap) > 0:
+ o = self.STATS_KEYS
+ #assert(set(o) == set(self._statsMap.keys())), \
+ # '%s != %s' % (o, self._statsMap.keys())
+ for k in o:
+ try:
+ # The value is written as it is and also divided by 1024*1024
+ m = self._statsMap[k] / (1024.0*1024.0)
+ s.write('%20s: %10d [%10.3f M]\n' % (k, self._statsMap[k], m))
+ except KeyError:
+ # NOTE(review): the constructor makes self._statsMap a CountDict which
+ # returns 0 for an unknown key rather than raising KeyError, so this
+ # branch looks unreachable - confirm.
+ s.write('%20s: %10s \n' % (k, 'Not seen'))
+ s.write('%20s: %10.3f (s)\n' % ('Read time', self._timeRead))
+ s.write('%20s: %10.3f (s)\n' % ('Analysis time', self._timeAnalyse))
+ s.write('='*PRINT_WIDTH)
+ else:
+ s.write('Nothing processed.')
+ s.write('\n')
+
+ def writeErrorSummary(self, s=sys.stdout):
+ """Writes to the stream s a table of error code, count and generic
+ message for the codes in the error count map, in ascending order of
+ code, or 'No errors' if that map is empty."""
+ s.write(' Error Summary '.center(PRINT_WIDTH, '='))
+ s.write('\n')
+ if len(self._errCountMap):
+ s.write('%4s %10s %s\n' % ('Code', 'Count', 'Error'))
+ s.write('%4s %10s %s\n' % ('----', '-----', '-----'))
+ errCodeS = self._errCountMap.keys()
+ errCodeS.sort()
+ for c in errCodeS:
+ s.write('%4d %10d %s\n' \
+ % (c, self._errCountMap[c], genericStringForErrorCode(c)))
+ else:
+ s.write('No errors\n')
+ s.write('='*PRINT_WIDTH)
+ s.write('\n')
+
+    def writeErrors(self, isGeneric, theFilter, theStream=sys.stdout):
+        """Write the errors of the file set itself and then, in sorted order
+        of file path, the errors of each file."""
+        myErrS = self.errStrings(isGeneric, theFilter)
+        theStream.write('\n'.join(myErrS))
+        for aPath in sorted(self._fileMap.keys()):
+            # Nothing is changed in the file object so get is enough
+            fObj = self._fileMap.getDitaFileObj(aPath)
+            fObj.writeErrors(isGeneric, theFilter, theStream)
+
+    def allErrStrings(self, isGeneric, theFilter):
+        """Return a sorted list of error messages without duplicates including
+        files.
+        For each file this takes the result of its errStrings().
+        isGeneric - If True arguments are replaced by the generic string.
+        theFilter - None, or a container of the error codes to include."""
+        retSet = set(self.errStrings(isGeneric, theFilter))
+        # The order of the files does not matter as the result is sorted
+        for aFilePath in self._fileMap.keys():
+            # Immutable call so just use get
+            fObj = self._fileMap.getDitaFileObj(aFilePath)
+            retSet.update(fObj.errStrings(isGeneric, theFilter))
+        return sorted(retSet)
+
+    def _readDir(self, theDir, thePatS, recursive):
+        """Add every file in theDir whose name matches one of thePatS.
+        theDir - A directory (asserted).
+        thePatS - A sequence of fnmatch patterns, a file is added once however
+                    many of them match.
+        recursive - If True then sub-directories are read as well.
+        A file that can not be opened is still added, with None as the file
+        object."""
+        assert(os.path.isdir(theDir))
+        for aName in os.listdir(theDir):
+            aPath = os.path.join(theDir, aName)
+            if os.path.isdir(aPath) and recursive:
+                self._readDir(aPath, thePatS, recursive)
+            elif os.path.isfile(aPath):
+                for aPat in thePatS:
+                    if fnmatch.fnmatch(aName, aPat):
+                        assert(not self._fileMap.has_key(aPath))
+                        logging.debug(' Reading %s' % aPath)
+                        try:
+                            f = open(aPath)
+                        except IOError:
+                            f = None
+                        try:
+                            self._addFileObj(f, aPath)
+                        finally:
+                            # The file object is read while it is being added
+                            # so the handle can be released now
+                            if f is not None:
+                                f.close()
+                        break
+
+    def _addFileObj(self, theFileObj, theFilePath):
+        """Make a DitaFileObj from a file object and its path and add it."""
+        self._addDitaFileObj(DitaFileObj(theFileObj, theFilePath))
+
+ def _addDitaFileObj(self, theDitaFileObj):
+ """Store theDitaFileObj in the file map under its identity, or record
+ error 504 if that identity is already in the map. The statistics
+ counters are also updated here."""
+ if self._fileMap.has_key(theDitaFileObj.identity):
+ self.addError(504, (theDitaFileObj.identity,))
+ else:
+ # Mutable call so use set
+ self._fileMap.setDitaFileObj(theDitaFileObj.identity, theDitaFileObj)
+ # Update statistics (files, bytes, ids, refs) etc.
+ self._statsMap['Files'] += 1
+ self._statsMap['Bytes'] += theDitaFileObj.bytes
+ self._statsMap['IDs'] += len(theDitaFileObj.idS)
+ self._statsMap['Refs'] += len(theDitaFileObj.refS)
+ # Each file counts as either a map or a non-map
+ if theDitaFileObj.isMap:
+ self._statsMap['Maps'] += 1
+ else:
+ self._statsMap['Non-maps'] += 1
+
+ def finalise(self):
+ """Creates the environment for all checks and then runs them.
+ The work is guarded by self._hasFinalised, which is set once the checks
+ have run. The read and analysis timers are also completed here."""
+ logging.info('DitaFileSet.finalise() start...')
+ if not self._hasFinalised:
+ # self._timeRead held the start time, it becomes the elapsed read time
+ self._timeRead = time.clock() - self._timeRead
+ self._timeAnalyse = time.clock()
+ self._initRootIdToFilePathMap()
+ self._checkDupeIdS()
+ self._setMapCycles()
+ self._checkLonely()
+ self._checkRefArcs()
+ # Count the errors afresh now that all checks have added theirs
+ self._errCountMap = CountDict()
+ self.updateErrorCount(self._errCountMap)
+ self._hasFinalised = True
+ self._timeAnalyse = time.clock() - self._timeAnalyse
+ logging.info('DitaFileSet.finalise() done.')
+
+    def _initRootIdToFilePathMap(self):
+        """Rebuild self._rootIdToFilePathMap as {root id : file path, ...} for
+        the root IDs that belong to one file only. For each root ID that is
+        in more than one file error 501 is recorded with the sorted paths of
+        those files. Files with no root ID are passed over."""
+        # Map of (str(rootId) : filepath, ...) with no duplicates
+        uniqueMap = self._rootIdToFilePathMap = {}
+        # Temporary map of (str(rootId) : [filepath, ...], ...)
+        dupeMap = {}
+        for fPath in self._fileMap.keys():
+            # fObj is not written to so we don't need to use set
+            fObj = self._fileMap.getDitaFileObj(fPath)
+            rId = fObj.rootId
+            if rId is None:
+                continue
+            if rId in dupeMap:
+                # Third or later file with this root ID
+                dupeMap[rId].append(fObj.identity)
+            elif rId in uniqueMap:
+                # Second file: the ID is unique no longer
+                dupeMap[rId] = [uniqueMap.pop(rId), fPath]
+            else:
+                uniqueMap[rId] = fObj.identity
+        # Set duplicate errors
+        for rId, pathS in dupeMap.items():
+            self.addError(501, (rId, tuple(sorted(pathS))))
+
+    def _checkDupeIdS(self):
+        """Record error 505 for every ID that is in the ID set of more than
+        one file, with the sorted paths of those files."""
+        # {ID : first file ID is seen in, ...}
+        firstSeenMap = {}
+        # {ID : [fileS, ...], ...}
+        dupeMap = {}
+        for aPath in self._fileMap.keys():
+            # Read only so get is enough
+            fObj = self._fileMap.getDitaFileObj(aPath)
+            for anId in fObj.idS:
+                if anId not in firstSeenMap:
+                    firstSeenMap[anId] = aPath
+                    continue
+                if anId not in dupeMap:
+                    # Start the list with the file that had the ID first
+                    dupeMap[anId] = [firstSeenMap[anId],]
+                dupeMap[anId].append(aPath)
+        # Now add to errs as a 505 error message with the files sorted
+        for anId, pathS in dupeMap.items():
+            self.addError(505, (anId, tuple(sorted(pathS))))
+
+    def _retMapAdjList(self):
+        """Return {map file path : set(referenced file paths), ...} with an
+        entry for each file that is a map. The members of the sets are the
+        first item of what fileFragment() returns for each reference."""
+        adjList = {}
+        for aPath in self._fileMap.keys():
+            fObj = self._fileMap.getDitaFileObj(aPath)
+            if not fObj.isMap:# and fObj.rootId is not None:
+                continue
+            myIdent = fObj.identity
+            assert(myIdent not in adjList)
+            adjList[myIdent] = set([r.fileFragment(myIdent)[0] for r in fObj.refS])
+        return adjList
+
+    def _setMapCycles(self):
+        """Find the cycles of references that start from each map and record
+        them as errors."""
+        adjList = self._retMapAdjList()
+        myCycles = set()
+        for aPath in adjList.keys():
+            # Each search starts with a branch that holds the start map only
+            self._recurseCycles(adjList, [aPath,], myCycles)
+        self._setCycleErrors(myCycles)
+
+    def _recurseCycles(self, a, b, c):
+        """Depth first search for cycles.
+        a - The adjacency list {path : set(paths), ...}.
+        b - The current branch, a non-empty list of paths; it is extended
+            during the search and is as it was on return.
+        c - A set, each cycle found is added to it as a tuple of paths.
+        A path that is not a key of a has nowhere to go."""
+        assert(len(b) > 0)
+        try:
+            myRefS = a[b[-1]]
+        except KeyError:
+            return
+        for r in myRefS:
+            if r in b:
+                # Back to a member of the branch, the cycle starts there
+                c.add(tuple(b[b.index(r):]))
+                continue
+            b.append(r)
+            self._recurseCycles(a, b, c)
+            b.pop()
+
+    def _setCycleErrors(self, theC):
+        """Record error 701 for each cycle in theC, an iterable of tuples of
+        file paths. The file set gets the error with the tuple as it stands.
+        Each file in the cycle gets the error with the cycle written as a
+        list that starts and ends with that file."""
+        for aT in theC:
+            self.addError(701, (str(aT),))
+            myL = list(aT)
+            assert(len(myL) > 0)
+            for i in range(len(myL)):
+                # Rotate so that member i leads and then close the loop
+                myRot = myL[i:] + myL[:i]
+                myClosed = myRot + myRot[:1]
+                # Should this be in the file thus, or in the files set?
+                # The file object is mutated so it is fetched with
+                # getDitaFileObj() and stored again with setDitaFileObj()
+                fObj = self._fileMap.getDitaFileObj(myRot[0])
+                fObj.addError(701, (str(myClosed),))
+                self._fileMap.setDitaFileObj(myRot[0], fObj)
+
+    def _checkLonely(self):
+        """Run the lonely checks, maps first and then topics."""
+        for aCheck in (self._checkLonelyMaps, self._checkLonelyTopics):
+            aCheck()
+
+    def _checkLonelyMaps(self):
+        """Checks for lonely maps.
+        Starting with all the maps, every map that is referenced by a map is
+        taken away. If more than one map is left then error 700 is recorded
+        for each of them. If exactly one is left then it becomes
+        self._uniqueMapPath."""
+        mapPathSet = set()
+        for f in self._fileMap.keys():
+            if self._fileMap.getDitaFileObj(f).isMap:
+                mapPathSet.add(f)
+        pathSetRemain = set(mapPathSet)
+        for aPath in mapPathSet:
+            myMapObj = self._fileMap.getDitaFileObj(aPath)
+            for r in myMapObj.refS:
+                # The reference must be resolved against the map that holds
+                # it (aPath). The loop variable of the scan above was used
+                # here before, so relative references were resolved against
+                # whichever file that scan happened to end on.
+                refFile, frag = r.fileFragment(aPath)
+                # No effect if refFile is a topic or an already seen map
+                pathSetRemain.discard(refFile)
+        if len(pathSetRemain) > 1:
+            for aPath in pathSetRemain:
+                self.addError(700, (aPath,))
+        elif len(pathSetRemain) == 1:
+            self._uniqueMapPath = pathSetRemain.pop()
+
+    def _checkLonelyTopics(self):
+        """Record error 600 for every file that is not a map and that no map
+        has a reference to."""
+        mapPathSet = set()
+        pathSetRemain = set()
+        for f in self._fileMap.keys():
+            if self._fileMap.getDitaFileObj(f).isMap:
+                mapPathSet.add(f)
+            else:
+                pathSetRemain.add(f)
+        for aMapPath in mapPathSet:
+            for r in self._fileMap.getDitaFileObj(aMapPath).refS:
+                refFile, frag = r.fileFragment(aMapPath)
+                # No effect if the topic has already been seen in another map
+                pathSetRemain.discard(refFile)
+        for aPath in pathSetRemain:
+            self.addError(600, (aPath,))
+
+ def _checkRefArcs(self):
+ """Checks all references are reachable."""
+ for fPath in self._fileMap.keys():
+ fObjSrc = self._fileMap.getDitaFileObj(fPath)
+ hasMutated = False
+ for rObjSrc in fObjSrc.refS:
+ if rObjSrc.scheme:
+ # Decide whether to test and external URL
+ if self._testExt:
+ rObjSrc.checkUrl()
+ else:
+ fi, fr = rObjSrc.fileFragment(fPath)
+ assert(fi is not None), 'fi is None for rObjSrc: %s in file: %s' % (rObjSrc, fPath)
+ assert(fr is not None), 'fr is None for rObjSrc: %s in file: %s' % (rObjSrc, fPath)
+ ## If a url then fileFragment() returns (None, None)
+ #if fi is None:
+ # print 'fPath', fPath
+ # print 'rObjSrc', rObjSrc
+ # print 'fi', fi
+ # print 'fr', fr
+ try:
+ fObjTgt = self._fileMap.getDitaFileObj(fi)
+ except KeyError:
+ # Target file can not be found in the IR
+ # check the file system to see if it is a non-DITA resource
+ if not os.path.isfile(fi):
+ #print 'TRACE: adding 410 to', fObj.identity
+ fObjSrc.addError(410, (fi,))
+ hasMutated = True
+ else:
+ if len(fr) > 0:
+ # Target file is found, test fragment
+ if not fObjTgt.hasId(fr):
+ # Fragment not found
+ fObjSrc.addError(411, (fi, fr))
+ hasMutated = True
+ if self._checkRefArcElemName(fObjSrc, rObjSrc, fObjTgt, fr):
+ hasMutated = True
+ if hasMutated:
+ self._fileMap.setDitaFileObj(fPath, fObjSrc)
+
+ def _checkRefArcElemName(self, fObjSrc, rObjSrc, fObjTgt, frag):
+ """Test source and target element names
+ e.g. Source <cxxClassRef> should match target <cxxClass>
+ And in vanilla DITA:
+ <topicref href="batcaring.dita" type="task"></topicref>
+ or:
+ <topicref href="batcaring.dita" format="ditamap"></topicref>
+ Should match target element <task>."""
+ isRootTgt = False
+ hasMutated = False
+ if len(frag) == 0:
+ # iObjTgt is the root element of fObjTgt
+ if fObjTgt.rootId is None or fObjTgt.idElem(fObjTgt.rootId) is None:
+ # Covered by other error codes
+ return
+ iObjTgt = fObjTgt.idObj(fObjTgt.rootId)
+ isRootTgt = True
+ elif fObjTgt.hasId(frag):
+ iObjTgt = fObjTgt.idObj(frag)
+ else:
+ # frag not found that will be a 411 error (handled by caller).
+ return
+ # Have an rObjSrc + iObjTgt so check elements
+ # First case:
+ if rObjSrc.elem.endswith('Ref'):
+ if rObjSrc.elem[:-3] != iObjTgt.elem:
+ if isRootTgt:
+ fObjSrc.addError(412, (rObjSrc.elem, iObjTgt.elem))
+ else:
+ fObjSrc.addError(413, (fObjTgt.idElem(frag), rObjSrc.elem, frag))
+ hasMutated = True
+ # Second case(s) for vanilla DITA
+ elif rObjSrc.elem == 'topicref':
+ # Check DITA map links
+ if rObjSrc.format == 'ditamap' and iObjTgt.elem != 'map':
+ # Target must be a root element (actually we don't care)
+ fObjSrc.addError(414, (iObjTgt.elem,))
+ hasMutated = True
+ elif iObjTgt.elem == 'map' and rObjSrc.format != 'ditamap':
+ fObjSrc.addError(415, (rObjSrc.format,))
+ hasMutated = True
+ elif not (rObjSrc.format == 'ditamap' and iObjTgt.elem == 'map'):
+ # Treat refType None as type="topic", see DITA standard for <topicref>
+ # Well, also look at the type attribute in chapter 25
+ # "When the type attribute is unspecified, it should be
+ # determined by inspecting the target if possible. If the
+ # target cannot be inspected for some reason, the value
+ # should default to "topic".
+ # Note: DITA 1.2 takes a different view...
+ # Was:
+ #if (rObjSrc.refType is None and iObjTgt.elem != 'topic') \
+ #or (rObjSrc.refType is not None and rObjSrc.refType != iObjTgt.elem):
+ if rObjSrc.refType is not None and rObjSrc.refType != iObjTgt.elem:
+ if isRootTgt:
+ fObjSrc.addError(416, (rObjSrc.refType, iObjTgt.elem,))
+ hasMutated = True
+ else:
+ fObjSrc.addError(417, (rObjSrc.refType, iObjTgt.elem, frag,))
+ hasMutated = True
+ # Otherwise topicref looks OK
+ elif rObjSrc.elem != 'xref' and rObjSrc.elem not in XREF_DESCENDENTS:
+ # Unknown referencing element
+ if isRootTgt:
+ fObjSrc.addError(418, (rObjSrc.elem, fObjTgt.doctype))
+ hasMutated = True
+ else:
+ fObjSrc.addError(419, (rObjSrc.elem, fObjTgt.idElem(frag), frag))
+ hasMutated = True
+ return hasMutated
+
def updateErrorCount(self, theMap):
    """Adds the error counts of this set, and of every file in it, to theMap.

    theMap maps {error_code : count, ...} and is updated in place; it
    must yield a number for an error code that it has not seen yet."""
    ownErrors = self._errS
    if ownErrors is not None:
        for aCode in ownErrors.keys():
            theMap[aCode] += len(ownErrors[aCode])
    fileMap = self._fileMap
    for aPath in fileMap.keys():
        fileObj = fileMap.getDitaFileObj(aPath)
        fileObj.updateErrorCount(theMap)
        # The call above is treated as mutating, so write the object back.
        fileMap.setDitaFileObj(aPath, fileObj)
+
def debugDump(self, s=sys.stdout, prefix=''):
    """Dump of IR for debug purposes.

    Writes a banner to s, then the debugDump() of every file object in
    sorted key order, then a closing banner. prefix is passed through
    unchanged to each file object."""
    banner = ' Debug Dump '.center(PRINT_WIDTH, '+')
    s.write(banner)
    s.write('\n')
    for aPath in sorted(self._fileMap.keys()):
        self._fileMap.getDitaFileObj(aPath).debugDump(s, prefix)
    trailer = ' END Debug Dump '.center(PRINT_WIDTH, '+')
    s.write(trailer)
    s.write('\n\n')
+
+#####################################
+# Multiprocessing code
+#####################################
def retDitaFileObj(thePath):
    """Returns a DitaFilePath object built from thePath.

    This is the function that retMpDitaFileSetObj() hands to the
    multiprocessing pool for each path."""
    fileObj = DitaFilePath(thePath)
    return fileObj
+
def genDitaPath(theDir, thePatS, recursive):
    """Generates the paths of the files in theDir that match a pattern.

    theDir must be an existing directory. A file is yielded once if its
    name matches any of the fnmatch patterns in thePatS. Sub-directories
    are descended into only if recursive is true. The order follows
    os.listdir()."""
    assert(os.path.isdir(theDir))
    for anEntry in os.listdir(theDir):
        fullPath = os.path.join(theDir, anEntry)
        if recursive and os.path.isdir(fullPath):
            for subPath in genDitaPath(fullPath, thePatS, recursive):
                yield subPath
            continue
        if not os.path.isfile(fullPath):
            continue
        # any() stops at the first hit so a file is never yielded twice.
        if any(fnmatch.fnmatch(anEntry, aPat) for aPat in thePatS):
            yield fullPath
+
def retMpDitaFileSetObj(theDir,
                        thePatterns,
                        recursive,
                        numJobs,
                        checkExt,
                        useDb):
    """Returns a finalised DitaFileSet for theDir, reading files in parallel.

    theDir      - an existing directory.
    thePatterns - fnmatch patterns selecting the files to read.
    recursive   - if true sub-directories are searched as well.
    numJobs     - number of worker processes, 0 means one per CPU.
    checkExt    - passed to DitaFileSet as testExt.
    useDb       - passed to DitaFileSet as useDbase.

    Each file is read by retDitaFileObj() in a worker process; the
    results are added to the set in submission order and the set is then
    finalised serially. The pool is always shut down before returning or
    propagating an exception."""
    assert(os.path.isdir(theDir))
    assert(numJobs >= 0)
    retObj = DitaFileSet(theDir, procDir=False, testExt=checkExt, useDbase=useDb)
    myNumJobs = numJobs
    if numJobs == 0:
        myNumJobs = multiprocessing.cpu_count()
    logging.info('Set multiprocessing number of jobs to %d' % myNumJobs)
    myPool = multiprocessing.Pool(processes=myNumJobs)
    isComplete = False
    try:
        # Submit everything first, then collect in submission order.
        pending = [
            myPool.apply_async(retDitaFileObj, (f,))
            for f in genDitaPath(theDir, thePatterns, recursive)
        ]
        for result in pending:
            myObj = result.get()
            logging.debug('Got %s' % myObj.identity)
            retObj._addDitaFileObj(myObj)
        isComplete = True
    finally:
        # Previously the pool was never released, leaving the worker
        # processes behind. On failure do not wait for outstanding work.
        if isComplete:
            myPool.close()
        else:
            myPool.terminate()
        myPool.join()
    # Note: finalise() is a serial process
    logging.info('retMpDitaFileSetObj(): finalising')
    retObj.finalise()
    return retObj
+
+######################################
+# Test code
+######################################
+try:
+ import cStringIO as StringIO
+except ImportError:
+ import StringIO
+
class NullClass(unittest.TestCase):
    """An empty test case; it defines no tests of its own."""
+
class TestCountDict(unittest.TestCase):
    """Unit tests for CountDict."""

    def setUp(self):
        """Nothing to prepare."""
        return None

    def tearDown(self):
        """Nothing to clean up."""
        return None

    def testSetUpTearDown(self):
        """TestCountDict: test setUp() and tearDown()."""
        return None

    def test_basic(self):
        """TestCountDict: test basic functionality."""
        counter = CountDict()
        aKey = 'wtf'
        self.assertEqual(counter.has_key(aKey), False)
        # Reading an unknown key gives zero and, as the following check
        # expects, leaves the key present.
        self.assertEqual(counter[aKey], 0)
        self.assertEqual(counter.has_key(aKey), True)
        counter[aKey] += 1
        self.assertEqual(counter[aKey], 1)
+
class TestDitaId(unittest.TestCase):
    """Unit tests for DitaId, each built from a one element XML document."""

    @staticmethod
    def _rootOf(theXml):
        """Parses the string theXml and returns its root element."""
        return etree.parse(StringIO.StringIO(theXml)).getroot()

    def _assertNoErrors(self, theObj):
        """Asserts that theObj has neither generic nor specific errors."""
        self.assertEqual(theObj.errStrings(True, None), [])
        self.assertEqual(theObj.errStrings(False, None), [])

    def setUp(self):
        """Nothing to prepare."""
        return None

    def tearDown(self):
        """Nothing to clean up."""
        return None

    def testSetUpTearDown(self):
        """DitaId: test setUp() and tearDown()."""
        return None

    def test_basic(self):
        """DitaId: basic read of an node with an id"""
        idObj = DitaId(self._rootOf("""<cxxClass id="class_big_endian"/>"""))
        self.assertEqual(idObj.id, 'class_big_endian')
        self.assertEqual(str(idObj), 'class_big_endian')
        self._assertNoErrors(idObj)

    def test_guid_00(self):
        """DitaId: basic read of an node with an GUID id"""
        idObj = DitaId(
            self._rootOf("""<cxxClass id="GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E"/>""")
        )
        self.assertEqual(idObj.id, 'GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E')
        idObj.checkGuid()
        self._assertNoErrors(idObj)

    def test_guid_01(self):
        """DitaId: basic read of an node with an GUID id fails"""
        idObj = DitaId(
            self._rootOf("""<cxxClass id="25825EC4-341F-3EA4-94AA-7DCE380E6D2E"/>""")
        )
        self.assertEqual(idObj.id, '25825EC4-341F-3EA4-94AA-7DCE380E6D2E')
        idObj.checkGuid()
        expSpecific = [
            'GUID specification does not match id="25825EC4-341F-3EA4-94AA-7DCE380E6D2E"'
        ]
        self.assertEqual(idObj.errStrings(False, None), expSpecific)
        expGeneric = [
            'GUID specification does not match id="%s"' % GENERIC_STRING,
        ]
        self.assertEqual(idObj.errStrings(True, None), expGeneric)

    def test_cmp_eq_00(self):
        """DitaId: cmp(), == of two identical nodes"""
        # Both objects are built from the very same element.
        root = self._rootOf("""<cxxClass id="class_big_endian"/>""")
        first = DitaId(root)
        second = DitaId(root)
        self.assertEqual(cmp(first, second), 0)
        self.assertEqual((first == second), True)

    def test_cmp_eq_01(self):
        """DitaId: cmp(), == of two identical nodes from different elements."""
        first = DitaId(self._rootOf("""<cxxClass id="big_endian"/>"""))
        second = DitaId(self._rootOf("""<cxxStruct id="big_endian"/>"""))
        self.assertEqual(cmp(first, second), 0)
        self.assertEqual((first == second), True)

    def test_set(self):
        """DitaId: read of an node with an id several times into a set and check unique,"""
        root = self._rootOf("""<cxxClass id="class_big_endian"/>""")
        seen = set()
        for _count in range(8):
            seen.add(DitaId(root))
        self.assertEqual(len(seen), 1)
        self.assertEqual(DitaId(root) in seen, True)

    def test_map(self):
        """DitaId: read of an node with an id several times into a map and check unique,"""
        root = self._rootOf("""<cxxClass id="class_big_endian"/>""")
        seen = {}
        for _count in range(8):
            seen[DitaId(root)] = 1
        self.assertEqual(len(seen), 1)
        self.assertEqual(seen.has_key(DitaId(root)), True)

    def test_error_hash(self):
        """DitaId: error with a '#' in an id"""
        idObj = DitaId(self._rootOf("""<cxxClass id="class_#big_endian"/>"""))
        self.assertEqual(idObj.id, 'class_#big_endian')
        self.assertEqual(str(idObj), 'class_#big_endian')
        self.assertEqual(
            idObj.errStrings(True, None),
            [genericStringForErrorCode(100), ]
        )
        self.assertEqual(
            idObj.errStrings(False, None),
            ['Character \'#\' not allowed in id="class_#big_endian"', ]
        )
+
+
+
class TestDitaRef(unittest.TestCase):
    """Unit tests for DitaRef, each built from a single <xref> element."""

    @staticmethod
    def _refFrom(theXml):
        """Parses the string theXml and returns a DitaRef of its root."""
        return DitaRef(etree.parse(StringIO.StringIO(theXml)).getroot())

    @staticmethod
    def _onDriveC(*theParts):
        """Returns the normalised path of theParts joined below 'C:'."""
        return normalisePath(os.path.join('C:%s' % os.sep, *theParts))

    def _assertNoErrors(self, theRef):
        """Asserts that theRef has neither specific nor generic errors."""
        self.assertEqual(theRef.errStrings(False, None), [])
        self.assertEqual(theRef.errStrings(True, None), [])

    def setUp(self):
        """Nothing to prepare."""
        return None

    def tearDown(self):
        """Nothing to clean up."""
        return None

    def testSetUpTearDown(self):
        """DitaRef: test setUp() and tearDown()."""
        return None

    def test_basic(self):
        """DitaRef: basic read of an xref node, no fragment"""
        ref = self._refFrom("""<xref href="class_big_endian"/>""")
        self.assertEqual(ref.href, 'class_big_endian')
        self.assertEqual(ref.path, 'class_big_endian')
        self.assertEqual(ref.elem, 'xref')
        self.assertEqual(str(ref), 'xref class_big_endian')
        self.assertEqual(ref.fragment, '')
        self.assertEqual(ref.scheme, '')
        self._assertNoErrors(ref)

    def test_basic_frag(self):
        """DitaRef: basic read of an xref node, with fragment"""
        ref = self._refFrom("""<xref href="class_big_endian.xml#function"/>""")
        self.assertEqual(ref.href, 'class_big_endian.xml#function')
        self.assertEqual(ref.path, 'class_big_endian.xml')
        self.assertEqual(ref.fragment, 'function')
        self.assertEqual(ref.scheme, '')
        self._assertNoErrors(ref)

    def test_file_frag_00(self):
        """DitaRef: accessing an xref node, with a file and a fragment"""
        ref = self._refFrom("""<xref href="class_big_endian.xml#function"/>""")
        self.assertEqual(ref.href, 'class_big_endian.xml#function')
        self.assertEqual(ref.path, 'class_big_endian.xml')
        self.assertEqual(ref.fragment, 'function')
        self.assertEqual(ref.scheme, '')
        srcPath = self._onDriveC('spam', 'eggs.xml')
        expPath = self._onDriveC('spam', 'class_big_endian.xml')
        self.assertEqual(ref.fileFragment(srcPath), (expPath, 'function'))
        self._assertNoErrors(ref)

    def test_file_frag_01(self):
        """DitaRef: accessing an xref node, with a file and a fragment and relative path with '\\'."""
        ref = self._refFrom("""<xref href="..\\chips\\class_big_endian.xml#function"/>""")
        srcPath = self._onDriveC('spam', 'eggs.xml')
        expPath = self._onDriveC('chips', 'class_big_endian.xml')
        self.assertEqual(ref.fileFragment(srcPath), (expPath, 'function'))
        self._assertNoErrors(ref)

    def test_file_frag_02(self):
        """DitaRef: accessing an xref node, with a file and a fragment and relative path with '/'."""
        ref = self._refFrom("""<xref href="../chips/class_big_endian.xml#function"/>""")
        srcPath = self._onDriveC('spam', 'eggs.xml')
        expPath = self._onDriveC('chips', 'class_big_endian.xml')
        self.assertEqual(ref.fileFragment(srcPath), (expPath, 'function'))
        self._assertNoErrors(ref)

    def test_file_frag_03(self):
        """DitaRef: accessing an xref node, with a no file but with a fragment"""
        ref = self._refFrom("""<xref href="#function"/>""")
        self.assertEqual(ref.href, '#function')
        self.assertEqual(ref.path, '')
        self.assertEqual(ref.fragment, 'function')
        self.assertEqual(ref.scheme, '')
        # With no file part the reference resolves to the source file.
        srcPath = self._onDriveC('spam', 'eggs.xml')
        expPath = self._onDriveC('spam', 'eggs.xml')
        self.assertEqual(ref.fileFragment(srcPath), (expPath, 'function'))
        self._assertNoErrors(ref)

    def test_basic_scheme(self):
        """DitaRef: an xref node with a URI scheme"""
        ref = self._refFrom(
            """<xref href="http://www.cwi.nl:80/%7Eguido/Python.html#fragment"/>"""
        )
        self.assertEqual(ref.href, 'http://www.cwi.nl:80/%7Eguido/Python.html#fragment')
        self.assertEqual(ref.path, '/%7Eguido/Python.html')
        self.assertEqual(ref.fragment, 'fragment')
        self.assertEqual(ref.scheme, 'http')
        self._assertNoErrors(ref)

    def test_basic_scheme_file_frag(self):
        """DitaRef: an xref node with a URI scheme, invoking fileFragment()"""
        ref = self._refFrom(
            """<xref href="http://www.cwi.nl:80/%7Eguido/Python.html#fragment"/>"""
        )
        self.assertEqual(ref.href, 'http://www.cwi.nl:80/%7Eguido/Python.html#fragment')
        self.assertEqual(ref.path, '/%7Eguido/Python.html')
        self.assertEqual(ref.fragment, 'fragment')
        self.assertEqual(ref.scheme, 'http')
        # The source path is deliberately not normalised here.
        srcPath = os.path.join('C:%s' % os.sep, 'spam', 'eggs.xml')
        self.assertEqual(ref.fileFragment(srcPath), (None, None))
        self._assertNoErrors(ref)

    def test_fail_no_href(self):
        """DitaRef: Fails on an xref node with no href attribute"""
        ref = self._refFrom("""<xref />""")
        self.assertEqual(
            ref.errStrings(False, None),
            ['Reference element "xref" is missing href=... attribute', ]
        )
        self.assertEqual(
            ref.errStrings(True, None),
            ['Reference element "%s" is missing href=... attribute' % GENERIC_STRING, ]
        )

    def test_fail_bad_frag(self):
        """DitaRef: Fails on an xref node with href attribute that has multiple '#' characters"""
        ref = self._refFrom("""<xref href="a#b#c" />""")
        self.assertEqual(
            ref.errStrings(False, None),
            ['Multiple \'#\' not allowed in reference "a#b#c"', ]
        )
        self.assertEqual(
            ref.errStrings(True, None),
            ['Multiple \'#\' not allowed in reference "%s"' % GENERIC_STRING, ]
        )

    def test_guid_00(self):
        """DitaRef: basic read of an node with an GUID file/fragment reference"""
        guid = 'GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E'
        href = '%s.xml#%s' % (guid, guid)
        ref = self._refFrom('<xref href="%s"/>' % href)
        self.assertEqual(ref.href, href)
        self.assertEqual(ref.path, '%s.xml' % guid)
        self.assertEqual(ref.elem, 'xref')
        self.assertEqual(str(ref), 'xref %s' % href)
        self.assertEqual(ref.fragment, guid)
        self.assertEqual(ref.scheme, '')
        self._assertNoErrors(ref)

    def test_guid_01(self):
        """DitaRef: basic read of an node with an GUID file part fails"""
        guid = 'GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E'
        href = 'GUID-.xml#%s' % guid
        ref = self._refFrom('<xref href="%s"/>' % href)
        self.assertEqual(ref.href, href)
        self.assertEqual(ref.path, 'GUID-.xml')
        self.assertEqual(ref.elem, 'xref')
        self.assertEqual(str(ref), 'xref %s' % href)
        self.assertEqual(ref.fragment, guid)
        # No errors are expected until checkGuid() has been called.
        self._assertNoErrors(ref)
        ref.checkGuid()
        self.assertEqual(
            ref.errStrings(False, None),
            ['GUID specification does not match file reference "GUID-.xml"']
        )
        self.assertEqual(
            ref.errStrings(True, None),
            [genericStringForErrorCode(203), ]
        )

    def test_guid_02(self):
        """DitaRef: basic read of an node with an GUID fragment part fails"""
        guid = 'GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E'
        href = '%s.xml#GUID-25825EC4' % guid
        ref = self._refFrom('<xref href="%s"/>' % href)
        self.assertEqual(ref.href, href)
        self.assertEqual(ref.path, '%s.xml' % guid)
        self.assertEqual(ref.elem, 'xref')
        self.assertEqual(str(ref), 'xref %s' % href)
        self.assertEqual(ref.fragment, 'GUID-25825EC4')
        # No errors are expected until checkGuid() has been called.
        self._assertNoErrors(ref)
        ref.checkGuid()
        self.assertEqual(
            ref.errStrings(False, None),
            ['GUID specification does not match fragment reference "GUID-25825EC4"']
        )
        self.assertEqual(
            ref.errStrings(True, None),
            [genericStringForErrorCode(204), ]
        )
+
class TestDitaFile(unittest.TestCase):
    """Unit tests for DitaFileObj, fed from in-memory XML documents."""

    # XML declaration and DOCTYPE shared by the cxxClass documents below.
    _XML_PROLOG = """<?xml version='1.0' encoding='UTF-8' standalone='no'?>
<!DOCTYPE cxxClass PUBLIC "-//NOKIA//DTD DITA C++ API Class Reference Type v0.1.0//EN" "dtd/cxxClass.dtd" >
"""

    @staticmethod
    def _fileObj(theXml, thePath):
        """Returns a DitaFileObj that reads the string theXml as thePath."""
        return DitaFileObj(StringIO.StringIO(theXml), thePath)

    def _assertNoErrors(self, theObj):
        """Asserts that theObj has neither specific nor generic errors."""
        self.assertEqual(theObj.errStrings(False, None), [])
        self.assertEqual(theObj.errStrings(True, None), [])

    def setUp(self):
        """Nothing to prepare."""
        return None

    def tearDown(self):
        """Nothing to clean up."""
        return None

    def testSetUpTearDown(self):
        """DitaFile: test setUp() and tearDown()."""
        return None

    def test_Basic(self):
        """DitaFile: basic read of an XML file"""
        docXml = self._XML_PROLOG + """<cxxClass id="class_big_endian">
 <apiName>BigEndian</apiName>
 <shortdesc/>
 <cxxClassDetail>
 <cxxClassDefinition>
 <cxxClassAccessSpecifier value="public"/>
 <cxxClassAPIItemLocation>
 <cxxClassDeclarationFile name="filePath" value="K:/sf/os/commsfw/datacommsserver/esockserver/inc/es_sock.h"/>
 <cxxClassDeclarationFileLine name="lineNumber" value="1520"/>
 <cxxClassDefinitionFile name="filePath" value="K:/sf/os/commsfw/datacommsserver/esockserver/inc/es_sock.h"/>
 <cxxClassDefinitionFileLineStart name="lineNumber" value="1516"/>
 <cxxClassDefinitionFileLineEnd name="lineNumber" value="1526"/>
 </cxxClassAPIItemLocation>
 </cxxClassDefinition>
 <apiDesc>
 <p>Inserts and extracts integers in big-endian format. </p>
 </apiDesc>
 </cxxClassDetail>
 <cxxFunction id="class_big_endian_1a9f78fb092e713acf6ffe3e8e11f1626f">
 </cxxFunction>
 <cxxFunction id="class_big_endian_1aedf702f5c0118e4294d1a6d9684f8441">
 </cxxFunction>
 <cxxFunction id="class_big_endian_1ae266722f7bb965c971155a3315bad484">
 </cxxFunction>
 <cxxFunction id="class_big_endian_1a497d5248ea259f8490fb40ac4f2aafb2">
 </cxxFunction>
</cxxClass>"""
        fileObj = self._fileObj(docXml, 'foo')
        self.assertEqual(fileObj.identity, normalisePath('foo'))
        self.assertEqual(fileObj.doctype, 'cxxClass')
        self.assertEqual(fileObj.rootId, 'class_big_endian')
        expIdElems = {
            'class_big_endian' : 'cxxClass',
            'class_big_endian_1a9f78fb092e713acf6ffe3e8e11f1626f' : 'cxxFunction',
            'class_big_endian_1aedf702f5c0118e4294d1a6d9684f8441' : 'cxxFunction',
            'class_big_endian_1ae266722f7bb965c971155a3315bad484' : 'cxxFunction',
            'class_big_endian_1a497d5248ea259f8490fb40ac4f2aafb2' : 'cxxFunction',
        }
        self.assertEqual(fileObj.idElemMap(), expIdElems)
        self._assertNoErrors(fileObj)

    def test_missing_file(self):
        """DitaFile: read an missing XML file"""
        fileObj = DitaFileObj(None, 'foo')
        self.assertEqual(
            fileObj.errStrings(False, None),
            ['Failed to open: "%s"' % normalisePath('foo'), ]
        )
        self.assertEqual(
            fileObj.errStrings(True, None),
            [genericStringForErrorCode(400), ]
        )

    def test_IllFormedFile(self):
        """DitaFile: read an ill-formed XML file"""
        # The root element is never closed.
        docXml = self._XML_PROLOG + """<cxxClass id="class_big_endian">
"""
        fileObj = self._fileObj(docXml, 'foo')
        self.assertEqual(fileObj.identity, normalisePath('foo'))
        self.assertEqual(fileObj.doctype, None)
        self.assertEqual(fileObj.rootId, None)
        self.assertEqual(fileObj.idElemMap(), {})
        self.assertEqual(
            fileObj.errStrings(False, None),
            ['Can not parse: "no element found: line 4, column 0"', ]
        )
        self.assertEqual(
            fileObj.errStrings(True, None),
            [genericStringForErrorCode(404), ]
        )

    def test_missing_root_id(self):
        """DitaFile: read of an XML file with no id on root element"""
        docXml = self._XML_PROLOG + """<cxxClass>
 <xref href="OtherClass">OtherClass</xref>
 <cxxFunction id="class_big_endian_1a9f78fb092e713acf6ffe3e8e11f1626f"/>
</cxxClass>"""
        fileObj = self._fileObj(docXml, 'foo')
        self.assertEqual(fileObj.identity, normalisePath('foo'))
        self.assertEqual(fileObj.doctype, 'cxxClass')
        self.assertEqual(fileObj.rootId, None)
        self.assertEqual(
            fileObj.idElemMap(),
            {'class_big_endian_1a9f78fb092e713acf6ffe3e8e11f1626f' : 'cxxFunction', }
        )
        # The specific and the generic message are the same for 402.
        self.assertEqual(fileObj.errStrings(False, None), [genericStringForErrorCode(402)])
        self.assertEqual(fileObj.errStrings(True, None), [genericStringForErrorCode(402)])

    def test_duplicate_id(self):
        """DitaFile: duplicate IDs"""
        docXml = """<root id="AnID">
<elem id="AnID"/>
</root>"""
        fileObj = self._fileObj(docXml, 'spam.xml')
        self.assertEqual(fileObj.identity, normalisePath('spam.xml'))
        self.assertEqual(fileObj.doctype, 'root')
        self.assertEqual(fileObj.rootId, 'AnID')
        self.assertEqual(fileObj.idElemMap(), {})
        self.assertEqual(fileObj.errStrings(False, None), ['Multiple id="AnID"', ])
        self.assertEqual(fileObj.errStrings(True, None), [genericStringForErrorCode(401)])

    def test_ismap_00(self):
        """DitaFile: Is a map for <map>."""
        fileObj = self._fileObj("""<map id="myMap"/>""", 'spam.xml')
        self.assertEqual(fileObj.isMap, True)

    def test_ismap_01(self):
        """DitaFile: Is a map for <cxxAPIMap>."""
        fileObj = self._fileObj("""<cxxAPIMap id="myMap"/>""", 'spam.xml')
        self.assertEqual(fileObj.isMap, True)

    def test_Basic_01(self):
        """DitaFile: read of an simple XML file with id and xref"""
        funcId = 'class_big_endian_1a9f78fb092e713acf6ffe3e8e11f1626f'
        docXml = self._XML_PROLOG + """<cxxClass id="class_big_endian">
 <xref href="OtherClass">OtherClass</xref>
 <cxxFunction id="class_big_endian_1a9f78fb092e713acf6ffe3e8e11f1626f"/>
</cxxClass>"""
        fileObj = self._fileObj(docXml, 'foo')
        self.assertEqual(fileObj.identity, normalisePath('foo'))
        self.assertEqual(fileObj.doctype, 'cxxClass')
        self.assertEqual(fileObj.rootId, 'class_big_endian')
        self.assertEqual(fileObj.isMap, False)
        self.assertEqual(len(fileObj.idS), 2)
        self.assertEqual(len(fileObj.refS), 1)
        self.assertEqual(fileObj.hasId('class_big_endian'), True)
        self.assertEqual(fileObj.hasId(funcId), True)
        self.assertEqual(fileObj.hasId('noID'), False)
        self.assertEqual(fileObj.idElem('class_big_endian'), 'cxxClass')
        self.assertEqual(fileObj.idElem('noID'), None)
        self.assertEqual(fileObj.idElem(funcId), 'cxxFunction')
        self.assertEqual(
            fileObj.idElemMap(),
            {
                'class_big_endian' : 'cxxClass',
                funcId : 'cxxFunction',
            }
        )
        self._assertNoErrors(fileObj)
+
+class TestDitaFileSet(unittest.TestCase):
def setUp(self):
    """Nothing to prepare."""
    return None
+
def tearDown(self):
    """Nothing to clean up."""
    return None
+
+ def testSetUpTearDown(self):
+ """DitaFileSet: test setUp() and tearDown()."""
+ pass
+
def test_None(self):
    """DitaFileSet: read of None."""
    fileSet = DitaFileSet(None)
    fileSet.finalise()
    # None is not a directory: one error 500 is expected on the set.
    expSpecific = ['Not a directory: None']
    expGeneric = ['Not a directory: %s' % GENERIC_STRING, ]
    self.assertEqual(fileSet.errStrings(False, None), expSpecific)
    self.assertEqual(fileSet.errStrings(True, None), expGeneric)
    self.assertEqual(fileSet.errCountMap, {500 : 1})
+
def test_basic(self):
    """DitaFileSet: Test reading a map and a couple of files."""
    mapXml = """<map id="map_00">
 <topicref href="spam.dita" />
 <topicref href="eggs.dita" />
</map>"""
    sources = (
        (mapXml, 'map.ditamap'),
        ('<topic id="spam"/>', 'spam.dita'),
        ('<topic id="eggs"/>', 'eggs.dita'),
    )
    fileSet = DitaFileSet(None, procDir=False)
    for aXml, aPath in sources:
        fileSet._addFileObj(StringIO.StringIO(aXml), aPath)
    fileSet.finalise()
    # Both topics are referenced by the map, so nothing is reported.
    self.assertEqual(fileSet.allErrStrings(False, None), [])
    self.assertEqual(fileSet.allErrStrings(True, None), [])
    self.assertEqual(fileSet.errCountMap, {})
+
def test_duplicate_paths(self):
    """DitaFileSet: Test reading a couple of files in duplicate paths."""
    mapXml = """<map id="map_00">
 <topicref href="spam.dita" />
</map>"""
    # 'spam.dita' is added twice with different content.
    sources = (
        (mapXml, 'map.ditamap'),
        ('<topic id="spam"/>', 'spam.dita'),
        ('<topic id="eggs"/>', 'spam.dita'),
    )
    fileSet = DitaFileSet(None, procDir=False)
    for aXml, aPath in sources:
        fileSet._addFileObj(StringIO.StringIO(aXml), aPath)
    fileSet.finalise()
    expSpecific = ['Duplicate file path: "%s"' % normalisePath('spam.dita'), ]
    self.assertEqual(fileSet.errStrings(False, None), expSpecific)
    self.assertEqual(fileSet.errStrings(True, None), [genericStringForErrorCode(504),])
    self.assertEqual(fileSet.errCountMap, {504 : 1})
+
def test_duplicate_ids(self):
    """DitaFileSet: Test reading a map and a couple of files with duplicate IDs."""
    mapXml = """<map id="map_00">
 <topicref href="spam.dita" />
 <topicref href="eggs.dita" />
 <topicref href="chips.dita" />
</map>"""
    fileSet = DitaFileSet(None, procDir=False)
    fileSet._addFileObj(StringIO.StringIO(mapXml), 'map.ditamap')
    # Three topics that all share the root id "chips".
    for aPath in ('spam.dita', 'eggs.dita', 'chips.dita'):
        fileSet._addFileObj(StringIO.StringIO('<topic id="chips"/>'), aPath)
    fileSet.finalise()
    self.assertEqual(
        fileSet.errStrings(True, None),
        [
            genericStringForErrorCode(505),
            genericStringForErrorCode(501),
        ]
    )
    # The messages are expected to list the files in this order.
    expPaths = tuple(
        normalisePath(aName)
        for aName in ('chips.dita', 'eggs.dita', 'spam.dita')
    )
    expErrs = [
        """Duplicate id="chips" in files: ('%s', '%s', '%s')""" % expPaths,
        """Duplicate root id="chips" in files: ('%s', '%s', '%s')""" % expPaths,
    ]
    actErrs = fileSet.errStrings(False, None)
    self.assertEqual(actErrs, expErrs)
    self.assertEqual(fileSet.errCountMap, {505: 1, 501: 1})
+
def test_lonely_topics(self):
    """DitaFileSet: Test a couple of lonely topics."""
    fileSet = DitaFileSet(None, procDir=False)
    # Two topics and no map at all.
    for aName in ('spam', 'eggs'):
        aXml = '<%s id="%s"/>' % (aName, aName)
        fileSet._addFileObj(StringIO.StringIO(aXml), aName)
    fileSet.finalise()
    expSpecific = [
        'Topic id="%s" is not referenced by any map' % normalisePath(aName)
        for aName in ('eggs', 'spam')
    ]
    self.assertEqual(fileSet.errStrings(False, None), expSpecific)
    self.assertEqual(
        fileSet.errStrings(True, None),
        [genericStringForErrorCode(600), ]
    )
+
def test_map_cycles_00(self):
    """DitaFileSet: Cyclic references between two maps."""
    mapTemplate = """<map id="%s">
 <topicref href="%s" format="ditamap" />
</map>"""
    mapNames = ['map_00.ditamap', 'map_01.ditamap']
    fileSet = DitaFileSet(None, procDir=False)
    # map_00 refers to map_01 and map_01 refers back to map_00.
    fileSet._addFileObj(
        StringIO.StringIO(mapTemplate % ('map_00', mapNames[1])), mapNames[0]
    )
    fileSet._addFileObj(
        StringIO.StringIO(mapTemplate % ('map_01', mapNames[0])), mapNames[1]
    )
    fileSet.finalise()
    # One message is expected for each rotation of the cycle.
    mapPaths = [normalisePath(aName) for aName in mapNames]
    expSpecific = [
        'Maps "%s" are in a a cycle.' % str(tuple(mapPaths[i:] + mapPaths[:i]))
        for i in range(len(mapPaths))
    ]
    self.assertEqual(fileSet.errStrings(False, None), expSpecific)
    self.assertEqual(fileSet.allErrStrings(True, None), [genericStringForErrorCode(701)])
    self.assertEqual(fileSet.errCountMap, {701 : 4})
+
def test_map_cycles_01(self):
    """DitaFileSet: Cyclic references between three maps."""
    mapTemplate = """<map id="%s">
 <topicref href="%s" format="ditamap" />
</map>"""
    mapNames = ['map_00.ditamap', 'map_01.ditamap', 'map_02.ditamap']
    # (map id, referenced map, own path): 00 -> 01 -> 02 -> 00
    sources = (
        ('map_00', mapNames[1], mapNames[0]),
        ('map_01', mapNames[2], mapNames[1]),
        ('map_02', mapNames[0], mapNames[2]),
    )
    fileSet = DitaFileSet(None, procDir=False)
    for aMapId, aTarget, aPath in sources:
        fileSet._addFileObj(
            StringIO.StringIO(mapTemplate % (aMapId, aTarget)), aPath
        )
    fileSet.finalise()
    # One message is expected for each rotation of the cycle.
    mapPaths = [normalisePath(aName) for aName in mapNames]
    expSpecific = [
        'Maps "%s" are in a a cycle.' % str(tuple(mapPaths[i:] + mapPaths[:i]))
        for i in range(len(mapPaths))
    ]
    self.assertEqual(fileSet.errStrings(False, None), expSpecific)
    self.assertEqual(fileSet.errStrings(True, None), [genericStringForErrorCode(701)])
    self.assertEqual(fileSet.errCountMap, {701 : 6})
+
+ def test_refarc_00(self):
+ """DitaFileSet: Test ref arcing - all resolve."""
+ myO = DitaFileSet(None, procDir=False)
+ myO._addFileObj(
+ StringIO.StringIO(
+"""<map id="map_00">
+ <topicref href="spam.dita#spam" />
+ <topicref href="eggs.dita#eggs" />
+</map>"""
+ ),
+ 'map.ditamap'
+ )
+ myO._addFileObj(StringIO.StringIO('<topic id="spam"/>'), 'spam.dita')
+ myO._addFileObj(StringIO.StringIO('<topic id="eggs"/>'), 'eggs.dita')
+ myO.finalise()
+ self.assertEqual(myO.errCountMap, {})
+ self.assertEqual(myO.allErrStrings(False, None), [])
+ self.assertEqual(myO.allErrStrings(True, None), [])
+ self.assertEqual(myO.errStrings(False, None), [])
+ self.assertEqual(myO.errStrings(True, None), [])
+
+ def test_refarc_fail_00(self):
+ """DitaFileSet: Test ref arcing - can't find file."""
+ myO = DitaFileSet(None, procDir=False)
+ myO._addFileObj(
+ StringIO.StringIO(
+"""<map id="map_00">
+ <topicref href="spam_.dita" />
+ <topicref href="eggs_for_tea.dita" />
+</map>"""
+ ),
+ 'map.ditamap'
+ )
+ myO.finalise()
+ self.assertEqual(myO.errCountMap, {410: 2})
+ #print 'HI'
+ #pprint.pprint(myO.allErrStrings(False, None))
+ self.assertEqual(
+ myO.allErrStrings(False, None),
+ [
+ 'Can not resolve reference to file "%s"' % normalisePath('eggs_for_tea.dita'),
+ 'Can not resolve reference to file "%s"' % normalisePath('spam_.dita'),
+ ]
+ )
+ self.assertEqual(
+ myO.allErrStrings(True, None),
+ [
+ 'Can not resolve reference to file "..."',
+ ]
+ )
+ self.assertEqual(myO.errStrings(False, None), [])
+ self.assertEqual(myO.errStrings(True, None), [])
+
+ def test_refarc_fail_01(self):
+ """DitaFileSet: Test ref arcing - can't find fragment."""
+ myO = DitaFileSet(None, procDir=False)
+ myO._addFileObj(
+ StringIO.StringIO(
+"""<map id="map_00">
+ <topicref href="spam.dita#spam_" />
+ <topicref href="eggs.dita#eggs_" />
+</map>"""
+ ),
+ 'map.ditamap'
+ )
+ myO._addFileObj(StringIO.StringIO('<spam id="spam"/>'), 'spam.dita')
+ myO._addFileObj(StringIO.StringIO('<eggs id="eggs"/>'), 'eggs.dita')
+ myO.finalise()
+ self.assertEqual(myO.errCountMap, {411: 2})
+ #print 'HI'
+ #pprint.pprint(myO.allErrStrings(False, None))
+ self.assertEqual(
+ myO.allErrStrings(False, None),
+ [
+ 'Can resolve reference to file "%s" but not to fragment "eggs_"' % normalisePath('eggs.dita'),
+ 'Can resolve reference to file "%s" but not to fragment "spam_"' % normalisePath('spam.dita'),
+ ]
+ )
+ self.assertEqual(
+ myO.allErrStrings(True, None),
+ [
+ 'Can resolve reference to file "%s" but not to fragment "%s"' % (GENERIC_STRING, GENERIC_STRING),
+ ]
+ )
+ self.assertEqual(myO.errStrings(False, None), [])
+ self.assertEqual(myO.errStrings(True, None), [])
+
+ def test_refarc_url_00(self):
+ """DitaFileSet: Test ref arcing - URL."""
+ myO = DitaFileSet(None, procDir=False, testExt=True)
+ myO._addFileObj(
+ StringIO.StringIO(
+"""<map id="map_00">
+ <topicref href="spam.dita#spam" />
+ <topicref href="eggs.dita#eggs" />
+</map>"""
+ ),
+ 'map.ditamap'
+ )
+ myO._addFileObj(StringIO.StringIO("""<topic id="spam">
+ <xref href="http://www.nokia.com">Nokia</xref>
+</topic>"""), 'spam.dita')
+ myO._addFileObj(StringIO.StringIO("""<topic id="eggs">
+ <xref href="http://www.google.com">Google</xref>
+</topic>"""), 'eggs.dita')
+ myO.finalise()
+ #print 'HI'
+ #pprint.pprint(myO.allErrStrings(False, None))
+ self.assertEqual(myO.errCountMap, {})
+ self.assertEqual(
+ myO.allErrStrings(False, None),
+ [
+ ]
+ )
+ self.assertEqual(
+ myO.allErrStrings(True, None),
+ [
+ ]
+ )
+ self.assertEqual(myO.errStrings(False, None), [])
+ self.assertEqual(myO.errStrings(True, None), [])
+
+class TestDitaBookmapFileSet(unittest.TestCase):
+ def setUp(self):
+ pass
+
+ def tearDown(self):
+ pass
+
+ def testSetUpTearDown(self):
+ """TestDitaBookmapFileSet: test setUp() and tearDown()."""
+ pass
+
+ def test_basic(self):
+ """TestDitaBookmapFileSet: Test reading a bookmap and a topic."""
+ myO = DitaFileSet(None, procDir=False)
+ myO._addFileObj(
+ StringIO.StringIO(
+"""<?xml version="1.0" encoding="utf-8"?>
+<!DOCTYPE bookmap PUBLIC "-//OASIS//DTD DITA BookMap//EN"
+"bookmap.dtd">
+<bookmap id="GUID-5BDFDB6B-7801-4804-9F41-2BDC5BE53DDF">
+ <booktitle>
+ <mainbooktitle>My Bookmap</mainbooktitle>
+ <booktitlealt>Alternate title</booktitlealt>
+ </booktitle>
+ <frontmatter id="GUID-DA857913-F826-4CF7-A135-93F2AEB48353">
+ <topicref href="GUID-00025EAD-C4B6-5408-96A3-FFDBBBDC7CAB.dita" id="GUID-994B1764-393F-401F-8571-CE0955AB6CA6" />
+ </frontmatter>
+</bookmap>
+"""
+ ),
+ 'bookmap.ditamap'
+ )
+ myO._addFileObj(StringIO.StringIO("""<?xml version="1.0" encoding="utf-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="GUID-00025EAD-C4B6-5408-96A3-FFDBBBDC7CAB" xml:lang="en">
+ <title>How to read and write a file</title>
+</concept>
+"""), 'GUID-00025EAD-C4B6-5408-96A3-FFDBBBDC7CAB.dita')
+ myO.finalise()
+ #print
+ #myO.debugDump()
+ #print 'HI'
+ #myO.writeErrors(False)
+ self.assertEqual(myO.allErrStrings(False, None), [])
+ self.assertEqual(myO.allErrStrings(True, None), [])
+ self.assertEqual(myO.errCountMap, {})
+
+class Special(unittest.TestCase):
+ pass
+
+def unitTest(theVerbosity=2):
+ suite = unittest.TestLoader().loadTestsFromTestCase(NullClass)
+ suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestCountDict))
+ suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestDitaId))
+ suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestDitaRef))
+ suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestDitaFile))
+ suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestDitaFileSet))
+ suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestDitaBookmapFileSet))
+ suite.addTests(unittest.TestLoader().loadTestsFromTestCase(Special))
+ myResult = unittest.TextTestRunner(verbosity=theVerbosity).run(suite)
+ return (myResult.testsRun, len(myResult.errors), len(myResult.failures))
+
+######################################
+# main() stuff
+######################################
+def main():
+ print 'CMD: %s' % ' '.join(sys.argv)
+ usage = "usage: %prog [options] <Directory of XML content>"
+ parser = OptionParser(usage, version='%prog ' + __version__)
+ parser.add_option("-d", action="store_true", dest="dump", default=False,
+ help="Dump internal representation. [default: %default]")
+ parser.add_option(
+ "-e", "--errors",
+ type="str",
+ dest="error_codes",
+ default='All',
+ help="Only report on certain error codes (space seperated list). [default: \"%default\"]"
+ )
+ parser.add_option("-f", "--file", dest="file", type="str", default='None',
+ help="Report of errors by file either 'None', 'generic', 'specific'. [default: %default]")
+ parser.add_option("-g", action="store_true", dest="guid", default=False,
+ help="Enforce GUID specification. [default: %default]")
+ parser.add_option(
+ "-j", "--jobs",
+ type="int",
+ dest="jobs",
+ default=-1,
+ help="Max processes when multiprocessing. 0 takes CPUs, -1 no MP. [default: %default]"
+ )
+ parser.add_option(
+ "-l", "--loglevel",
+ type="int",
+ dest="loglevel",
+ default=20,
+ help="Log Level (debug=10, info=20, warning=30, [error=40], critical=50) [default: %default]"
+ )
+ parser.add_option(
+ "-p", "--pattern",
+ type="str",
+ dest="pattern",
+ default=FNMATCH_STRING,
+ help="Pattern match. [default: \"%default\"]"
+ )
+ parser.add_option("-r", action="store_true", dest="recursive", default=False,
+ help="Recursive. [default: %default]")
+ parser.add_option("-s", action="store_true", dest="shelve", default=False,
+ help="Use the shelve dBase rather than storing the internal representation in memory. This is slower but is useful for large data sets where a memory error might occur. [default: %default]")
+ parser.add_option("-u", action="store_true", dest="unit_test", default=False,
+ help="Execute unit tests and exit. [default: %default]")
+ parser.add_option("-x", action="store_true", dest="ext_url", default=False,
+ help="Test external |URLs. [default: %default]")
+ parser.add_option("-?", action="store_true", dest="query_errors", default=False,
+ help="Display the error types that are detected. [default: %default]")
+ (options, args) = parser.parse_args()
+ logging.basicConfig(
+ level=options.loglevel,
+ format='%(asctime)s %(levelname)-8s %(message)s',
+ stream=sys.stdout,
+ )
+ if options.file not in ('None', 'generic', 'specific'):
+ parser.error("--file option must be: 'None' | 'generic' | 'specific'")
+ return 1
+ if options.unit_test:
+ unitTest()
+ if options.query_errors:
+ writeGenericStringsForErrorCodes()
+ if len(args) < 1 and not options.unit_test:
+ parser.print_help()
+ parser.error("I can't do much without a path to the XML content.")
+ return 1
+ elif len(args) == 1:
+ if options.jobs > -1:
+ myObj = retMpDitaFileSetObj(
+ args[0],
+ options.pattern.split(' '),
+ options.recursive,
+ options.jobs,
+ options.ext_url,
+ options.shelve,
+ )
+ else:
+ myObj = DitaFileSet(args[0],
+ procDir=True,
+ thePatterns=options.pattern.split(' '),
+ recursive=options.recursive,
+ testExt=options.ext_url,
+ useDbase=options.shelve,
+ )
+ #print 'MyObj:', myObj
+ if options.dump:
+ myObj.debugDump()
+ myObj.writeStatistics()
+ myObj.writeErrorSummary()
+ #pprint.pprint(myObj.statsMap)
+ # TODO: Write out the results in different ways
+ errFilter = set(PROBLEM_CODE_FORMAT.keys())
+ if options.error_codes != 'All':
+ errFilter = set([int(i) for i in options.error_codes.split()])
+ if options.file == 'generic':
+ print 'Generic problems:'
+ myObj.writeErrors(True, errFilter)
+ elif options.file == 'specific':
+ print 'Specific problems:'
+ myObj.writeErrors(False, errFilter)
+ elif len(args) > 1:
+ parser.error("Too many arguments, I need only one.")
+ return 1
+ return 0
+
+if __name__ == '__main__':
+ # The multiprocessing documentation asks for freeze_support() to be called
+ # straight after the __main__ guard; it only has an effect when the program
+ # has been frozen into a Windows executable.
+ multiprocessing.freeze_support()
+ # The value returned by main() becomes the exit status of the process.
+ sys.exit(main())