diff -r 82f11024044a -r 932c358ece3e mpdot/linkcheck.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mpdot/linkcheck.py Fri Apr 23 20:45:58 2010 +0100 @@ -0,0 +1,2336 @@ +# Copyright (c) 2007-2010 Nokia Corporation and/or its subsidiary(-ies) All rights reserved. +# This component and the accompanying materials are made available under the terms of the License +# "Eclipse Public License v1.0" which accompanies this distribution, +# and is available at the URL "http://www.eclipse.org/legal/epl-v10.html". +# +# Initial Contributors: +# Nokia Corporation - initial contribution. +# +# Contributors: +# +# Description: +# Checks links in DITA XML and reports issues. +""" +Created on 12 Feb 2010 + +@author: p2ross + +Definitions +=========== +Doctype +------- +See: http://www.w3.org/TR/2008/REC-xml-20081126/#dt-root +Note: this is sometimes called the Doctype because of http://www.w3.org/TR/2008/REC-xml-20081126/#vc-roottype + +ID +-- +The value of the 'id' attribute of an element. + +Root ID +------- +The value of the 'id' attribute of the root element. +Note: A development would allow differently named attributes provided that they +were ID types. See http://www.w3.org/TR/2008/REC-xml-20081126/#sec-attribute-types +for validity constraints for ID types. + +Reference +--------- +The value of the href attribute of an element. + +Map +--- +An XML file whose root element name is 'map' or ends with 'Map'. + +Topic +----- +An XML file that is not a Map. + +Lonely topic +------------ +A topic whose root ID is not referenced by any map. + +Lonely map +---------- +A map whose root ID is not referenced by any map. + +Map Cycle +--------- +A sequence of map references whose members are not unique. 
class CountDict(dict):
    """Dictionary with a default value of 0 for unknown keys.

    Reading an unknown key with ``d[key]`` stores 0 under that key and
    returns it, so ``d[key] += 1`` works without prior initialisation.
    ``get()`` and ``in`` keep their ordinary dict behaviour and never create
    an entry.
    """
    def __missing__(self, key):
        # dict.__getitem__ calls this hook only when the key is absent, so
        # present keys take the normal fast path with a single lookup.
        self[key] = 0
        return 0
attribute', 1), + 202 : ('URL has missing type/format in reference "%s"', 1), + 203 : ('GUID specification does not match file reference "%s"', 1), + 204 : ('GUID specification does not match fragment reference "%s"', 1), + # 'ref' + 300 : ('Can not resolve URI "%s"', 1), + # 'file' + 400 : ('Failed to open: "%s"', 1), + 401 : ('Multiple id="%s"', 1), + 402 : ('No id attribute on root element', 0), + 403 : ('Root ID in cycle: %s', 1), + 404 : ('Can not parse: "%s"', 1), + 410 : ('Can not resolve reference to file "%s"', 1), + 411 : ('Can resolve reference to file "%s" but not to fragment "%s"', 2), + 412 : ('Referencing element "%s" does not match target root element "%s"', 2), + 413 : ('Referencing element "%s" does not match target element "%s" for id="%s"', 3), + 414 : ('topicref element with format="ditamap" does not match target root element "%s"', 1), + 415 : ('topicref to does not have format="ditamap" but format="%s"', 1), + 416 : ('topicref element type="%s" does not match target root element "%s"', 2), + 417 : ('topicref element type="%s" does not match target element "%s" for id="%s"', 3), + 418 : ('Unknown referencing element "%s" does not match target root element "%s"', 2), + 419 : ('Unknown referencing element "%s" does not match target element "%s" for id="%s"', 3), + # 'file_set' + 500 : ('Not a directory: %s', 1), + 501 : ('Duplicate root id="%s" in files: %s', 2), + #502 : ('Can not resolve reference to "%s"', 1), + #503 : ('Reference type "%s" does not match target type "%s" for id="%s"', 3), + 504 : ('Duplicate file path: "%s"', 1), + 505 : ('Duplicate id="%s" in files: %s', 2), + # 'topic_set' + 600 : ('Topic id="%s" is not referenced by any map', 1), + # 'map_set' + 700 : ('More than one top level map exists: %s', 1), + 701 : ('Maps "%s" are in a a cycle.', 1), +} + +GENERIC_STRING = '...' 
def genericStringForErrorCode(ec):
    """Return the message for error code ec with every argument position
    filled by GENERIC_STRING.

    ec must be a key of PROBLEM_CODE_FORMAT, whose values are
    (format_string, num_args) pairs. Messages that take no arguments are
    returned unchanged.
    """
    assert ec in PROBLEM_CODE_FORMAT, 'No error code: %s' % ec
    f, c = PROBLEM_CODE_FORMAT[ec]
    if c == 0:
        return f
    return f % ((GENERIC_STRING,) * c)

def writeGenericStringsForErrorCodes(s=sys.stdout):
    """Write a table of every error code and its generic message to s,
    in ascending order of error code."""
    s.write(' All Error Codes '.center(PRINT_WIDTH, '='))
    s.write('\n')
    s.write('%4s %s\n' % ('Code', 'Error'))
    s.write('%4s %s\n' % ('----', '-----'))
    # sorted() works whether keys() gives a list or a view
    for ec in sorted(PROBLEM_CODE_FORMAT):
        s.write('%4d %s\n' % (ec, genericStringForErrorCode(ec)))
    s.write('='*PRINT_WIDTH)
    s.write('\n\n')

def normalisePath(thePath):
    """Return the absolute form of thePath using only '/' as separator.

    os.path.abspath() alone keeps the platform separator, so the backslashes
    are replaced to give one spelling per file wherever paths are compared
    or used as keys.
    """
    return os.path.abspath(thePath).replace('\\', '/')
class UrlAccessCache(object):
    """Remembers, per URL, whether the URL could be opened and read.

    Each URL is fetched at most once between calls to clear(); later queries
    are answered from the cache.
    """
    def __init__(self):
        # {URL : True/False, ...}
        self._cache = {}

    def clear(self):
        """Forget every cached result."""
        self._cache = {}

    def canAccess(self, theUrl):
        """Return True if theUrl can be opened and read, False if that
        raises IOError. The result is cached."""
        if theUrl not in self._cache:
            try:
                u = urllib.urlopen(theUrl)#, data, proxies)
                try:
                    u.read()
                finally:
                    # Release the connection even when the read fails
                    u.close()
            except IOError:
                isOk = False
            else:
                isOk = True
            self._cache[theUrl] = isOk
            logging.debug('URL: %s for %s' % (isOk, theUrl))
        return self._cache[theUrl]
class DitaId(DitaLinkCheckBase):
    """An element that carries an 'id' attribute.

    The attribute value is used as the identity of the object and the
    element tag is remembered alongside it. Error 100 is recorded when the
    value contains a '#'.
    """
    def __init__(self, theN):
        idValue = theN.get('id', None)
        assert(idValue is not None)
        super(DitaId, self).__init__(idValue)
        self._elem = theN.tag
        myId = self.id
        if '#' in myId:
            self.addError(100, (myId,))
        # TODO: NMTOKENS

    @property
    def elem(self):
        """The tag of the element that carries the id."""
        return self._elem

    @property
    def id(self):
        """The value of the id attribute (same as the identity)."""
        return self.identity

    def checkGuid(self):
        """Optional extra check: record error 102 unless the id starts with
        text that matches RE_GUID."""
        if RE_GUID.match(self.id) is not None:
            return
        self.addError(102, (self.id,))

    def debugDump(self, s=sys.stdout, prefix=''):
        """Write a one line description of this ID to s for debugging."""
        fields = (prefix, self.elem, self.id)
        s.write('%sID: <%s id="%s" />\n' % fields)
target is the correct element + # e.g. in Vanilla DITA + # + self._refType = theN.get('type', None) + # Format attribute, this can be format="ditamap" + self._format = theN.get('format', None) + if self._href is None: + self.addError(201, (self._elem,)) + self._url = None + else: + self._url = urlparse.urlparse(self._href) + if '#' in self._url.fragment: + self.addError(200, (self._href,)) + + @property + def elem(self): + return self._elem + + @property + def href(self): + """The value of the href attribute.""" + return self._href + + @property + def refType(self): + """The value of the type attribute.""" + return self._refType + + @property + def format(self): + """The value of the format attribute.""" + return self._format + + @property + def path(self): + """The value of the path part of the href attribute.""" + return self._url.path + + @property + def fragment(self): + """The value of the fragment part of the href attribute.""" + return self._url.fragment + + @property + def scheme(self): + """The URI scheme e.g. 
def fileFragment(self, theRefFile):
    """Resolve this reference against the file that contains it.

    theRefFile is the path of the referencing file. Returns a pair
    (normalised target path, fragment). A reference with an empty path part
    targets theRefFile itself; otherwise the path part is joined to the
    directory of theRefFile. Returns (None, None) when the scheme is
    anything other than '' or 'file'.
    """
    isLocal = self.scheme in ('', 'file')
    if not isLocal:
        return (None, None)
    relPath = self.path
    if len(relPath) > 0:
        target = os.path.join(os.path.dirname(theRefFile), relPath)
    else:
        target = theRefFile
    return normalisePath(target), self.fragment
+ theTree = etree.parse(theFileObj) + except SyntaxError, err: + self.addError(404, (str(err),)) + else: + # Walk the tree + for i, e in enumerate(theTree.getiterator()): + #print 'TRACE: e', e + # Element [0] is the root element + if i == 0: + assert(self._rootId is None) + assert(self._doctype is None) + self._doctype = e.tag + if e.get('id', None) is not None: + self._rootId = DitaId(e) + self._addId(self._rootId) + else: + self.addError(402, ()) + else: + # NOTE: Elements with id attributes can also have href + # attributes. For example a in a + # Thus these tests are not exclusive + if e.get('id', None) is not None: + self._addId(DitaId(e)) + if e.get('href', None) is not None: + # TODO: Do we limit ourselves to only a certain set of elements? + self._xrefS.add(DitaRef(e)) + else: + self.addError(400, (self.identity,)) + + def _addId(self, theId): + #print 'TRACE: adding %s' % theId + #print 'TRACE: self._idS %s' % self._idS + if theId in self._idS: + # Remove from self._idS + #print 'TRACE: removing %s' % theId + self._idS.remove(theId) + self._dupeIdS.add(theId) + self.addError(401, (theId.identity,)) + elif theId not in self._dupeIdS: + self._idS.add(theId) + + @property + def bytes(self): + return self._bytes + + @property + def doctype(self): + return self._doctype + + @property + def rootId(self): + if self._rootId is not None: + return self._rootId.id + + @property + def isMap(self): + return self.doctype == "map" \ + or self.doctype == 'bookmap' \ + or (self.doctype is not None and self.doctype.endswith('Map')) + + @property + def idS(self): + """The set of IDs.""" + return self._idS + + @property + def refS(self): + """The set of DitaRef objects.""" + return self._xrefS + + def idElemMap(self): + """Returns a map {id : elem name, ...}.""" + retVal = {} + for anId in self._idS: + retVal[anId.id] = anId.elem + return retVal + + def hasId(self, theString): + for anId in self._idS: + if theString == anId.id: + return True + return False + + def 
def writeErrorList(self, theList, theSubHead='', theS=sys.stdout):
    """Write a group of error strings to theS; an empty list writes nothing.

    theList is sorted in place. The 'File: ...' heading is written only if
    no group has been written since self._hasWritten was last cleared.
    theSubHead, when not empty, is written with the number of entries
    before the entries themselves.
    """
    if len(theList) == 0:
        return
    theList.sort()
    if not self._hasWritten:
        theS.write('File: %s\n' % self.identity)
        self._hasWritten = True
    if len(theSubHead) > 0:
        theS.write('%s [%d]:\n' % (theSubHead, len(theList)))
    body = '\n'.join(theList)
    theS.write(body)
    theS.write('\n')
class DitaFilePath(DitaFileObj):
    """A DITA topic or map that is read from the file system."""
    def __init__(self, theFilePath):
        """Initialiser with a file path.

        The file is opened, handed to the DitaFileObj initialiser and then
        closed again. If it can not be opened the base initialiser is given
        None and error 400 is recorded against theFilePath.
        """
        try:
            f = open(theFilePath)
        except IOError:
            f = None
        try:
            super(DitaFilePath, self).__init__(f, theFilePath)
        finally:
            # The base initialiser reads the file completely and does not
            # keep the file object, so release the handle here.
            if f is not None:
                f.close()
        if f is None:
            self.addError(400, (theFilePath,))
class DitaFileMapShelve(DitaFileMapBase):
    """Holds map of {file path : class DitaFile, ...} in a shelve database.

    The database is the file DBASE_FILENAME in the current working
    directory and is created afresh for every instance.
    """
    DBASE_FILENAME = 'linkchecker.dbase'
    def __init__(self):
        if os.path.exists(self.DBASE_FILENAME):
            os.remove(self.DBASE_FILENAME)
        # flag='n' always creates a new, empty database. Some dbm backends
        # add their own suffix to the file name, which the exists() test
        # above would miss; without 'n' a database from an earlier run
        # could be reopened and getDitaFileObj() would return stale objects.
        self._db = shelve.open(self.DBASE_FILENAME, flag='n')
        # Use this as a 'cache' as shelf.keys() is slow
        self._keys = set()

    def keys(self):
        """Returns an unsorted list of keys in the map."""
        return list(self._keys)

    def has_key(self, thePath):
        """Return True if the key exists."""
        return thePath in self._keys

    def remove(self, thePath):
        """Remove the entry corresponding to thePath, may raise KeyError."""
        del self._db[thePath]
        self._keys.remove(thePath)

    def getDitaFileObj(self, thePath):
        """Return a DitaFileObj that corresponds to thePath, may raise KeyError."""
        return self._db[thePath]

    def setDitaFileObj(self, thePath, theObj):
        """Load a DitaFileObj or update a mutated DitaFileObj."""
        self._db[thePath] = theObj
        self._keys.add(thePath)
+ useDbase - If True then store all DitaFile objects in an external dbase + (slower but less memory issues). + """ + if thePatterns is None: + thePatterns = FNMATCH_STRING.split(' ') + if theDir is not None: + theDir = normalisePath(theDir) + super(DitaFileSet, self).__init__(theDir) + logging.info('DitaFileSet starting to read...') + GlobalUrlCache.clear() + self._testExt = testExt + # Set up how we store the DitaFile objects + if useDbase: + self._fileMap = DitaFileMapShelve() + else: + self._fileMap = DitaFileMapInMemory() + # Map of (str(rootId) : filepath, ...) with no duplicates + # Keys will be in self._uniqueRootIds + self._rootIdToFilePathMap = {} + # Path to the unique DITA map + self._uniqueMapPath = None + # Count of {error_code : count, ...} + self._errCountMap = CountDict() + # Statistics + self._statsMap = CountDict() + ## and initialise + #for k in self.STATS_KEYS: + # self._statsMap[k] + # Finalisation control (weak) + self._hasFinalised = False + # Timers + self._timeRead = time.clock() + self._timeAnalyse = 0.0 + if procDir: + if theDir is not None and os.path.isdir(theDir): + self._readDir(theDir, thePatterns, recursive) + else: + self.addError(500, (theDir,)) + # Finalise and run all the tests + self.finalise() + + @property + def errCountMap(self): + return self._errCountMap + + @property + def statsMap(self): + return self._statsMap + + def writeStatistics(self, s=sys.stdout): + """Writes out read statistics.""" + s.write(' Statistics '.center(PRINT_WIDTH, '=')) + s.write('\n') + if len(self._statsMap) > 0: + o = self.STATS_KEYS + #assert(set(o) == set(self._statsMap.keys())), \ + # '%s != %s' % (o, self._statsMap.keys()) + for k in o: + try: + m = self._statsMap[k] / (1024.0*1024.0) + s.write('%20s: %10d [%10.3f M]\n' % (k, self._statsMap[k], m)) + except KeyError: + s.write('%20s: %10s \n' % (k, 'Not seen')) + s.write('%20s: %10.3f (s)\n' % ('Read time', self._timeRead)) + s.write('%20s: %10.3f (s)\n' % ('Analysis time', self._timeAnalyse)) 
+ s.write('='*PRINT_WIDTH) + else: + s.write('Nothing processed.') + s.write('\n') + + def writeErrorSummary(self, s=sys.stdout): + s.write(' Error Summary '.center(PRINT_WIDTH, '=')) + s.write('\n') + if len(self._errCountMap): + s.write('%4s %10s %s\n' % ('Code', 'Count', 'Error')) + s.write('%4s %10s %s\n' % ('----', '-----', '-----')) + errCodeS = self._errCountMap.keys() + errCodeS.sort() + for c in errCodeS: + s.write('%4d %10d %s\n' \ + % (c, self._errCountMap[c], genericStringForErrorCode(c))) + else: + s.write('No errors\n') + s.write('='*PRINT_WIDTH) + s.write('\n') + + def writeErrors(self, isGeneric, theFilter, theStream=sys.stdout): + """Writes out errors for me and my files.""" + theStream.write('\n'.join(self.errStrings(isGeneric, theFilter))) + fileS = self._fileMap.keys() + fileS.sort() + for aFile in fileS: + # Immutable call so just use get + self._fileMap.getDitaFileObj(aFile).writeErrors(isGeneric, theFilter, theStream) + + def allErrStrings(self, isGeneric, theFilter): + """Return a sorted list of error messages without duplicates including + files.""" + retSet = set(self.errStrings(isGeneric, theFilter)) + fileS = self._fileMap.keys() + fileS.sort() + for aFilePath in self._fileMap.keys(): + # Immutable call so just use get + for anErr in self._fileMap.getDitaFileObj(aFilePath).errStrings(isGeneric, theFilter): + retSet.add(anErr) + retList = list(retSet) + retList.sort() + return retList + + def _readDir(self, theDir, thePatS, recursive): + assert(os.path.isdir(theDir)) + for aName in os.listdir(theDir): + aPath = os.path.join(theDir, aName) + if os.path.isdir(aPath) and recursive: + self._readDir(aPath, thePatS, recursive) + elif os.path.isfile(aPath): + for aPat in thePatS: + if fnmatch.fnmatch(aName, aPat): + assert(not self._fileMap.has_key(aPath)) + logging.debug(' Reading %s' % aPath) + try: + f = open(aPath) + except IOError: + f = None + self._addFileObj(f, aPath) + break + + def _addFileObj(self, theFileObj, theFilePath): + myObj = 
DitaFileObj(theFileObj, theFilePath) + self._addDitaFileObj(myObj) + + def _addDitaFileObj(self, theDitaFileObj): + if self._fileMap.has_key(theDitaFileObj.identity): + self.addError(504, (theDitaFileObj.identity,)) + else: + # Mutable call so use set + self._fileMap.setDitaFileObj(theDitaFileObj.identity, theDitaFileObj) + # Update statistics (files, bytes, ids, refs) etc. + self._statsMap['Files'] += 1 + self._statsMap['Bytes'] += theDitaFileObj.bytes + self._statsMap['IDs'] += len(theDitaFileObj.idS) + self._statsMap['Refs'] += len(theDitaFileObj.refS) + if theDitaFileObj.isMap: + self._statsMap['Maps'] += 1 + else: + self._statsMap['Non-maps'] += 1 + + def finalise(self): + """Creates the environment for all checks and then runs them.""" + logging.info('DitaFileSet.finalise() start...') + if not self._hasFinalised: + self._timeRead = time.clock() - self._timeRead + self._timeAnalyse = time.clock() + self._initRootIdToFilePathMap() + self._checkDupeIdS() + self._setMapCycles() + self._checkLonely() + self._checkRefArcs() + self._errCountMap = CountDict() + self.updateErrorCount(self._errCountMap) + self._hasFinalised = True + self._timeAnalyse = time.clock() - self._timeAnalyse + logging.info('DitaFileSet.finalise() done.') + + def _initRootIdToFilePathMap(self): + # Map of (str(rootId) : filepath, ...) with no duplicates + self._rootIdToFilePathMap = {} + # Temporary map of (str(rootId) : [filepath, ...], ...) 
+ myDupeIdFiles = {} + for fPath in self._fileMap.keys(): + # fObj is not written to so we don't need to use set + fObj = self._fileMap.getDitaFileObj(fPath) + #print 'TRACE: _initRootIdToFilePathMap() fPath:', fPath + rId = fObj.rootId + if rId is not None: + if myDupeIdFiles.has_key(rId): + #print 'TRACE: _initRootIdToFilePathMap() another dupe:', fPath + myDupeIdFiles[rId].append(fObj.identity) + elif self._rootIdToFilePathMap.has_key(rId): + #print 'TRACE: _initRootIdToFilePathMap() first dupe:', fPath + # Remove from map and add to myDupeIdFiles + myFile = self._rootIdToFilePathMap.pop(rId) + try: + myDupeIdFiles[rId].append(myFile) + except KeyError: + myDupeIdFiles[rId] = [myFile,] + myDupeIdFiles[rId].append(fPath) + else: + #print 'TRACE: _initRootIdToFilePathMap() adding:', fPath + self._rootIdToFilePathMap[rId] = fObj.identity + # Set duplicate errors + for k in myDupeIdFiles.keys(): + myDupeIdFiles[k].sort() + self.addError(501, (k, tuple(myDupeIdFiles[k]))) + #self.addError(501, (k, str([str(a) for a in myDupeIdFiles[k]]))) + + def _checkDupeIdS(self): + """Checks if there are any duplicate IDs anywhere.""" + # {ID : [fileS, ...], ...} + myDupeIdMap = {} + # Temporary data structure + # {ID : first file ID is seen in, ...} + seenIdMap = {} + for f in self._fileMap.keys(): + # o is not written to so we don't need set... 
+ o = self._fileMap.getDitaFileObj(f) + for anId in o.idS: + if seenIdMap.has_key(anId): + try: + myDupeIdMap[anId].append(f) + except KeyError: + myDupeIdMap[anId] = [seenIdMap[anId],] + myDupeIdMap[anId].append(f) + else: + seenIdMap[anId] = f + # Now add to errs as a 505 error message + # Sort the files in the map + for k in myDupeIdMap.keys(): + myDupeIdMap[k].sort() + self.addError(505, (k, tuple(myDupeIdMap[k]))) + #self.addError(505, (k, str([str(a) for a in myDupeIdMap[k]]))) + + def _retMapAdjList(self): + """Create an adjacency list {file_path : set(refs), ...} (all strings)""" + adjList = {} + for f in self._fileMap.keys(): + fObj = self._fileMap.getDitaFileObj(f) + if fObj.isMap:# and fObj.rootId is not None: + assert(fObj.identity not in adjList.keys()) + refSet = set() + for r in fObj.refS: + refSet.add(r.fileFragment(fObj.identity)[0]) + adjList[fObj.identity] = refSet + return adjList + + def _setMapCycles(self): + """Sets any cyclic references seen in DITA maps.""" + adjList = self._retMapAdjList() + # A branch + myBr = [] + myCycles = set() + for aPath, aSet in adjList.items(): + myBr.append(aPath) + self._recurseCycles(adjList, myBr, myCycles) + myBr.pop() + self._setCycleErrors(myCycles) + + def _recurseCycles(self, a, b, c): + assert(len(b) > 0) + try: + myPath = b[-1] + for r in a[myPath]: + #print '_recurseCycles() testing r', r + #print '_recurseCycles() testing b', b + if r in b: + #print 'Adding cycle', tuple(b[b.index(r):]) + c.add(tuple(b[b.index(r):])) + else: + b.append(r) + self._recurseCycles(a, b, c) + b.pop() + except KeyError: + pass + + def _setCycleErrors(self, theC): + for aT in theC: + self.addError(701, (str(aT),)) + myL = list(aT) + assert(len(myL) > 0) + i = 0 + while i < len(myL): + myL.append(myL[0]) + # Should this be in the file thus, or in the files set? 
+ # As we are mutating the file object we need to use both + # getDitaFileObj() and setDitaFileObj() + fObj = self._fileMap.getDitaFileObj(myL[0]) + fObj.addError(701, (str(myL),)) + self._fileMap.setDitaFileObj(myL[0], fObj) + myL.pop() + myL.append(myL.pop(0)) + i += 1 + + def _checkLonely(self): + self._checkLonelyMaps() + self._checkLonelyTopics() + + def _checkLonelyMaps(self): + """Checks for lonely maps.""" + mapPathSet = set() + pathSetRemain = set() + for f in self._fileMap.keys(): + if self._fileMap.getDitaFileObj(f).isMap: + mapPathSet.add(f) + pathSetRemain.add(f) + for aPath in mapPathSet: + myMapObj = self._fileMap.getDitaFileObj(aPath) + for r in myMapObj.refS: + refFile, frag = r.fileFragment(f) + try: + pathSetRemain.remove(refFile) + except KeyError: + # refFile is a topic or an already seen map + pass + if len(pathSetRemain) > 1: + for aPath in pathSetRemain: + self.addError(700, (aPath,)) + elif len(pathSetRemain) == 1: + self._uniqueMapPath = pathSetRemain.pop() + + def _checkLonelyTopics(self): + """Checks for topics that are not referenced by any map.""" + mapPathSet = set() + pathSetRemain = set() + for f in self._fileMap.keys(): + #print 'TRACE: f:', f + if self._fileMap.getDitaFileObj(f).isMap: + mapPathSet.add(f) + else: + pathSetRemain.add(f) + #print 'TRACE: mapPathSet', mapPathSet + #print 'TRACE: pathSetRemain', pathSetRemain + for aMapPath in mapPathSet: + myMapObj = self._fileMap.getDitaFileObj(aMapPath) + for r in myMapObj.refS: + refFile, frag = r.fileFragment(aMapPath) + #print 'TRACE: removing:', refFile + try: + pathSetRemain.remove(refFile) + except KeyError: + # topic has already been seen in another map + pass + if len(pathSetRemain) > 0: + for aPath in pathSetRemain: + self.addError(600, (aPath,)) + + def _checkRefArcs(self): + """Checks all references are reachable.""" + for fPath in self._fileMap.keys(): + fObjSrc = self._fileMap.getDitaFileObj(fPath) + hasMutated = False + for rObjSrc in fObjSrc.refS: + if 
def _checkRefArcElemName(self, fObjSrc, rObjSrc, fObjTgt, frag):
    """Tests that the referencing element is compatible with its target.

    fObjSrc -- file object holding the reference; errors are added to it.
    rObjSrc -- the reference; its elem, format and refType are read.
    fObjTgt -- file object that the reference resolves to.
    frag    -- fragment part of the reference; '' means the target is
               the root element of fObjTgt.

    Rules applied:
    - A source element named 'xxxRef' must point at an element named
      'xxx' (errors 412 for a root target, 413 for a fragment target).
    - A 'topicref' with format 'ditamap' must point at a 'map' element
      (414), and a 'map' target needs format 'ditamap' (415). Otherwise
      an explicit refType must equal the target element name (416 for a
      root target, 417 for a fragment target).
    - Any other source element must be 'xref' or one of XREF_DESCENDENTS
      (418 for a root target, 419 for a fragment target).

    Returns True if an error was added to fObjSrc, False otherwise. This
    includes the case where the target element can not be found, which
    is reported elsewhere.
    """
    isRootTgt = len(frag) == 0
    if isRootTgt:
        # The target is the root element of fObjTgt
        if fObjTgt.rootId is None or fObjTgt.idElem(fObjTgt.rootId) is None:
            # Covered by other error codes
            return False
        iObjTgt = fObjTgt.idObj(fObjTgt.rootId)
    elif fObjTgt.hasId(frag):
        iObjTgt = fObjTgt.idObj(frag)
    else:
        # frag not found, that will be a 411 error (handled by caller).
        return False
    # Have an rObjSrc + iObjTgt so check elements.
    # Collect at most one (code, args) pair so that the error is recorded
    # and the mutation flag is set in a single place.
    myErr = None
    srcElem = rObjSrc.elem
    tgtElem = iObjTgt.elem
    if srcElem.endswith('Ref'):
        # First case: source 'xxxRef' must reference element 'xxx'
        if srcElem[:-3] != tgtElem:
            if isRootTgt:
                myErr = (412, (srcElem, tgtElem))
            else:
                myErr = (413, (fObjTgt.idElem(frag), srcElem, frag))
    elif srcElem == 'topicref':
        # Second case(s) for vanilla DITA: check DITA map links
        isMapFormat = rObjSrc.format == 'ditamap'
        isMapTgt = tgtElem == 'map'
        if isMapFormat and not isMapTgt:
            # Target must be a root element (actually we don't care)
            myErr = (414, (tgtElem,))
        elif isMapTgt and not isMapFormat:
            myErr = (415, (rObjSrc.format,))
        elif not isMapFormat:
            # Neither side is a map. The DITA standard says of the type
            # attribute: "When the type attribute is unspecified, it
            # should be determined by inspecting the target if possible.
            # If the target cannot be inspected for some reason, the
            # value should default to "topic"."
            # Note: DITA 1.2 takes a different view...
            # So only an explicitly given type is compared.
            refType = rObjSrc.refType
            if refType is not None and refType != tgtElem:
                if isRootTgt:
                    myErr = (416, (refType, tgtElem,))
                else:
                    myErr = (417, (refType, tgtElem, frag,))
        # Otherwise topicref looks OK
    elif srcElem != 'xref' and srcElem not in XREF_DESCENDENTS:
        # Unknown referencing element
        if isRootTgt:
            myErr = (418, (srcElem, fObjTgt.doctype))
        else:
            myErr = (419, (srcElem, fObjTgt.idElem(frag), frag))
    if myErr is None:
        return False
    fObjSrc.addError(myErr[0], myErr[1])
    return True
def retMpDitaFileSetObj(theDir,
                        thePatterns,
                        recursive,
                        numJobs,
                        checkExt,
                        useDb):
    """Returns a finalised DitaFileSet built using a pool of processes.

    theDir      -- directory to search, must exist.
    thePatterns -- fnmatch style patterns passed to genDitaPath().
    recursive   -- passed to genDitaPath() to control directory descent.
    numJobs     -- number of worker processes, must be >= 0. Zero means
                   use multiprocessing.cpu_count().
    checkExt    -- passed to DitaFileSet as testExt.
    useDb       -- passed to DitaFileSet as useDbase.

    Each path produced by genDitaPath() is handed to retDitaFileObj() in
    a worker process. The results are added to the DitaFileSet in
    submission order, then finalise() is run serially.
    """
    assert(os.path.isdir(theDir))
    assert(numJobs >= 0)
    retObj = DitaFileSet(theDir, procDir=False, testExt=checkExt, useDbase=useDb)
    myNumJobs = numJobs
    if numJobs == 0:
        myNumJobs = multiprocessing.cpu_count()
    logging.info('Set multiprocessing number of jobs to %d' % myNumJobs)
    myPool = multiprocessing.Pool(processes=myNumJobs)
    try:
        # Submit everything first so the workers are kept busy while the
        # results are collected in order.
        myResults = [
            myPool.apply_async(retDitaFileObj, (f,))
            for f in genDitaPath(theDir, thePatterns, recursive)
        ]
        for result in myResults:
            myObj = result.get()
            logging.debug('Got %s' % myObj.identity)
            retObj._addDitaFileObj(myObj)
    finally:
        # Always release the worker processes. On success every result has
        # already been collected; on failure outstanding work is of no
        # use, so terminate() is appropriate in both cases.
        myPool.terminate()
        myPool.join()
    # Note: finalise() is a serial process
    logging.info('retMpDitaFileSetObj(): finalising')
    retObj.finalise()
    return retObj
'class_big_endian') + self.assertEqual(myObj.errStrings(True, None), []) + self.assertEqual(myObj.errStrings(False, None), []) + + def test_guid_00(self): + """DitaId: basic read of an node with an GUID id""" + myXml = """""" + myTree = etree.parse(StringIO.StringIO(myXml)) + myObj = DitaId(myTree.getroot()) + self.assertEqual(myObj.id, 'GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E') + myObj.checkGuid() + self.assertEqual(myObj.errStrings(True, None), []) + self.assertEqual(myObj.errStrings(False, None), []) + + def test_guid_01(self): + """DitaId: basic read of an node with an GUID id fails""" + myXml = """""" + myTree = etree.parse(StringIO.StringIO(myXml)) + myObj = DitaId(myTree.getroot()) + self.assertEqual(myObj.id, '25825EC4-341F-3EA4-94AA-7DCE380E6D2E') + myObj.checkGuid() + self.assertEqual( + myObj.errStrings(False, None), + [ + 'GUID specification does not match id="25825EC4-341F-3EA4-94AA-7DCE380E6D2E"' + ]) + self.assertEqual( + myObj.errStrings(True, None), + [ + 'GUID specification does not match id="%s"' % GENERIC_STRING, + ]) + + def test_cmp_eq_00(self): + """DitaId: cmp(), == of two identical nodes""" + myXml = """""" + myTree = etree.parse(StringIO.StringIO(myXml)) + myObj_00 = DitaId(myTree.getroot()) + myObj_01 = DitaId(myTree.getroot()) + self.assertEqual(cmp(myObj_00, myObj_01), 0) + self.assertEqual((myObj_00 == myObj_01), True) + + def test_cmp_eq_01(self): + """DitaId: cmp(), == of two identical nodes from different elements.""" + myXml_00 = """""" + myTree_00 = etree.parse(StringIO.StringIO(myXml_00)) + myObj_00 = DitaId(myTree_00.getroot()) + myXml_01 = """""" + myTree_01 = etree.parse(StringIO.StringIO(myXml_01)) + myObj_01 = DitaId(myTree_01.getroot()) + self.assertEqual(cmp(myObj_00, myObj_01), 0) + self.assertEqual((myObj_00 == myObj_01), True) + + def test_set(self): + """DitaId: read of an node with an id several times into a set and check unique,""" + myXml = """""" + myTree = etree.parse(StringIO.StringIO(myXml)) + s = set() + i = 
0 + while i < 8: + s.add(DitaId(myTree.getroot())) + i += 1 + self.assertEqual(len(s), 1) + self.assertEqual(DitaId(myTree.getroot()) in s, True) + + def test_map(self): + """DitaId: read of an node with an id several times into a map and check unique,""" + myXml = """""" + myTree = etree.parse(StringIO.StringIO(myXml)) + m = {} + i = 0 + while i < 8: + m[DitaId(myTree.getroot())] = 1 + i += 1 + self.assertEqual(len(m), 1) + self.assertEqual(m.has_key(DitaId(myTree.getroot())), True) + + def test_error_hash(self): + """DitaId: error with a '#' in an id""" + myXml = """""" + myTree = etree.parse(StringIO.StringIO(myXml)) + myObj = DitaId(myTree.getroot()) + self.assertEqual(myObj.id, 'class_#big_endian') + self.assertEqual(str(myObj), 'class_#big_endian') + self.assertEqual( + myObj.errStrings(True, None), + [ + genericStringForErrorCode(100), + ] + ) + self.assertEqual( + myObj.errStrings(False, None), + [ + 'Character \'#\' not allowed in id="class_#big_endian"', + ] + ) + + + +class TestDitaRef(unittest.TestCase): + def setUp(self): + pass + + def tearDown(self): + pass + + def testSetUpTearDown(self): + """DitaRef: test setUp() and tearDown().""" + pass + + def test_basic(self): + """DitaRef: basic read of an xref node, no fragment""" + myXml = """""" + myTree = etree.parse(StringIO.StringIO(myXml)) + myObj = DitaRef(myTree.getroot()) + self.assertEqual(myObj.href, 'class_big_endian') + self.assertEqual(myObj.path, 'class_big_endian') + self.assertEqual(myObj.elem, 'xref') + self.assertEqual(str(myObj), 'xref class_big_endian') + self.assertEqual(myObj.fragment, '') + self.assertEqual(myObj.scheme, '') + self.assertEqual(myObj.errStrings(False, None), []) + self.assertEqual(myObj.errStrings(True, None), []) + + def test_basic_frag(self): + """DitaRef: basic read of an xref node, with fragment""" + myXml = """""" + myTree = etree.parse(StringIO.StringIO(myXml)) + myObj = DitaRef(myTree.getroot()) + self.assertEqual(myObj.href, 'class_big_endian.xml#function') + 
self.assertEqual(myObj.path, 'class_big_endian.xml') + self.assertEqual(myObj.fragment, 'function') + self.assertEqual(myObj.scheme, '') + self.assertEqual(myObj.errStrings(False, None), []) + self.assertEqual(myObj.errStrings(True, None), []) + + def test_file_frag_00(self): + """DitaRef: accessing an xref node, with a file and a fragment""" + myXml = """""" + myTree = etree.parse(StringIO.StringIO(myXml)) + myObj = DitaRef(myTree.getroot()) + self.assertEqual(myObj.href, 'class_big_endian.xml#function') + self.assertEqual(myObj.path, 'class_big_endian.xml') + self.assertEqual(myObj.fragment, 'function') + self.assertEqual(myObj.scheme, '') + srcPath = normalisePath(os.path.join('C:%s' % os.sep, 'spam', 'eggs.xml')) + expPath = normalisePath(os.path.join('C:%s' % os.sep, 'spam', 'class_big_endian.xml')) + self.assertEqual( + myObj.fileFragment(srcPath), + (expPath, 'function') + ) + self.assertEqual(myObj.errStrings(False, None), []) + self.assertEqual(myObj.errStrings(True, None), []) + + def test_file_frag_01(self): + """DitaRef: accessing an xref node, with a file and a fragment and relative path with '\\'.""" + myXml = """""" + myTree = etree.parse(StringIO.StringIO(myXml)) + myObj = DitaRef(myTree.getroot()) + srcPath = normalisePath(os.path.join('C:%s' % os.sep, 'spam', 'eggs.xml')) + expPath = normalisePath(os.path.join('C:%s' % os.sep, 'chips', 'class_big_endian.xml')) + self.assertEqual( + myObj.fileFragment(srcPath), + (expPath, 'function') + ) + self.assertEqual(myObj.errStrings(False, None), []) + self.assertEqual(myObj.errStrings(True, None), []) + + def test_file_frag_02(self): + """DitaRef: accessing an xref node, with a file and a fragment and relative path with '/'.""" + myXml = """""" + myTree = etree.parse(StringIO.StringIO(myXml)) + myObj = DitaRef(myTree.getroot()) + srcPath = normalisePath(os.path.join('C:%s' % os.sep, 'spam', 'eggs.xml')) + expPath = normalisePath(os.path.join('C:%s' % os.sep, 'chips', 'class_big_endian.xml')) + 
self.assertEqual( + myObj.fileFragment(srcPath), + (expPath, 'function') + ) + self.assertEqual(myObj.errStrings(False, None), []) + self.assertEqual(myObj.errStrings(True, None), []) + + def test_file_frag_03(self): + """DitaRef: accessing an xref node, with a no file but with a fragment""" + myXml = """""" + myTree = etree.parse(StringIO.StringIO(myXml)) + myObj = DitaRef(myTree.getroot()) + self.assertEqual(myObj.href, '#function') + self.assertEqual(myObj.path, '') + self.assertEqual(myObj.fragment, 'function') + self.assertEqual(myObj.scheme, '') + srcPath = normalisePath(os.path.join('C:%s' % os.sep, 'spam', 'eggs.xml')) + expPath = normalisePath(os.path.join('C:%s' % os.sep, 'spam', 'eggs.xml')) + self.assertEqual( + myObj.fileFragment(srcPath), + (expPath, 'function') + ) + self.assertEqual(myObj.errStrings(False, None), []) + self.assertEqual(myObj.errStrings(True, None), []) + + def test_basic_scheme(self): + """DitaRef: an xref node with a URI scheme""" + myXml = """""" + myTree = etree.parse(StringIO.StringIO(myXml)) + myObj = DitaRef(myTree.getroot()) + self.assertEqual(myObj.href, 'http://www.cwi.nl:80/%7Eguido/Python.html#fragment') + self.assertEqual(myObj.path, '/%7Eguido/Python.html') + self.assertEqual(myObj.fragment, 'fragment') + self.assertEqual(myObj.scheme, 'http') + self.assertEqual(myObj.errStrings(False, None), []) + self.assertEqual(myObj.errStrings(True, None), []) + + def test_basic_scheme_file_frag(self): + """DitaRef: an xref node with a URI scheme, invoking fileFragment()""" + myXml = """""" + myTree = etree.parse(StringIO.StringIO(myXml)) + myObj = DitaRef(myTree.getroot()) + self.assertEqual(myObj.href, 'http://www.cwi.nl:80/%7Eguido/Python.html#fragment') + self.assertEqual(myObj.path, '/%7Eguido/Python.html') + self.assertEqual(myObj.fragment, 'fragment') + self.assertEqual(myObj.scheme, 'http') + srcPath = os.path.join('C:%s' % os.sep, 'spam', 'eggs.xml') + self.assertEqual( + myObj.fileFragment(srcPath), + (None, None) + ) + 
self.assertEqual(myObj.errStrings(False, None), []) + self.assertEqual(myObj.errStrings(True, None), []) + + def test_fail_no_href(self): + """DitaRef: Fails on an xref node with no href attribute""" + myXml = """""" + myTree = etree.parse(StringIO.StringIO(myXml)) + myObj = DitaRef(myTree.getroot()) + self.assertEqual( + myObj.errStrings(False, None), + [ + 'Reference element "xref" is missing href=... attribute', + ] + ) + self.assertEqual( + myObj.errStrings(True, None), + [ + 'Reference element "%s" is missing href=... attribute' % GENERIC_STRING, + ] + ) + + def test_fail_bad_frag(self): + """DitaRef: Fails on an xref node with href attribute that has multiple '#' characters""" + myXml = """""" + myTree = etree.parse(StringIO.StringIO(myXml)) + myObj = DitaRef(myTree.getroot()) + self.assertEqual( + myObj.errStrings(False, None), + [ + 'Multiple \'#\' not allowed in reference "a#b#c"', + ] + ) + self.assertEqual( + myObj.errStrings(True, None), + [ + 'Multiple \'#\' not allowed in reference "%s"' % GENERIC_STRING, + ] + ) + + def test_guid_00(self): + """DitaRef: basic read of an node with an GUID file/fragment reference""" + myXml = """""" + myTree = etree.parse(StringIO.StringIO(myXml)) + myObj = DitaRef(myTree.getroot()) + self.assertEqual(myObj.href, 'GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E.xml#GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E') + self.assertEqual(myObj.path, 'GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E.xml') + self.assertEqual(myObj.elem, 'xref') + self.assertEqual(str(myObj), 'xref GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E.xml#GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E') + self.assertEqual(myObj.fragment, 'GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E') + self.assertEqual(myObj.scheme, '') + self.assertEqual(myObj.errStrings(False, None), []) + self.assertEqual(myObj.errStrings(True, None), []) + + def test_guid_01(self): + """DitaRef: basic read of an node with an GUID file part fails""" + myXml = """""" + myTree = 
etree.parse(StringIO.StringIO(myXml)) + myObj = DitaRef(myTree.getroot()) + self.assertEqual(myObj.href, 'GUID-.xml#GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E') + self.assertEqual(myObj.path, 'GUID-.xml') + self.assertEqual(myObj.elem, 'xref') + self.assertEqual(str(myObj), 'xref GUID-.xml#GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E') + self.assertEqual(myObj.fragment, 'GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E') + self.assertEqual(myObj.errStrings(False, None), []) + self.assertEqual(myObj.errStrings(True, None), []) + myObj.checkGuid() + self.assertEqual( + myObj.errStrings(False, None), + [ + 'GUID specification does not match file reference "GUID-.xml"' + ]) + self.assertEqual( + myObj.errStrings(True, None), + [ + genericStringForErrorCode(203), + ] + ) + + def test_guid_02(self): + """DitaRef: basic read of an node with an GUID fragment part fails""" + myXml = """""" + myTree = etree.parse(StringIO.StringIO(myXml)) + myObj = DitaRef(myTree.getroot()) + self.assertEqual(myObj.href, 'GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E.xml#GUID-25825EC4') + self.assertEqual(myObj.path, 'GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E.xml') + self.assertEqual(myObj.elem, 'xref') + self.assertEqual(str(myObj), 'xref GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E.xml#GUID-25825EC4') + self.assertEqual(myObj.fragment, 'GUID-25825EC4') + self.assertEqual(myObj.errStrings(False, None), []) + self.assertEqual(myObj.errStrings(True, None), []) + myObj.checkGuid() + self.assertEqual( + myObj.errStrings(False, None), + [ + 'GUID specification does not match fragment reference "GUID-25825EC4"' + ]) + self.assertEqual( + myObj.errStrings(True, None), + [ + genericStringForErrorCode(204), + ] + ) + +class TestDitaFile(unittest.TestCase): + def setUp(self): + pass + + def tearDown(self): + pass + + def testSetUpTearDown(self): + """DitaFile: test setUp() and tearDown().""" + pass + + def test_Basic(self): + """DitaFile: basic read of an XML file""" + myXml = """ + + + BigEndian + + + + + + + + + + + 
+ + +

Inserts and extracts integers in big-endian format.

+
+
+ + + + + + + + +
def test_missing_file(self):
    """DitaFile: read an missing XML file"""
    # No file object is supplied, only the path 'foo'.
    missing = DitaFileObj(None, 'foo')
    # Expected report with the specific path substituted...
    expSpecific = ['Failed to open: "%s"' % normalisePath('foo')]
    # ...and the generic form of the same error code.
    expGeneric = [genericStringForErrorCode(400)]
    self.assertEqual(missing.errStrings(False, None), expSpecific)
    self.assertEqual(missing.errStrings(True, None), expGeneric)
self.assertEqual(myObj.rootId, None) + self.assertEqual( + myObj.idElemMap(), + { + 'class_big_endian_1a9f78fb092e713acf6ffe3e8e11f1626f' : 'cxxFunction', + } + ) + self.assertEqual(myObj.errStrings(False, None), [genericStringForErrorCode(402)]) + self.assertEqual(myObj.errStrings(True, None), [genericStringForErrorCode(402)]) + + def test_duplicate_id(self): + """DitaFile: duplicate IDs""" + myXml = """ + +""" + myFile = StringIO.StringIO(myXml) + myObj = DitaFileObj(myFile, 'spam.xml') + self.assertEqual(myObj.identity, normalisePath('spam.xml')) + self.assertEqual(myObj.doctype, 'root') + self.assertEqual(myObj.rootId, 'AnID') + self.assertEqual(myObj.idElemMap(), {}) + self.assertEqual( + myObj.errStrings(False, None), + [ + 'Multiple id="AnID"', + ] + ) + self.assertEqual(myObj.errStrings(True, None), [genericStringForErrorCode(401)]) + + def test_ismap_00(self): + """DitaFile: Is a map for .""" + myXml = """""" + myFile = StringIO.StringIO(myXml) + myObj = DitaFileObj(myFile, 'spam.xml') + self.assertEqual(myObj.isMap, True) + + def test_ismap_01(self): + """DitaFile: Is a map for .""" + myXml = """""" + myFile = StringIO.StringIO(myXml) + myObj = DitaFileObj(myFile, 'spam.xml') + self.assertEqual(myObj.isMap, True) + + def test_Basic_01(self): + """DitaFile: read of an simple XML file with id and xref""" + myXml = """ + + + OtherClass + +""" + myFile = StringIO.StringIO(myXml) + myObj = DitaFileObj(myFile, 'foo') + self.assertEqual(myObj.identity, normalisePath('foo')) + self.assertEqual(myObj.doctype, 'cxxClass') + self.assertEqual(myObj.rootId, 'class_big_endian') + self.assertEqual(myObj.isMap, False) + self.assertEqual(len(myObj.idS), 2) + self.assertEqual(len(myObj.refS), 1) + self.assertEqual(myObj.hasId('class_big_endian'), True) + self.assertEqual(myObj.hasId('class_big_endian_1a9f78fb092e713acf6ffe3e8e11f1626f'), True) + self.assertEqual(myObj.hasId('noID'), False) + self.assertEqual(myObj.idElem('class_big_endian'), 'cxxClass') + 
def test_None(self):
    """DitaFileSet: read of None."""
    # None is not a directory, so a single error 500 is expected.
    fileSet = DitaFileSet(None)
    fileSet.finalise()
    expSpecific = ['Not a directory: None']
    expGeneric = ['Not a directory: %s' % GENERIC_STRING]
    expCounts = {500: 1}
    self.assertEqual(fileSet.errStrings(False, None), expSpecific)
    self.assertEqual(fileSet.errStrings(True, None), expGeneric)
    self.assertEqual(fileSet.errCountMap, expCounts)
[genericStringForErrorCode(504),]) + self.assertEqual(myO.errCountMap, {504 : 1}) + + def test_duplicate_ids(self): + """DitaFileSet: Test reading a map and a couple of files with duplicate IDs.""" + myO = DitaFileSet(None, procDir=False) + myO._addFileObj( + StringIO.StringIO( +""" + + + +""" + ), + 'map.ditamap' + ) + myO._addFileObj(StringIO.StringIO(''), 'spam.dita') + myO._addFileObj(StringIO.StringIO(''), 'eggs.dita') + myO._addFileObj(StringIO.StringIO(''), 'chips.dita') + myO.finalise() + #print 'HI' + #myO.writeErrors(False) + #pprint.pprint(myO.errStrings(False, None)) + self.assertEqual( + myO.errStrings(True, None), + [ + genericStringForErrorCode(505), + genericStringForErrorCode(501), + ] + ) + expErrs = [ + """Duplicate id="chips" in files: ('%s', '%s', '%s')""" \ + % (normalisePath('chips.dita'), normalisePath('eggs.dita'), normalisePath('spam.dita')), + """Duplicate root id="chips" in files: ('%s', '%s', '%s')""" \ + % (normalisePath('chips.dita'), normalisePath('eggs.dita'), normalisePath('spam.dita')), + ] + myErrs = myO.errStrings(False, None) +#=============================================================================== +# for i in range(2): +# if myErrs[i] != expErrs[i]: +# print myErrs[i] +# print expErrs[i] +# print +#=============================================================================== + self.assertEqual(myErrs, expErrs) + self.assertEqual(myO.errCountMap, {505: 1, 501: 1}) + + def test_lonely_topics(self): + """DitaFileSet: Test a couple of lonely topics.""" + myO = DitaFileSet(None, procDir=False) + myO._addFileObj(StringIO.StringIO(''), 'spam') + myO._addFileObj(StringIO.StringIO(''), 'eggs') + myO.finalise() + self.assertEqual( + myO.errStrings(False, None), + [ + 'Topic id="%s" is not referenced by any map' % normalisePath('eggs'), + 'Topic id="%s" is not referenced by any map' % normalisePath('spam'), + ] + ) + self.assertEqual( + myO.errStrings(True, None), + [ + genericStringForErrorCode(600), + ] + ) + + def 
test_map_cycles_00(self): + """DitaFileSet: Cyclic references between two maps.""" + myO = DitaFileSet(None, procDir=False) + myO._addFileObj( + StringIO.StringIO( +""" + +""" + ), + 'map_00.ditamap' + ) + myO._addFileObj( + StringIO.StringIO( +""" + +""" + ), + 'map_01.ditamap' + ) + myO.finalise() + #print 'HI test_map_cycles_00()' + #pprint.pprint(myO._retMapAdjList()) + self.assertEqual( + myO.errStrings(False, None), + [ + 'Maps "%s" are in a a cycle.' % str( + ( + normalisePath('map_00.ditamap'), + normalisePath('map_01.ditamap'), + ) + ), + 'Maps "%s" are in a a cycle.' % str( + ( + normalisePath('map_01.ditamap'), + normalisePath('map_00.ditamap'), + ) + ), + ] + ) + #print + #pprint.pprint(myO.allErrStrings(False, None)) + self.assertEqual(myO.allErrStrings(True, None), [genericStringForErrorCode(701)]) + self.assertEqual(myO.errCountMap, {701 : 4}) + + def test_map_cycles_01(self): + """DitaFileSet: Cyclic references between three maps.""" + myO = DitaFileSet(None, procDir=False) + myO._addFileObj( + StringIO.StringIO( +""" + +""" + ), + 'map_00.ditamap' + ) + myO._addFileObj( + StringIO.StringIO( +""" + +""" + ), + 'map_01.ditamap' + ) + myO._addFileObj( + StringIO.StringIO( +""" + +""" + ), + 'map_02.ditamap' + ) + myO.finalise() + #print 'HI test_map_cycles_00()' + #pprint.pprint(myO._retMapAdjList()) + self.assertEqual( + myO.errStrings(False, None), + [ + 'Maps "%s" are in a a cycle.' % str( + ( + normalisePath('map_00.ditamap'), + normalisePath('map_01.ditamap'), + normalisePath('map_02.ditamap'), + ) + ), + 'Maps "%s" are in a a cycle.' % str( + ( + normalisePath('map_01.ditamap'), + normalisePath('map_02.ditamap'), + normalisePath('map_00.ditamap'), + ) + ), + 'Maps "%s" are in a a cycle.' 
% str( + ( + normalisePath('map_02.ditamap'), + normalisePath('map_00.ditamap'), + normalisePath('map_01.ditamap'), + ) + ), + ] + ) + self.assertEqual(myO.errStrings(True, None), [genericStringForErrorCode(701)]) + self.assertEqual(myO.errCountMap, {701 : 6}) + + def test_refarc_00(self): + """DitaFileSet: Test ref arcing - all resolve.""" + myO = DitaFileSet(None, procDir=False) + myO._addFileObj( + StringIO.StringIO( +""" + + +""" + ), + 'map.ditamap' + ) + myO._addFileObj(StringIO.StringIO(''), 'spam.dita') + myO._addFileObj(StringIO.StringIO(''), 'eggs.dita') + myO.finalise() + self.assertEqual(myO.errCountMap, {}) + self.assertEqual(myO.allErrStrings(False, None), []) + self.assertEqual(myO.allErrStrings(True, None), []) + self.assertEqual(myO.errStrings(False, None), []) + self.assertEqual(myO.errStrings(True, None), []) + + def test_refarc_fail_00(self): + """DitaFileSet: Test ref arcing - can't find file.""" + myO = DitaFileSet(None, procDir=False) + myO._addFileObj( + StringIO.StringIO( +""" + + +""" + ), + 'map.ditamap' + ) + myO.finalise() + self.assertEqual(myO.errCountMap, {410: 2}) + #print 'HI' + #pprint.pprint(myO.allErrStrings(False, None)) + self.assertEqual( + myO.allErrStrings(False, None), + [ + 'Can not resolve reference to file "%s"' % normalisePath('eggs_for_tea.dita'), + 'Can not resolve reference to file "%s"' % normalisePath('spam_.dita'), + ] + ) + self.assertEqual( + myO.allErrStrings(True, None), + [ + 'Can not resolve reference to file "..."', + ] + ) + self.assertEqual(myO.errStrings(False, None), []) + self.assertEqual(myO.errStrings(True, None), []) + + def test_refarc_fail_01(self): + """DitaFileSet: Test ref arcing - can't find fragment.""" + myO = DitaFileSet(None, procDir=False) + myO._addFileObj( + StringIO.StringIO( +""" + + +""" + ), + 'map.ditamap' + ) + myO._addFileObj(StringIO.StringIO(''), 'spam.dita') + myO._addFileObj(StringIO.StringIO(''), 'eggs.dita') + myO.finalise() + self.assertEqual(myO.errCountMap, {411: 2}) + 
#print 'HI' + #pprint.pprint(myO.allErrStrings(False, None)) + self.assertEqual( + myO.allErrStrings(False, None), + [ + 'Can resolve reference to file "%s" but not to fragment "eggs_"' % normalisePath('eggs.dita'), + 'Can resolve reference to file "%s" but not to fragment "spam_"' % normalisePath('spam.dita'), + ] + ) + self.assertEqual( + myO.allErrStrings(True, None), + [ + 'Can resolve reference to file "%s" but not to fragment "%s"' % (GENERIC_STRING, GENERIC_STRING), + ] + ) + self.assertEqual(myO.errStrings(False, None), []) + self.assertEqual(myO.errStrings(True, None), []) + + def test_refarc_url_00(self): + """DitaFileSet: Test ref arcing - URL.""" + myO = DitaFileSet(None, procDir=False, testExt=True) + myO._addFileObj( + StringIO.StringIO( +""" + + +""" + ), + 'map.ditamap' + ) + myO._addFileObj(StringIO.StringIO(""" + Nokia +"""), 'spam.dita') + myO._addFileObj(StringIO.StringIO(""" + Google +"""), 'eggs.dita') + myO.finalise() + #print 'HI' + #pprint.pprint(myO.allErrStrings(False, None)) + self.assertEqual(myO.errCountMap, {}) + self.assertEqual( + myO.allErrStrings(False, None), + [ + ] + ) + self.assertEqual( + myO.allErrStrings(True, None), + [ + ] + ) + self.assertEqual(myO.errStrings(False, None), []) + self.assertEqual(myO.errStrings(True, None), []) + +class TestDitaBookmapFileSet(unittest.TestCase): + def setUp(self): + pass + + def tearDown(self): + pass + + def testSetUpTearDown(self): + """TestDitaBookmapFileSet: test setUp() and tearDown().""" + pass + + def test_basic(self): + """TestDitaBookmapFileSet: Test reading a bookmap and a topic.""" + myO = DitaFileSet(None, procDir=False) + myO._addFileObj( + StringIO.StringIO( +""" + + + + My Bookmap + Alternate title + + + + + +""" + ), + 'bookmap.ditamap' + ) + myO._addFileObj(StringIO.StringIO(""" + + + How to read and write a file + +"""), 'GUID-00025EAD-C4B6-5408-96A3-FFDBBBDC7CAB.dita') + myO.finalise() + #print + #myO.debugDump() + #print 'HI' + #myO.writeErrors(False) + 
self.assertEqual(myO.allErrStrings(False, None), []) + self.assertEqual(myO.allErrStrings(True, None), []) + self.assertEqual(myO.errCountMap, {}) + +class Special(unittest.TestCase): + pass + +def unitTest(theVerbosity=2): + suite = unittest.TestLoader().loadTestsFromTestCase(NullClass) + suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestCountDict)) + suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestDitaId)) + suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestDitaRef)) + suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestDitaFile)) + suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestDitaFileSet)) + suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestDitaBookmapFileSet)) + suite.addTests(unittest.TestLoader().loadTestsFromTestCase(Special)) + myResult = unittest.TextTestRunner(verbosity=theVerbosity).run(suite) + return (myResult.testsRun, len(myResult.errors), len(myResult.failures)) + +###################################### +# main() stuff +###################################### +def main(): + print 'CMD: %s' % ' '.join(sys.argv) + usage = "usage: %prog [options] " + parser = OptionParser(usage, version='%prog ' + __version__) + parser.add_option("-d", action="store_true", dest="dump", default=False, + help="Dump internal representation. [default: %default]") + parser.add_option( + "-e", "--errors", + type="str", + dest="error_codes", + default='All', + help="Only report on certain error codes (space seperated list). [default: \"%default\"]" + ) + parser.add_option("-f", "--file", dest="file", type="str", default='None', + help="Report of errors by file either 'None', 'generic', 'specific'. [default: %default]") + parser.add_option("-g", action="store_true", dest="guid", default=False, + help="Enforce GUID specification. [default: %default]") + parser.add_option( + "-j", "--jobs", + type="int", + dest="jobs", + default=-1, + help="Max processes when multiprocessing. 
0 takes CPUs, -1 no MP. [default: %default]" + ) + parser.add_option( + "-l", "--loglevel", + type="int", + dest="loglevel", + default=20, + help="Log Level (debug=10, info=20, warning=30, [error=40], critical=50) [default: %default]" + ) + parser.add_option( + "-p", "--pattern", + type="str", + dest="pattern", + default=FNMATCH_STRING, + help="Pattern match. [default: \"%default\"]" + ) + parser.add_option("-r", action="store_true", dest="recursive", default=False, + help="Recursive. [default: %default]") + parser.add_option("-s", action="store_true", dest="shelve", default=False, + help="Use the shelve dBase rather than storing the internal representation in memory. This is slower but is useful for large data sets where a memory error might occur. [default: %default]") + parser.add_option("-u", action="store_true", dest="unit_test", default=False, + help="Execute unit tests and exit. [default: %default]") + parser.add_option("-x", action="store_true", dest="ext_url", default=False, + help="Test external |URLs. [default: %default]") + parser.add_option("-?", action="store_true", dest="query_errors", default=False, + help="Display the error types that are detected. 
[default: %default]") + (options, args) = parser.parse_args() + logging.basicConfig( + level=options.loglevel, + format='%(asctime)s %(levelname)-8s %(message)s', + stream=sys.stdout, + ) + if options.file not in ('None', 'generic', 'specific'): + parser.error("--file option must be: 'None' | 'generic' | 'specific'") + return 1 + if options.unit_test: + unitTest() + if options.query_errors: + writeGenericStringsForErrorCodes() + if len(args) < 1 and not options.unit_test: + parser.print_help() + parser.error("I can't do much without a path to the XML content.") + return 1 + elif len(args) == 1: + if options.jobs > -1: + myObj = retMpDitaFileSetObj( + args[0], + options.pattern.split(' '), + options.recursive, + options.jobs, + options.ext_url, + options.shelve, + ) + else: + myObj = DitaFileSet(args[0], + procDir=True, + thePatterns=options.pattern.split(' '), + recursive=options.recursive, + testExt=options.ext_url, + useDbase=options.shelve, + ) + #print 'MyObj:', myObj + if options.dump: + myObj.debugDump() + myObj.writeStatistics() + myObj.writeErrorSummary() + #pprint.pprint(myObj.statsMap) + # TODO: Write out the results in different ways + errFilter = set(PROBLEM_CODE_FORMAT.keys()) + if options.error_codes != 'All': + errFilter = set([int(i) for i in options.error_codes.split()]) + if options.file == 'generic': + print 'Generic problems:' + myObj.writeErrors(True, errFilter) + elif options.file == 'specific': + print 'Specific problems:' + myObj.writeErrors(False, errFilter) + elif len(args) > 1: + parser.error("Too many arguments, I need only one.") + return 1 + return 0 + +if __name__ == '__main__': + multiprocessing.freeze_support() + sys.exit(main())