--- a/mpdot/linkcheck.py Fri Apr 23 20:47:58 2010 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,2336 +0,0 @@
-# Copyright (c) 2007-2010 Nokia Corporation and/or its subsidiary(-ies) All rights reserved.
-# This component and the accompanying materials are made available under the terms of the License
-# "Eclipse Public License v1.0" which accompanies this distribution,
-# and is available at the URL "http://www.eclipse.org/legal/epl-v10.html".
-#
-# Initial Contributors:
-# Nokia Corporation - initial contribution.
-#
-# Contributors:
-#
-# Description:
-# Checks links in DITA XML and reports issues.
-"""
-Created on 12 Feb 2010
-
-@author: p2ross
-
-Definitions
-===========
-Doctype
--------
-See: http://www.w3.org/TR/2008/REC-xml-20081126/#dt-root
-Note: this is sometimes called the Doctype because of http://www.w3.org/TR/2008/REC-xml-20081126/#vc-roottype
-
-ID
---
-The value of the 'id' attribute of an element.
-
-Root ID
--------
-The value of the 'id' attribute of the root element.
-Note: A development would allow differently named attributes provided that they
-were ID types. See http://www.w3.org/TR/2008/REC-xml-20081126/#sec-attribute-types
-for validity constraints for ID types.
-
-Reference
----------
-The value of the href attribute of an element.
-
-Map
----
-An XML file whose root element name is 'map' or ends with 'Map'.
-
-Topic
------
-An XML file that is not a Map.
-
-Lonely topic
-------------
-A topic whose root ID is not referenced by any map.
-
-Lonely map
-----------
-A map whose root ID is not referenced by any map.
-
-Map Cycle
----------
-A sequence of map references whose members are not unique.
-
-"""
-
-import os
-import unittest
-import sys
-import logging
-import pprint
-import fnmatch
-import re
-import urllib
-import time
-from optparse import OptionParser, check_choice
-try:
- from xml.etree import cElementTree as etree
-except ImportError:
- from xml.etree import ElementTree as etree
-import urlparse
-import multiprocessing
-# used for DitaFileObj persistence
-import shelve
-
-__version__ = '0.1.5'
-
-class ExceptionLinkCheck(Exception):
- pass
-
-class CountDict(dict):
- """Dictionary with a default value of 0 for unknown keys."""
- def __getitem__(self, key):
- if key not in self:
- self[key] = 0
- return self.get(key)
-
-# Matches stuff like: GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E
-RE_GUID = re.compile(r'GUID-[0-9A-F]{8}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{12}', re.IGNORECASE)
-
-# Of the form {integer_error_code : (format_string, num_args), ...}
-PROBLEM_CODE_FORMAT = {
- # 'id_syntax'
- 100 : ('Character \'#\' not allowed in id="%s"', 1),
- 101 : ('NMTOKEN character \'%s\' not allowed in id="%s"', 2),
- 102 : ('GUID specification does not match id="%s"', 1),
- # 'ref_syntax'
- 200 : ('Multiple \'#\' not allowed in reference "%s"', 1),
- 201 : ('Reference element "%s" is missing href=... attribute', 1),
- 202 : ('URL has missing type/format in reference "%s"', 1),
- 203 : ('GUID specification does not match file reference "%s"', 1),
- 204 : ('GUID specification does not match fragment reference "%s"', 1),
- # 'ref'
- 300 : ('Can not resolve URI "%s"', 1),
- # 'file'
- 400 : ('Failed to open: "%s"', 1),
- 401 : ('Multiple id="%s"', 1),
- 402 : ('No id attribute on root element', 0),
- 403 : ('Root ID in cycle: %s', 1),
- 404 : ('Can not parse: "%s"', 1),
- 410 : ('Can not resolve reference to file "%s"', 1),
- 411 : ('Can resolve reference to file "%s" but not to fragment "%s"', 2),
- 412 : ('Referencing element "%s" does not match target root element "%s"', 2),
- 413 : ('Referencing element "%s" does not match target element "%s" for id="%s"', 3),
- 414 : ('topicref element with format="ditamap" does not match target root element "%s"', 1),
- 415 : ('topicref to <map> does not have format="ditamap" but format="%s"', 1),
- 416 : ('topicref element type="%s" does not match target root element "%s"', 2),
- 417 : ('topicref element type="%s" does not match target element "%s" for id="%s"', 3),
- 418 : ('Unknown referencing element "%s" does not match target root element "%s"', 2),
- 419 : ('Unknown referencing element "%s" does not match target element "%s" for id="%s"', 3),
- # 'file_set'
- 500 : ('Not a directory: %s', 1),
- 501 : ('Duplicate root id="%s" in files: %s', 2),
- #502 : ('Can not resolve reference to "%s"', 1),
- #503 : ('Reference type "%s" does not match target type "%s" for id="%s"', 3),
- 504 : ('Duplicate file path: "%s"', 1),
- 505 : ('Duplicate id="%s" in files: %s', 2),
- # 'topic_set'
- 600 : ('Topic id="%s" is not referenced by any map', 1),
- # 'map_set'
- 700 : ('More than one top level map exists: %s', 1),
- 701 : ('Maps "%s" are in a a cycle.', 1),
-}
-
-GENERIC_STRING = '...'
-PRINT_WIDTH = 75
-
-def genericStringForErrorCode(ec):
- assert(PROBLEM_CODE_FORMAT.has_key(ec))
- f, c = PROBLEM_CODE_FORMAT[ec]
- if c == 0:
- return f
- return f % ((GENERIC_STRING,) * c)
-
-def writeGenericStringsForErrorCodes(s=sys.stdout):
- s.write(' All Error Codes '.center(PRINT_WIDTH, '='))
- s.write('\n')
- s.write('%4s %s\n' % ('Code', 'Error'))
- s.write('%4s %s\n' % ('----', '-----'))
- ecS = PROBLEM_CODE_FORMAT.keys()
- ecS.sort()
- for ec in ecS:
- s.write('%4d %s\n' % (ec, genericStringForErrorCode(ec)))
- s.write('='*PRINT_WIDTH)
- s.write('\n\n')
-
-def normalisePath(thePath):
- # TODO: How come this does not work?
- #return os.path.abspath(thePath)
- return os.path.abspath(thePath).replace('\\', '/')
-
-FNMATCH_PATTERNS = ['*.xml', '*.dita', '*.ditamap']
-FNMATCH_STRING = ' '.join(FNMATCH_PATTERNS)
-
-# These elements descend from topic/xref so can be treated as referencing elements
-XREF_DESCENDENTS = set(
- (
- # From the api specialisation
- 'apiRelation',
- 'apiBaseClassifier',
- 'apiOtherClassifier',
- 'apiOperationClassifier',
- 'apiValueClassifier',
- # From the C++ specialisation
- 'cxxfile',
- 'cxxclass',
- 'cxxstruct',
- 'cxxunion',
- 'cxxfunction',
- 'cxxdefine',
- 'cxxtypedef',
- 'cxxvariable',
- 'cxxenumeration',
- 'cxxClassBaseClass',
- 'cxxClassBaseStruct',
- 'cxxClassBaseUnion',
- 'cxxClassNestedClass',
- 'cxxClassNestedStruct',
- 'cxxClassNestedUnion',
- 'cxxClassEnumerationInherited',
- 'cxxClassEnumeratorInherited',
- 'cxxClassFunctionInherited',
- 'cxxClassVariableInherited',
- 'cxxDefineReimplemented',
- 'cxxEnumerationReimplemented',
- 'cxxFunctionReimplemented',
- 'cxxStructBaseClass',
- 'cxxStructBaseStruct',
- 'cxxStructBaseUnion',
- 'cxxStructNestedClass',
- 'cxxStructNestedStruct',
- 'cxxStructNestedUnion',
- 'cxxStructEnumerationInherited',
- 'cxxStructEnumeratorInherited',
- 'cxxStructFunctionInherited',
- 'cxxStructVariableInherited',
- 'cxxTypedefReimplemented',
- 'cxxUnionBaseClass',
- 'cxxUnionBaseStruct',
- 'cxxUnionBaseUnion',
- 'cxxUnionNestedClass',
- 'cxxUnionNestedStruct',
- 'cxxUnionNestedUnion',
- 'cxxUnionEnumerationInherited',
- 'cxxUnionFunctionInherited',
- 'cxxUnionVariableInherited',
- 'cxxVariableReimplemented',
- )
-)
-
-class UrlAccessCache(object):
- def __init__(self):
- # {URL : True/False, ...}
- self._cache = {}
-
- def clear(self):
- self._cache = {}
-
- def canAccess(self, theUrl):
- if not self._cache.has_key(theUrl):
- try:
- u = urllib.urlopen(theUrl)#, data, proxies)
- u.read()
- self._cache[theUrl] = True
- logging.debug('URL: %s for %s' % (True, theUrl))
- except IOError:
- self._cache[theUrl] = False
- logging.debug('URL: %s for %s' % (False, theUrl))
- return self._cache[theUrl]
-
-GlobalUrlCache = UrlAccessCache()
-
-class DitaLinkCheckBase(object):
- """Base class that holds some common functionality."""
- def __init__(self, theIdentity):#=None):
- self.__identity = theIdentity
- # Set of error strings, lazily evaluated
- self._errS = None
-
- @property
- def identity(self):
- return self.__identity
-
- def __cmp__(self, other):
- assert(self.identity is not None)
- assert(other.identity is not None)
- return cmp(self.identity, other.identity)
-
- def __eq__(self, other):
- assert(self.identity is not None)
- assert(other.identity is not None)
- return self.identity == other.identity
-
- def __hash__(self):
- assert(self.identity is not None)
- return hash(self.identity)
-
- def __str__(self):
- return str(self.__identity)
-
- def debugDump(self, s=sys.stdout, prefix=''):
- """Dump of IR for debug purposes."""
- raise NotImplementedError
-
- def addError(self, errCode, argTuple):
- assert(errCode in PROBLEM_CODE_FORMAT.keys()), 'No error code: %s' % errCode
- assert(PROBLEM_CODE_FORMAT[errCode][1] == len(argTuple)), \
- 'Length missmatch for error code %d: %d != %d for %s' \
- % (errCode, PROBLEM_CODE_FORMAT[errCode][1], len(argTuple), str(argTuple))
- if self._errS is None:
- self._errS = {}
- try:
- self._errS[errCode].add(argTuple)
- except KeyError:
- self._errS[errCode] = set((argTuple,))
-
- def errStrings(self, generic, theFilter):
- """Return a sorted list of error messages without duplicates."""
- if self._errS is not None:
- mySet = set()
- for ec in self._errS.keys():
- if theFilter is None or ec in theFilter:
- assert(ec in PROBLEM_CODE_FORMAT.keys())
- for tu in self._errS[ec]:
- if generic:
- mySet.add(genericStringForErrorCode(ec))
- else:
- f, c = PROBLEM_CODE_FORMAT[ec]
- assert(len(tu) == c)
- mySet.add(f % tu)
- l = list(mySet)
- l.sort()
- return l
- return []
-
- def updateErrorCount(self, theMap):
- """Updates a map of {error_code, : count, ...}.
- Overridden for file and file set."""
- if self._errS is not None:
- for e in self._errS.keys():
- theMap[e] += len(self._errS[e])
-
- def writeErrors(self, isGeneric, theFilter, theStream=sys.stdout):
- """Can be overridden in child classes to recurse into
- their data structures."""
- theStream.write('\n'.join(self.errStrings(isGeneric, theFilter)))
-
-class DitaId(DitaLinkCheckBase):
- """Represents a node with an id."""
- def __init__(self, theN):
- assert(theN.get('id', None) is not None)
- super(DitaId, self).__init__(theN.get('id', None))
- self._elem = theN.tag
- if '#' in self.id:
- self.addError(100, (self.id,))
- # TODO: NMTOKENS
-
- @property
- def elem(self):
- return self._elem
-
- @property
- def id(self):
- return self.identity
-
- def checkGuid(self):
- """optionally applies additional checks for GUID requirements."""
- if RE_GUID.match(self.id) is None:
- self.addError(102, (self.id,))
-
- def debugDump(self, s=sys.stdout, prefix=''):
- """Dump of IR for debug purposes."""
- s.write('%sID: <%s id="%s" />\n' % (prefix, self.elem, self.id))
-
-class DitaRef(DitaLinkCheckBase):
- """Represents a reference node."""
- def __init__(self, theN):
- self._elem = theN.tag
- self._href = theN.get('href', None)
- super(DitaRef, self).__init__('%s %s' % (self._elem, self._href))
- # This is used when figuring out of the target is the correct element
- # e.g. in Vanilla DITA
- # <topicref href="batcaring.dita" type="task"></topicref>
- self._refType = theN.get('type', None)
- # Format attribute, this can be format="ditamap"
- self._format = theN.get('format', None)
- if self._href is None:
- self.addError(201, (self._elem,))
- self._url = None
- else:
- self._url = urlparse.urlparse(self._href)
- if '#' in self._url.fragment:
- self.addError(200, (self._href,))
-
- @property
- def elem(self):
- return self._elem
-
- @property
- def href(self):
- """The value of the href attribute."""
- return self._href
-
- @property
- def refType(self):
- """The value of the type attribute."""
- return self._refType
-
- @property
- def format(self):
- """The value of the format attribute."""
- return self._format
-
- @property
- def path(self):
- """The value of the path part of the href attribute."""
- return self._url.path
-
- @property
- def fragment(self):
- """The value of the fragment part of the href attribute."""
- return self._url.fragment
-
- @property
- def scheme(self):
- """The URI scheme e.g. 'http' or '' if no scheme."""
- return self._url.scheme
-
- def fileFragment(self, theRefFile):
- """The absolute path of the file and the fragment identifier or (None, None)."""
- if self.scheme not in ('', 'file'):
- return (None, None)
- if len(self.path) == 0:
- myPath = theRefFile
- else:
- myPath = os.path.join(os.path.dirname(theRefFile), self.path)
- return normalisePath(myPath), self.fragment
-
- def checkGuid(self):
- """optionally applies additional checks for GUID requirements."""
- if RE_GUID.match(self.path) is None:
- self.addError(203, (self.path,))
- if RE_GUID.match(self.fragment) is None:
- self.addError(204, (self.fragment,))
-
- def checkUrl(self):
- if self.scheme:
- myU = urlparse.urlunparse(self._url)
- if not GlobalUrlCache.canAccess(myU):
- self.addError(300, (myU,))
-
- def debugDump(self, s=sys.stdout, prefix=''):
- """Dump of IR for debug purposes."""
- s.write('%sREF: <%s href="%s" />\n' % (prefix, self.elem, self._href))
-
-class DitaFileObj(DitaLinkCheckBase):
- """Base class for a DITA topic or map."""
- def __init__(self, theFileObj, theFileName=None):
- """Initialiser with a file object and a file path"""
- #print '\nDitaFileObj(%s, %s)' % (theFileObj, theFileName)
- if theFileName is not None:
- super(DitaFileObj, self).__init__(normalisePath(theFileName))
- elif theFileObj is not None:
- super(DitaFileObj, self).__init__(theFileObj.name)
- else:
- super(DitaFileObj, self).__init__(None)
- self._rootId = None
- self._doctype = None
- # Sets of class DitaId
- self._idS = set()
- self._dupeIdS = set()
- # Set of class DitaRef
- self._xrefS = set()
- # Ouptut control
- self._hasWritten = False
- # Size of input
- try:
- self._bytes = os.path.getsize(theFileName)
- except Exception:
- # Try as if a StringIO
- try:
- self._bytes = theFileObj.len
- except AttributeError:
- # Give up
- self._bytes = 0
- # Process the file object
- if theFileObj is not None:
- try:
- # TODO: use iterparse?
- theTree = etree.parse(theFileObj)
- except SyntaxError, err:
- self.addError(404, (str(err),))
- else:
- # Walk the tree
- for i, e in enumerate(theTree.getiterator()):
- #print 'TRACE: e', e
- # Element [0] is the root element
- if i == 0:
- assert(self._rootId is None)
- assert(self._doctype is None)
- self._doctype = e.tag
- if e.get('id', None) is not None:
- self._rootId = DitaId(e)
- self._addId(self._rootId)
- else:
- self.addError(402, ())
- else:
- # NOTE: Elements with id attributes can also have href
- # attributes. For example a <topicref> in a <bookmap>
- # Thus these tests are not exclusive
- if e.get('id', None) is not None:
- self._addId(DitaId(e))
- if e.get('href', None) is not None:
- # TODO: Do we limit ourselves to only a certain set of elements?
- self._xrefS.add(DitaRef(e))
- else:
- self.addError(400, (self.identity,))
-
- def _addId(self, theId):
- #print 'TRACE: adding %s' % theId
- #print 'TRACE: self._idS %s' % self._idS
- if theId in self._idS:
- # Remove from self._idS
- #print 'TRACE: removing %s' % theId
- self._idS.remove(theId)
- self._dupeIdS.add(theId)
- self.addError(401, (theId.identity,))
- elif theId not in self._dupeIdS:
- self._idS.add(theId)
-
- @property
- def bytes(self):
- return self._bytes
-
- @property
- def doctype(self):
- return self._doctype
-
- @property
- def rootId(self):
- if self._rootId is not None:
- return self._rootId.id
-
- @property
- def isMap(self):
- return self.doctype == "map" \
- or self.doctype == 'bookmap' \
- or (self.doctype is not None and self.doctype.endswith('Map'))
-
- @property
- def idS(self):
- """The set of IDs."""
- return self._idS
-
- @property
- def refS(self):
- """The set of DitaRef objects."""
- return self._xrefS
-
- def idElemMap(self):
- """Returns a map {id : elem name, ...}."""
- retVal = {}
- for anId in self._idS:
- retVal[anId.id] = anId.elem
- return retVal
-
- def hasId(self, theString):
- for anId in self._idS:
- if theString == anId.id:
- return True
- return False
-
- def idElem(self, theString):
- for anId in self._idS:
- if theString == anId.id:
- return anId.elem
- return None
-
- def idObj(self, theString):
- for anId in self._idS:
- if theString == anId.id:
- return anId
- return None
-
- def updateErrorCount(self, theMap):
- """Updates a map of {error_code, : count, ...}."""
- if self._errS is not None:
- for e in self._errS.keys():
- theMap[e] += len(self._errS[e])
- for idObj in self.idS:
- idObj.updateErrorCount(theMap)
- for refObj in self.refS:
- refObj.updateErrorCount(theMap)
-
- def writeErrorList(self, theList, theSubHead='', theS=sys.stdout):
- if len(theList) > 0:
- theList.sort()
- if not self._hasWritten:
- theS.write('File: %s\n' % self.identity)
- self._hasWritten = True
- if len(theSubHead) > 0:
- theS.write('%s [%d]:\n' % (theSubHead, len(theList)))
- theS.write('\n'.join(theList))
- theS.write('\n')
-
- def writeErrors(self, isGeneric, theFilter, theStream=sys.stdout):
- """Writes out errors for me, my IDs and my Refs."""
- self._hasWritten = False
- self.writeErrorList(self.errStrings(isGeneric, theFilter), 'File errors:', theStream)
-#===============================================================================
-# # Duplicate IDs
-# myList = (list(self._dupeIdS))
-# if len(myList):
-# self.writeErrorList(
-# [i.identity for i in myList],
-# 'Duplicate ID',
-# theStream)
-#===============================================================================
- # Now IDs
- myList = (list(self.idS))
- myList.sort()
- for anId in myList:
- self.writeErrorList(anId.errStrings(isGeneric, theFilter), 'ID=%s' % anId.identity, theStream)
- # Now Refs
- myList = (list(self._xrefS))
- myList.sort()
- for anId in myList:
- self.writeErrorList(anId.errStrings(isGeneric, theFilter), 'Ref=%s' % anId.identity, theStream)
- if self._hasWritten:
- theStream.write('\n')
-
- def debugDump(self, s=sys.stdout, prefix=''):
- """Dump of IR for debug purposes."""
- s.write('%sFile: %s\n' % (prefix, self.identity))
- for anId in self._idS:
- anId.debugDump(s, prefix=prefix+' ')
- for aRef in self._xrefS:
- aRef.debugDump(s, prefix=prefix+' ')
-
-class DitaFilePath(DitaFileObj):
- """Base class for a DITA topic or map from the file system."""
- def __init__(self, theFilePath):
- """Initialiser with a file path"""
- try:
- f = open(theFilePath)
- except IOError:
- f = None
- #print 'DitaFilePath(%s)' % theFilePath
- super(DitaFilePath, self).__init__(f, theFilePath)
- if f is None:
- self.addError(400, (theFilePath,))
-
-
-class DitaFileMapBase(object):
- """Base class for holding a map of {file path : class DitaFile, ...}
- Actual implementation can be in-memory or via a database e.g. the
- shelve module."""
- def keys(self):
- """Returns an unsorted list of keys in the map."""
- raise NotImplementedError()
-
- def has_key(self, thePath):
- """Return True if the key exists."""
- raise NotImplementedError()
-
- def remove(self, thePath):
- """Remove the entry corresponding to thePath, may raise KeyError."""
- raise NotImplementedError()
-
- def getDitaFileObj(self, thePath):
- """Return a DitaFileObj that corresponds to thePath, may raise KeyError."""
- raise NotImplementedError()
-
- def setDitaFileObj(self, thePath, theObj):
- """Load a DitaFileObj or update a mutated DitaFileObj."""
- raise NotImplementedError()
-
-class DitaFileMapInMemory(DitaFileMapBase):
- """Holds map of {file path : class DitaFile, ...} in memory."""
- def __init__(self):
- # Map of {file path : class DitaFile, ...}
- self._fileMap = {}
-
- def keys(self):
- """Returns an unsorted list of keys in the map."""
- return self._fileMap.keys()
-
- def has_key(self, thePath):
- """Return True if the key exists."""
- return self._fileMap.has_key(thePath)
-
- def remove(self, thePath):
- """Remove the entry corresponding to thePath, may raise KeyError."""
- del self._fileMap[thePath]
-
- def getDitaFileObj(self, thePath):
- """Return a DitaFileObj that corresponds to thePath, may raise KeyError."""
- return self._fileMap[thePath]
-
- def setDitaFileObj(self, thePath, theObj):
- """Load a DitaFileObj or update a mutated DitaFileObj."""
- self._fileMap[thePath] = theObj
-
-class DitaFileMapShelve(DitaFileMapBase):
- """Holds map of {file path : class DitaFile, ...} in a shelve database."""
- DBASE_FILENAME = 'linkchecker.dbase'
- def __init__(self):
- if os.path.exists(self.DBASE_FILENAME):
- os.remove(self.DBASE_FILENAME)
- self._db = shelve.open(self.DBASE_FILENAME)
- # Use this as a 'cache' as shelf.keys() is slow
- self._keys = set()
-
- def keys(self):
- """Returns an unsorted list of keys in the map."""
- return list(self._keys)
-
- def has_key(self, thePath):
- """Return True if the key exists."""
- return thePath in self._keys
-
- def remove(self, thePath):
- """Remove the entry corresponding to thePath, may raise KeyError."""
- del self._db[thePath]
- self._keys.remove(thePath)
-
- def getDitaFileObj(self, thePath):
- """Return a DitaFileObj that corresponds to thePath, may raise KeyError."""
- return self._db[thePath]
-
- def setDitaFileObj(self, thePath, theObj):
- """Load a DitaFileObj or update a mutated DitaFileObj."""
- self._db[thePath] = theObj
- self._keys.add(thePath)
-
-class DitaFileSet(DitaLinkCheckBase):
- """Holds information about a set of DITA files."""
- STATS_KEYS = ('Maps', 'Non-maps', 'Files', 'Bytes', 'IDs', 'Refs')
- def __init__(self,
- theDir,
- procDir=True,
- thePatterns=None,
- recursive=False,
- testExt=False,
- useDbase=False):
- """Constructor. theDir is the root directory of DITA XML.
- procDir - If True then process this directory immediately, otherwise
- the directory can be processed independently and
- _addFileObj() or _addDitaFileObj() invoked.
- thePatterns - If supplied this should be a space separated string of
- fnmatch extensions.
- recursive - If True and procDir True the directory is processed recursively.
- testExt - If True then test external URLs.
- useDbase - If True then store all DitaFile objects in an external dbase
- (slower but less memory issues).
- """
- if thePatterns is None:
- thePatterns = FNMATCH_STRING.split(' ')
- if theDir is not None:
- theDir = normalisePath(theDir)
- super(DitaFileSet, self).__init__(theDir)
- logging.info('DitaFileSet starting to read...')
- GlobalUrlCache.clear()
- self._testExt = testExt
- # Set up how we store the DitaFile objects
- if useDbase:
- self._fileMap = DitaFileMapShelve()
- else:
- self._fileMap = DitaFileMapInMemory()
- # Map of (str(rootId) : filepath, ...) with no duplicates
- # Keys will be in self._uniqueRootIds
- self._rootIdToFilePathMap = {}
- # Path to the unique DITA map
- self._uniqueMapPath = None
- # Count of {error_code : count, ...}
- self._errCountMap = CountDict()
- # Statistics
- self._statsMap = CountDict()
- ## and initialise
- #for k in self.STATS_KEYS:
- # self._statsMap[k]
- # Finalisation control (weak)
- self._hasFinalised = False
- # Timers
- self._timeRead = time.clock()
- self._timeAnalyse = 0.0
- if procDir:
- if theDir is not None and os.path.isdir(theDir):
- self._readDir(theDir, thePatterns, recursive)
- else:
- self.addError(500, (theDir,))
- # Finalise and run all the tests
- self.finalise()
-
- @property
- def errCountMap(self):
- return self._errCountMap
-
- @property
- def statsMap(self):
- return self._statsMap
-
- def writeStatistics(self, s=sys.stdout):
- """Writes out read statistics."""
- s.write(' Statistics '.center(PRINT_WIDTH, '='))
- s.write('\n')
- if len(self._statsMap) > 0:
- o = self.STATS_KEYS
- #assert(set(o) == set(self._statsMap.keys())), \
- # '%s != %s' % (o, self._statsMap.keys())
- for k in o:
- try:
- m = self._statsMap[k] / (1024.0*1024.0)
- s.write('%20s: %10d [%10.3f M]\n' % (k, self._statsMap[k], m))
- except KeyError:
- s.write('%20s: %10s \n' % (k, 'Not seen'))
- s.write('%20s: %10.3f (s)\n' % ('Read time', self._timeRead))
- s.write('%20s: %10.3f (s)\n' % ('Analysis time', self._timeAnalyse))
- s.write('='*PRINT_WIDTH)
- else:
- s.write('Nothing processed.')
- s.write('\n')
-
- def writeErrorSummary(self, s=sys.stdout):
- s.write(' Error Summary '.center(PRINT_WIDTH, '='))
- s.write('\n')
- if len(self._errCountMap):
- s.write('%4s %10s %s\n' % ('Code', 'Count', 'Error'))
- s.write('%4s %10s %s\n' % ('----', '-----', '-----'))
- errCodeS = self._errCountMap.keys()
- errCodeS.sort()
- for c in errCodeS:
- s.write('%4d %10d %s\n' \
- % (c, self._errCountMap[c], genericStringForErrorCode(c)))
- else:
- s.write('No errors\n')
- s.write('='*PRINT_WIDTH)
- s.write('\n')
-
- def writeErrors(self, isGeneric, theFilter, theStream=sys.stdout):
- """Writes out errors for me and my files."""
- theStream.write('\n'.join(self.errStrings(isGeneric, theFilter)))
- fileS = self._fileMap.keys()
- fileS.sort()
- for aFile in fileS:
- # Immutable call so just use get
- self._fileMap.getDitaFileObj(aFile).writeErrors(isGeneric, theFilter, theStream)
-
- def allErrStrings(self, isGeneric, theFilter):
- """Return a sorted list of error messages without duplicates including
- files."""
- retSet = set(self.errStrings(isGeneric, theFilter))
- fileS = self._fileMap.keys()
- fileS.sort()
- for aFilePath in self._fileMap.keys():
- # Immutable call so just use get
- for anErr in self._fileMap.getDitaFileObj(aFilePath).errStrings(isGeneric, theFilter):
- retSet.add(anErr)
- retList = list(retSet)
- retList.sort()
- return retList
-
- def _readDir(self, theDir, thePatS, recursive):
- assert(os.path.isdir(theDir))
- for aName in os.listdir(theDir):
- aPath = os.path.join(theDir, aName)
- if os.path.isdir(aPath) and recursive:
- self._readDir(aPath, thePatS, recursive)
- elif os.path.isfile(aPath):
- for aPat in thePatS:
- if fnmatch.fnmatch(aName, aPat):
- assert(not self._fileMap.has_key(aPath))
- logging.debug(' Reading %s' % aPath)
- try:
- f = open(aPath)
- except IOError:
- f = None
- self._addFileObj(f, aPath)
- break
-
- def _addFileObj(self, theFileObj, theFilePath):
- myObj = DitaFileObj(theFileObj, theFilePath)
- self._addDitaFileObj(myObj)
-
- def _addDitaFileObj(self, theDitaFileObj):
- if self._fileMap.has_key(theDitaFileObj.identity):
- self.addError(504, (theDitaFileObj.identity,))
- else:
- # Mutable call so use set
- self._fileMap.setDitaFileObj(theDitaFileObj.identity, theDitaFileObj)
- # Update statistics (files, bytes, ids, refs) etc.
- self._statsMap['Files'] += 1
- self._statsMap['Bytes'] += theDitaFileObj.bytes
- self._statsMap['IDs'] += len(theDitaFileObj.idS)
- self._statsMap['Refs'] += len(theDitaFileObj.refS)
- if theDitaFileObj.isMap:
- self._statsMap['Maps'] += 1
- else:
- self._statsMap['Non-maps'] += 1
-
- def finalise(self):
- """Creates the environment for all checks and then runs them."""
- logging.info('DitaFileSet.finalise() start...')
- if not self._hasFinalised:
- self._timeRead = time.clock() - self._timeRead
- self._timeAnalyse = time.clock()
- self._initRootIdToFilePathMap()
- self._checkDupeIdS()
- self._setMapCycles()
- self._checkLonely()
- self._checkRefArcs()
- self._errCountMap = CountDict()
- self.updateErrorCount(self._errCountMap)
- self._hasFinalised = True
- self._timeAnalyse = time.clock() - self._timeAnalyse
- logging.info('DitaFileSet.finalise() done.')
-
- def _initRootIdToFilePathMap(self):
- # Map of (str(rootId) : filepath, ...) with no duplicates
- self._rootIdToFilePathMap = {}
- # Temporary map of (str(rootId) : [filepath, ...], ...)
- myDupeIdFiles = {}
- for fPath in self._fileMap.keys():
- # fObj is not written to so we don't need to use set
- fObj = self._fileMap.getDitaFileObj(fPath)
- #print 'TRACE: _initRootIdToFilePathMap() fPath:', fPath
- rId = fObj.rootId
- if rId is not None:
- if myDupeIdFiles.has_key(rId):
- #print 'TRACE: _initRootIdToFilePathMap() another dupe:', fPath
- myDupeIdFiles[rId].append(fObj.identity)
- elif self._rootIdToFilePathMap.has_key(rId):
- #print 'TRACE: _initRootIdToFilePathMap() first dupe:', fPath
- # Remove from map and add to myDupeIdFiles
- myFile = self._rootIdToFilePathMap.pop(rId)
- try:
- myDupeIdFiles[rId].append(myFile)
- except KeyError:
- myDupeIdFiles[rId] = [myFile,]
- myDupeIdFiles[rId].append(fPath)
- else:
- #print 'TRACE: _initRootIdToFilePathMap() adding:', fPath
- self._rootIdToFilePathMap[rId] = fObj.identity
- # Set duplicate errors
- for k in myDupeIdFiles.keys():
- myDupeIdFiles[k].sort()
- self.addError(501, (k, tuple(myDupeIdFiles[k])))
- #self.addError(501, (k, str([str(a) for a in myDupeIdFiles[k]])))
-
- def _checkDupeIdS(self):
- """Checks if there are any duplicate IDs anywhere."""
- # {ID : [fileS, ...], ...}
- myDupeIdMap = {}
- # Temporary data structure
- # {ID : first file ID is seen in, ...}
- seenIdMap = {}
- for f in self._fileMap.keys():
- # o is not written to so we don't need set...
- o = self._fileMap.getDitaFileObj(f)
- for anId in o.idS:
- if seenIdMap.has_key(anId):
- try:
- myDupeIdMap[anId].append(f)
- except KeyError:
- myDupeIdMap[anId] = [seenIdMap[anId],]
- myDupeIdMap[anId].append(f)
- else:
- seenIdMap[anId] = f
- # Now add to errs as a 505 error message
- # Sort the files in the map
- for k in myDupeIdMap.keys():
- myDupeIdMap[k].sort()
- self.addError(505, (k, tuple(myDupeIdMap[k])))
- #self.addError(505, (k, str([str(a) for a in myDupeIdMap[k]])))
-
- def _retMapAdjList(self):
- """Create an adjacency list {file_path : set(refs), ...} (all strings)"""
- adjList = {}
- for f in self._fileMap.keys():
- fObj = self._fileMap.getDitaFileObj(f)
- if fObj.isMap:# and fObj.rootId is not None:
- assert(fObj.identity not in adjList.keys())
- refSet = set()
- for r in fObj.refS:
- refSet.add(r.fileFragment(fObj.identity)[0])
- adjList[fObj.identity] = refSet
- return adjList
-
- def _setMapCycles(self):
- """Sets any cyclic references seen in DITA maps."""
- adjList = self._retMapAdjList()
- # A branch
- myBr = []
- myCycles = set()
- for aPath, aSet in adjList.items():
- myBr.append(aPath)
- self._recurseCycles(adjList, myBr, myCycles)
- myBr.pop()
- self._setCycleErrors(myCycles)
-
- def _recurseCycles(self, a, b, c):
- assert(len(b) > 0)
- try:
- myPath = b[-1]
- for r in a[myPath]:
- #print '_recurseCycles() testing r', r
- #print '_recurseCycles() testing b', b
- if r in b:
- #print 'Adding cycle', tuple(b[b.index(r):])
- c.add(tuple(b[b.index(r):]))
- else:
- b.append(r)
- self._recurseCycles(a, b, c)
- b.pop()
- except KeyError:
- pass
-
- def _setCycleErrors(self, theC):
- for aT in theC:
- self.addError(701, (str(aT),))
- myL = list(aT)
- assert(len(myL) > 0)
- i = 0
- while i < len(myL):
- myL.append(myL[0])
- # Should this be in the file thus, or in the files set?
- # As we are mutating the file object we need to use both
- # getDitaFileObj() and setDitaFileObj()
- fObj = self._fileMap.getDitaFileObj(myL[0])
- fObj.addError(701, (str(myL),))
- self._fileMap.setDitaFileObj(myL[0], fObj)
- myL.pop()
- myL.append(myL.pop(0))
- i += 1
-
- def _checkLonely(self):
- self._checkLonelyMaps()
- self._checkLonelyTopics()
-
- def _checkLonelyMaps(self):
- """Checks for lonely maps."""
- mapPathSet = set()
- pathSetRemain = set()
- for f in self._fileMap.keys():
- if self._fileMap.getDitaFileObj(f).isMap:
- mapPathSet.add(f)
- pathSetRemain.add(f)
- for aPath in mapPathSet:
- myMapObj = self._fileMap.getDitaFileObj(aPath)
- for r in myMapObj.refS:
- refFile, frag = r.fileFragment(f)
- try:
- pathSetRemain.remove(refFile)
- except KeyError:
- # refFile is a topic or an already seen map
- pass
- if len(pathSetRemain) > 1:
- for aPath in pathSetRemain:
- self.addError(700, (aPath,))
- elif len(pathSetRemain) == 1:
- self._uniqueMapPath = pathSetRemain.pop()
-
- def _checkLonelyTopics(self):
- """Checks for topics that are not referenced by any map."""
- mapPathSet = set()
- pathSetRemain = set()
- for f in self._fileMap.keys():
- #print 'TRACE: f:', f
- if self._fileMap.getDitaFileObj(f).isMap:
- mapPathSet.add(f)
- else:
- pathSetRemain.add(f)
- #print 'TRACE: mapPathSet', mapPathSet
- #print 'TRACE: pathSetRemain', pathSetRemain
- for aMapPath in mapPathSet:
- myMapObj = self._fileMap.getDitaFileObj(aMapPath)
- for r in myMapObj.refS:
- refFile, frag = r.fileFragment(aMapPath)
- #print 'TRACE: removing:', refFile
- try:
- pathSetRemain.remove(refFile)
- except KeyError:
- # topic has already been seen in another map
- pass
- if len(pathSetRemain) > 0:
- for aPath in pathSetRemain:
- self.addError(600, (aPath,))
-
- def _checkRefArcs(self):
- """Checks all references are reachable."""
- for fPath in self._fileMap.keys():
- fObjSrc = self._fileMap.getDitaFileObj(fPath)
- hasMutated = False
- for rObjSrc in fObjSrc.refS:
- if rObjSrc.scheme:
- # Decide whether to test and external URL
- if self._testExt:
- rObjSrc.checkUrl()
- else:
- fi, fr = rObjSrc.fileFragment(fPath)
- assert(fi is not None), 'fi is None for rObjSrc: %s in file: %s' % (rObjSrc, fPath)
- assert(fr is not None), 'fr is None for rObjSrc: %s in file: %s' % (rObjSrc, fPath)
- ## If a url then fileFragment() returns (None, None)
- #if fi is None:
- # print 'fPath', fPath
- # print 'rObjSrc', rObjSrc
- # print 'fi', fi
- # print 'fr', fr
- try:
- fObjTgt = self._fileMap.getDitaFileObj(fi)
- except KeyError:
- # Target file can not be found in the IR
- # check the file system to see if it is a non-DITA resource
- if not os.path.isfile(fi):
- #print 'TRACE: adding 410 to', fObj.identity
- fObjSrc.addError(410, (fi,))
- hasMutated = True
- else:
- if len(fr) > 0:
- # Target file is found, test fragment
- if not fObjTgt.hasId(fr):
- # Fragment not found
- fObjSrc.addError(411, (fi, fr))
- hasMutated = True
- if self._checkRefArcElemName(fObjSrc, rObjSrc, fObjTgt, fr):
- hasMutated = True
- if hasMutated:
- self._fileMap.setDitaFileObj(fPath, fObjSrc)
-
- def _checkRefArcElemName(self, fObjSrc, rObjSrc, fObjTgt, frag):
- """Test source and target element names
- e.g. Source <cxxClassRef> should match target <cxxClass>
- And in vanilla DITA:
- <topicref href="batcaring.dita" type="task"></topicref>
- or:
- <topicref href="batcaring.dita" format="ditamap"></topicref>
- Should match target element <task>."""
- isRootTgt = False
- hasMutated = False
- if len(frag) == 0:
- # iObjTgt is the root element of fObjTgt
- if fObjTgt.rootId is None or fObjTgt.idElem(fObjTgt.rootId) is None:
- # Covered by other error codes
- return
- iObjTgt = fObjTgt.idObj(fObjTgt.rootId)
- isRootTgt = True
- elif fObjTgt.hasId(frag):
- iObjTgt = fObjTgt.idObj(frag)
- else:
- # frag not found that will be a 411 error (handled by caller).
- return
- # Have an rObjSrc + iObjTgt so check elements
- # First case:
- if rObjSrc.elem.endswith('Ref'):
- if rObjSrc.elem[:-3] != iObjTgt.elem:
- if isRootTgt:
- fObjSrc.addError(412, (rObjSrc.elem, iObjTgt.elem))
- else:
- fObjSrc.addError(413, (fObjTgt.idElem(frag), rObjSrc.elem, frag))
- hasMutated = True
- # Second case(s) for vanilla DITA
- elif rObjSrc.elem == 'topicref':
- # Check DITA map links
- if rObjSrc.format == 'ditamap' and iObjTgt.elem != 'map':
- # Target must be a root element (actually we don't care)
- fObjSrc.addError(414, (iObjTgt.elem,))
- hasMutated = True
- elif iObjTgt.elem == 'map' and rObjSrc.format != 'ditamap':
- fObjSrc.addError(415, (rObjSrc.format,))
- hasMutated = True
- elif not (rObjSrc.format == 'ditamap' and iObjTgt.elem == 'map'):
- # Treat refType None as type="topic", see DITA standard for <topicref>
- # Well, also look at the type attribute in chapter 25
- # "When the type attribute is unspecified, it should be
- # determined by inspecting the target if possible. If the
- # target cannot be inspected for some reason, the value
- # should default to "topic".
- # Note: DITA 1.2 takes a different view...
- # Was:
- #if (rObjSrc.refType is None and iObjTgt.elem != 'topic') \
- #or (rObjSrc.refType is not None and rObjSrc.refType != iObjTgt.elem):
- if rObjSrc.refType is not None and rObjSrc.refType != iObjTgt.elem:
- if isRootTgt:
- fObjSrc.addError(416, (rObjSrc.refType, iObjTgt.elem,))
- hasMutated = True
- else:
- fObjSrc.addError(417, (rObjSrc.refType, iObjTgt.elem, frag,))
- hasMutated = True
- # Otherwise topicref looks OK
- elif rObjSrc.elem != 'xref' and rObjSrc.elem not in XREF_DESCENDENTS:
- # Unknown referencing element
- if isRootTgt:
- fObjSrc.addError(418, (rObjSrc.elem, fObjTgt.doctype))
- hasMutated = True
- else:
- fObjSrc.addError(419, (rObjSrc.elem, fObjTgt.idElem(frag), frag))
- hasMutated = True
- return hasMutated
-
- def updateErrorCount(self, theMap):
- """Updates a map of {error_code, : count, ...}."""
- if self._errS is not None:
- for e in self._errS.keys():
- theMap[e] += len(self._errS[e])
- for fPath in self._fileMap.keys():
- fObj = self._fileMap.getDitaFileObj(fPath)
- # Mutable call so need to update
- fObj.updateErrorCount(theMap)
- self._fileMap.setDitaFileObj(fPath, fObj)
-
- def debugDump(self, s=sys.stdout, prefix=''):
- """Dump of IR for debug purposes."""
- s.write(' Debug Dump '.center(PRINT_WIDTH, '+'))
- s.write('\n')
- fileS = self._fileMap.keys()
- fileS.sort()
- for f in fileS:
- self._fileMap.getDitaFileObj(f).debugDump(s, prefix)
- s.write(' END Debug Dump '.center(PRINT_WIDTH, '+'))
- s.write('\n\n')
-
-#####################################
-# Multiprocessing code
-#####################################
-def retDitaFileObj(thePath):
- return DitaFilePath(thePath)
-
-def genDitaPath(theDir, thePatS, recursive):
- assert(os.path.isdir(theDir))
- for aName in os.listdir(theDir):
- aPath = os.path.join(theDir, aName)
- if os.path.isdir(aPath) and recursive:
- for p in genDitaPath(aPath, thePatS, recursive):
- yield p
- elif os.path.isfile(aPath):
- for aPat in thePatS:
- if fnmatch.fnmatch(aName, aPat):
- #logging.info('genDitaPath(): %s' % aPath)
- yield aPath
- break
-
-def retMpDitaFileSetObj(theDir,
- thePatterns,
- recursive,
- numJobs,
- checkExt,
- useDb):
- assert(os.path.isdir(theDir))
- assert(numJobs >= 0)
- retObj = DitaFileSet(theDir, procDir=False, testExt=checkExt, useDbase=useDb)
- myNumJobs = numJobs
- if numJobs == 0:
- myNumJobs = multiprocessing.cpu_count()
- logging.info('Set multiprocessing number of jobs to %d' % myNumJobs)
- myPool = multiprocessing.Pool(processes=myNumJobs)
- for result in [
- myPool.apply_async(retDitaFileObj, (f,))
- for f in genDitaPath(theDir, thePatterns, recursive)
- ]:
- myObj = result.get()
- logging.debug('Got %s' % myObj.identity)
- retObj._addDitaFileObj(myObj)
- # Note: finalise() is a serial process
- logging.info('retMpDitaFileSetObj(): finalising')
- retObj.finalise()
- return retObj
-
-######################################
-# Test code
-######################################
-try:
- import cStringIO as StringIO
-except ImportError:
- import StringIO
-
-class NullClass(unittest.TestCase):
- pass
-
-class TestCountDict(unittest.TestCase):
- def setUp(self):
- pass
-
- def tearDown(self):
- pass
-
- def testSetUpTearDown(self):
- """TestCountDict: test setUp() and tearDown()."""
- pass
-
- def test_basic(self):
- """TestCountDict: test basic functionality."""
- myMap = CountDict()
- self.assertEqual(myMap.has_key('wtf'), False)
- self.assertEqual(myMap['wtf'], 0)
- self.assertEqual(myMap.has_key('wtf'), True)
- myMap['wtf'] += 1
- self.assertEqual(myMap['wtf'], 1)
-
-class TestDitaId(unittest.TestCase):
- def setUp(self):
- pass
-
- def tearDown(self):
- pass
-
- def testSetUpTearDown(self):
- """DitaId: test setUp() and tearDown()."""
- pass
-
- def test_basic(self):
- """DitaId: basic read of an node with an id"""
- myXml = """<cxxClass id="class_big_endian"/>"""
- myTree = etree.parse(StringIO.StringIO(myXml))
- myObj = DitaId(myTree.getroot())
- self.assertEqual(myObj.id, 'class_big_endian')
- self.assertEqual(str(myObj), 'class_big_endian')
- self.assertEqual(myObj.errStrings(True, None), [])
- self.assertEqual(myObj.errStrings(False, None), [])
-
- def test_guid_00(self):
- """DitaId: basic read of an node with an GUID id"""
- myXml = """<cxxClass id="GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E"/>"""
- myTree = etree.parse(StringIO.StringIO(myXml))
- myObj = DitaId(myTree.getroot())
- self.assertEqual(myObj.id, 'GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E')
- myObj.checkGuid()
- self.assertEqual(myObj.errStrings(True, None), [])
- self.assertEqual(myObj.errStrings(False, None), [])
-
- def test_guid_01(self):
- """DitaId: basic read of an node with an GUID id fails"""
- myXml = """<cxxClass id="25825EC4-341F-3EA4-94AA-7DCE380E6D2E"/>"""
- myTree = etree.parse(StringIO.StringIO(myXml))
- myObj = DitaId(myTree.getroot())
- self.assertEqual(myObj.id, '25825EC4-341F-3EA4-94AA-7DCE380E6D2E')
- myObj.checkGuid()
- self.assertEqual(
- myObj.errStrings(False, None),
- [
- 'GUID specification does not match id="25825EC4-341F-3EA4-94AA-7DCE380E6D2E"'
- ])
- self.assertEqual(
- myObj.errStrings(True, None),
- [
- 'GUID specification does not match id="%s"' % GENERIC_STRING,
- ])
-
- def test_cmp_eq_00(self):
- """DitaId: cmp(), == of two identical nodes"""
- myXml = """<cxxClass id="class_big_endian"/>"""
- myTree = etree.parse(StringIO.StringIO(myXml))
- myObj_00 = DitaId(myTree.getroot())
- myObj_01 = DitaId(myTree.getroot())
- self.assertEqual(cmp(myObj_00, myObj_01), 0)
- self.assertEqual((myObj_00 == myObj_01), True)
-
- def test_cmp_eq_01(self):
- """DitaId: cmp(), == of two identical nodes from different elements."""
- myXml_00 = """<cxxClass id="big_endian"/>"""
- myTree_00 = etree.parse(StringIO.StringIO(myXml_00))
- myObj_00 = DitaId(myTree_00.getroot())
- myXml_01 = """<cxxStruct id="big_endian"/>"""
- myTree_01 = etree.parse(StringIO.StringIO(myXml_01))
- myObj_01 = DitaId(myTree_01.getroot())
- self.assertEqual(cmp(myObj_00, myObj_01), 0)
- self.assertEqual((myObj_00 == myObj_01), True)
-
- def test_set(self):
- """DitaId: read of an node with an id several times into a set and check unique,"""
- myXml = """<cxxClass id="class_big_endian"/>"""
- myTree = etree.parse(StringIO.StringIO(myXml))
- s = set()
- i = 0
- while i < 8:
- s.add(DitaId(myTree.getroot()))
- i += 1
- self.assertEqual(len(s), 1)
- self.assertEqual(DitaId(myTree.getroot()) in s, True)
-
- def test_map(self):
- """DitaId: read of an node with an id several times into a map and check unique,"""
- myXml = """<cxxClass id="class_big_endian"/>"""
- myTree = etree.parse(StringIO.StringIO(myXml))
- m = {}
- i = 0
- while i < 8:
- m[DitaId(myTree.getroot())] = 1
- i += 1
- self.assertEqual(len(m), 1)
- self.assertEqual(m.has_key(DitaId(myTree.getroot())), True)
-
- def test_error_hash(self):
- """DitaId: error with a '#' in an id"""
- myXml = """<cxxClass id="class_#big_endian"/>"""
- myTree = etree.parse(StringIO.StringIO(myXml))
- myObj = DitaId(myTree.getroot())
- self.assertEqual(myObj.id, 'class_#big_endian')
- self.assertEqual(str(myObj), 'class_#big_endian')
- self.assertEqual(
- myObj.errStrings(True, None),
- [
- genericStringForErrorCode(100),
- ]
- )
- self.assertEqual(
- myObj.errStrings(False, None),
- [
- 'Character \'#\' not allowed in id="class_#big_endian"',
- ]
- )
-
-
-
-class TestDitaRef(unittest.TestCase):
- def setUp(self):
- pass
-
- def tearDown(self):
- pass
-
- def testSetUpTearDown(self):
- """DitaRef: test setUp() and tearDown()."""
- pass
-
- def test_basic(self):
- """DitaRef: basic read of an xref node, no fragment"""
- myXml = """<xref href="class_big_endian"/>"""
- myTree = etree.parse(StringIO.StringIO(myXml))
- myObj = DitaRef(myTree.getroot())
- self.assertEqual(myObj.href, 'class_big_endian')
- self.assertEqual(myObj.path, 'class_big_endian')
- self.assertEqual(myObj.elem, 'xref')
- self.assertEqual(str(myObj), 'xref class_big_endian')
- self.assertEqual(myObj.fragment, '')
- self.assertEqual(myObj.scheme, '')
- self.assertEqual(myObj.errStrings(False, None), [])
- self.assertEqual(myObj.errStrings(True, None), [])
-
- def test_basic_frag(self):
- """DitaRef: basic read of an xref node, with fragment"""
- myXml = """<xref href="class_big_endian.xml#function"/>"""
- myTree = etree.parse(StringIO.StringIO(myXml))
- myObj = DitaRef(myTree.getroot())
- self.assertEqual(myObj.href, 'class_big_endian.xml#function')
- self.assertEqual(myObj.path, 'class_big_endian.xml')
- self.assertEqual(myObj.fragment, 'function')
- self.assertEqual(myObj.scheme, '')
- self.assertEqual(myObj.errStrings(False, None), [])
- self.assertEqual(myObj.errStrings(True, None), [])
-
- def test_file_frag_00(self):
- """DitaRef: accessing an xref node, with a file and a fragment"""
- myXml = """<xref href="class_big_endian.xml#function"/>"""
- myTree = etree.parse(StringIO.StringIO(myXml))
- myObj = DitaRef(myTree.getroot())
- self.assertEqual(myObj.href, 'class_big_endian.xml#function')
- self.assertEqual(myObj.path, 'class_big_endian.xml')
- self.assertEqual(myObj.fragment, 'function')
- self.assertEqual(myObj.scheme, '')
- srcPath = normalisePath(os.path.join('C:%s' % os.sep, 'spam', 'eggs.xml'))
- expPath = normalisePath(os.path.join('C:%s' % os.sep, 'spam', 'class_big_endian.xml'))
- self.assertEqual(
- myObj.fileFragment(srcPath),
- (expPath, 'function')
- )
- self.assertEqual(myObj.errStrings(False, None), [])
- self.assertEqual(myObj.errStrings(True, None), [])
-
- def test_file_frag_01(self):
- """DitaRef: accessing an xref node, with a file and a fragment and relative path with '\\'."""
- myXml = """<xref href="..\\chips\\class_big_endian.xml#function"/>"""
- myTree = etree.parse(StringIO.StringIO(myXml))
- myObj = DitaRef(myTree.getroot())
- srcPath = normalisePath(os.path.join('C:%s' % os.sep, 'spam', 'eggs.xml'))
- expPath = normalisePath(os.path.join('C:%s' % os.sep, 'chips', 'class_big_endian.xml'))
- self.assertEqual(
- myObj.fileFragment(srcPath),
- (expPath, 'function')
- )
- self.assertEqual(myObj.errStrings(False, None), [])
- self.assertEqual(myObj.errStrings(True, None), [])
-
- def test_file_frag_02(self):
- """DitaRef: accessing an xref node, with a file and a fragment and relative path with '/'."""
- myXml = """<xref href="../chips/class_big_endian.xml#function"/>"""
- myTree = etree.parse(StringIO.StringIO(myXml))
- myObj = DitaRef(myTree.getroot())
- srcPath = normalisePath(os.path.join('C:%s' % os.sep, 'spam', 'eggs.xml'))
- expPath = normalisePath(os.path.join('C:%s' % os.sep, 'chips', 'class_big_endian.xml'))
- self.assertEqual(
- myObj.fileFragment(srcPath),
- (expPath, 'function')
- )
- self.assertEqual(myObj.errStrings(False, None), [])
- self.assertEqual(myObj.errStrings(True, None), [])
-
- def test_file_frag_03(self):
- """DitaRef: accessing an xref node, with a no file but with a fragment"""
- myXml = """<xref href="#function"/>"""
- myTree = etree.parse(StringIO.StringIO(myXml))
- myObj = DitaRef(myTree.getroot())
- self.assertEqual(myObj.href, '#function')
- self.assertEqual(myObj.path, '')
- self.assertEqual(myObj.fragment, 'function')
- self.assertEqual(myObj.scheme, '')
- srcPath = normalisePath(os.path.join('C:%s' % os.sep, 'spam', 'eggs.xml'))
- expPath = normalisePath(os.path.join('C:%s' % os.sep, 'spam', 'eggs.xml'))
- self.assertEqual(
- myObj.fileFragment(srcPath),
- (expPath, 'function')
- )
- self.assertEqual(myObj.errStrings(False, None), [])
- self.assertEqual(myObj.errStrings(True, None), [])
-
- def test_basic_scheme(self):
- """DitaRef: an xref node with a URI scheme"""
- myXml = """<xref href="http://www.cwi.nl:80/%7Eguido/Python.html#fragment"/>"""
- myTree = etree.parse(StringIO.StringIO(myXml))
- myObj = DitaRef(myTree.getroot())
- self.assertEqual(myObj.href, 'http://www.cwi.nl:80/%7Eguido/Python.html#fragment')
- self.assertEqual(myObj.path, '/%7Eguido/Python.html')
- self.assertEqual(myObj.fragment, 'fragment')
- self.assertEqual(myObj.scheme, 'http')
- self.assertEqual(myObj.errStrings(False, None), [])
- self.assertEqual(myObj.errStrings(True, None), [])
-
- def test_basic_scheme_file_frag(self):
- """DitaRef: an xref node with a URI scheme, invoking fileFragment()"""
- myXml = """<xref href="http://www.cwi.nl:80/%7Eguido/Python.html#fragment"/>"""
- myTree = etree.parse(StringIO.StringIO(myXml))
- myObj = DitaRef(myTree.getroot())
- self.assertEqual(myObj.href, 'http://www.cwi.nl:80/%7Eguido/Python.html#fragment')
- self.assertEqual(myObj.path, '/%7Eguido/Python.html')
- self.assertEqual(myObj.fragment, 'fragment')
- self.assertEqual(myObj.scheme, 'http')
- srcPath = os.path.join('C:%s' % os.sep, 'spam', 'eggs.xml')
- self.assertEqual(
- myObj.fileFragment(srcPath),
- (None, None)
- )
- self.assertEqual(myObj.errStrings(False, None), [])
- self.assertEqual(myObj.errStrings(True, None), [])
-
- def test_fail_no_href(self):
- """DitaRef: Fails on an xref node with no href attribute"""
- myXml = """<xref />"""
- myTree = etree.parse(StringIO.StringIO(myXml))
- myObj = DitaRef(myTree.getroot())
- self.assertEqual(
- myObj.errStrings(False, None),
- [
- 'Reference element "xref" is missing href=... attribute',
- ]
- )
- self.assertEqual(
- myObj.errStrings(True, None),
- [
- 'Reference element "%s" is missing href=... attribute' % GENERIC_STRING,
- ]
- )
-
- def test_fail_bad_frag(self):
- """DitaRef: Fails on an xref node with href attribute that has multiple '#' characters"""
- myXml = """<xref href="a#b#c" />"""
- myTree = etree.parse(StringIO.StringIO(myXml))
- myObj = DitaRef(myTree.getroot())
- self.assertEqual(
- myObj.errStrings(False, None),
- [
- 'Multiple \'#\' not allowed in reference "a#b#c"',
- ]
- )
- self.assertEqual(
- myObj.errStrings(True, None),
- [
- 'Multiple \'#\' not allowed in reference "%s"' % GENERIC_STRING,
- ]
- )
-
- def test_guid_00(self):
- """DitaRef: basic read of an node with an GUID file/fragment reference"""
- myXml = """<xref href="GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E.xml#GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E"/>"""
- myTree = etree.parse(StringIO.StringIO(myXml))
- myObj = DitaRef(myTree.getroot())
- self.assertEqual(myObj.href, 'GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E.xml#GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E')
- self.assertEqual(myObj.path, 'GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E.xml')
- self.assertEqual(myObj.elem, 'xref')
- self.assertEqual(str(myObj), 'xref GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E.xml#GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E')
- self.assertEqual(myObj.fragment, 'GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E')
- self.assertEqual(myObj.scheme, '')
- self.assertEqual(myObj.errStrings(False, None), [])
- self.assertEqual(myObj.errStrings(True, None), [])
-
- def test_guid_01(self):
- """DitaRef: basic read of an node with an GUID file part fails"""
- myXml = """<xref href="GUID-.xml#GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E"/>"""
- myTree = etree.parse(StringIO.StringIO(myXml))
- myObj = DitaRef(myTree.getroot())
- self.assertEqual(myObj.href, 'GUID-.xml#GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E')
- self.assertEqual(myObj.path, 'GUID-.xml')
- self.assertEqual(myObj.elem, 'xref')
- self.assertEqual(str(myObj), 'xref GUID-.xml#GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E')
- self.assertEqual(myObj.fragment, 'GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E')
- self.assertEqual(myObj.errStrings(False, None), [])
- self.assertEqual(myObj.errStrings(True, None), [])
- myObj.checkGuid()
- self.assertEqual(
- myObj.errStrings(False, None),
- [
- 'GUID specification does not match file reference "GUID-.xml"'
- ])
- self.assertEqual(
- myObj.errStrings(True, None),
- [
- genericStringForErrorCode(203),
- ]
- )
-
- def test_guid_02(self):
- """DitaRef: basic read of an node with an GUID fragment part fails"""
- myXml = """<xref href="GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E.xml#GUID-25825EC4"/>"""
- myTree = etree.parse(StringIO.StringIO(myXml))
- myObj = DitaRef(myTree.getroot())
- self.assertEqual(myObj.href, 'GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E.xml#GUID-25825EC4')
- self.assertEqual(myObj.path, 'GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E.xml')
- self.assertEqual(myObj.elem, 'xref')
- self.assertEqual(str(myObj), 'xref GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E.xml#GUID-25825EC4')
- self.assertEqual(myObj.fragment, 'GUID-25825EC4')
- self.assertEqual(myObj.errStrings(False, None), [])
- self.assertEqual(myObj.errStrings(True, None), [])
- myObj.checkGuid()
- self.assertEqual(
- myObj.errStrings(False, None),
- [
- 'GUID specification does not match fragment reference "GUID-25825EC4"'
- ])
- self.assertEqual(
- myObj.errStrings(True, None),
- [
- genericStringForErrorCode(204),
- ]
- )
-
-class TestDitaFile(unittest.TestCase):
- def setUp(self):
- pass
-
- def tearDown(self):
- pass
-
- def testSetUpTearDown(self):
- """DitaFile: test setUp() and tearDown()."""
- pass
-
- def test_Basic(self):
- """DitaFile: basic read of an XML file"""
- myXml = """<?xml version='1.0' encoding='UTF-8' standalone='no'?>
-<!DOCTYPE cxxClass PUBLIC "-//NOKIA//DTD DITA C++ API Class Reference Type v0.1.0//EN" "dtd/cxxClass.dtd" >
-<cxxClass id="class_big_endian">
- <apiName>BigEndian</apiName>
- <shortdesc/>
- <cxxClassDetail>
- <cxxClassDefinition>
- <cxxClassAccessSpecifier value="public"/>
- <cxxClassAPIItemLocation>
- <cxxClassDeclarationFile name="filePath" value="K:/sf/os/commsfw/datacommsserver/esockserver/inc/es_sock.h"/>
- <cxxClassDeclarationFileLine name="lineNumber" value="1520"/>
- <cxxClassDefinitionFile name="filePath" value="K:/sf/os/commsfw/datacommsserver/esockserver/inc/es_sock.h"/>
- <cxxClassDefinitionFileLineStart name="lineNumber" value="1516"/>
- <cxxClassDefinitionFileLineEnd name="lineNumber" value="1526"/>
- </cxxClassAPIItemLocation>
- </cxxClassDefinition>
- <apiDesc>
- <p>Inserts and extracts integers in big-endian format. </p>
- </apiDesc>
- </cxxClassDetail>
- <cxxFunction id="class_big_endian_1a9f78fb092e713acf6ffe3e8e11f1626f">
- </cxxFunction>
- <cxxFunction id="class_big_endian_1aedf702f5c0118e4294d1a6d9684f8441">
- </cxxFunction>
- <cxxFunction id="class_big_endian_1ae266722f7bb965c971155a3315bad484">
- </cxxFunction>
- <cxxFunction id="class_big_endian_1a497d5248ea259f8490fb40ac4f2aafb2">
- </cxxFunction>
-</cxxClass>"""
- myFile = StringIO.StringIO(myXml)
- myObj = DitaFileObj(myFile, 'foo')
- self.assertEqual(myObj.identity, normalisePath('foo'))
- self.assertEqual(myObj.doctype, 'cxxClass')
- self.assertEqual(myObj.rootId, 'class_big_endian')
- #print myObj.idMap()
- self.assertEqual(
- myObj.idElemMap(),
- {
- 'class_big_endian_1a9f78fb092e713acf6ffe3e8e11f1626f' : 'cxxFunction',
- 'class_big_endian_1aedf702f5c0118e4294d1a6d9684f8441' : 'cxxFunction',
- 'class_big_endian' : 'cxxClass',
- 'class_big_endian_1a497d5248ea259f8490fb40ac4f2aafb2' : 'cxxFunction',
- 'class_big_endian_1ae266722f7bb965c971155a3315bad484' : 'cxxFunction',
- }
- )
- self.assertEqual(myObj.errStrings(False, None), [])
- self.assertEqual(myObj.errStrings(True, None), [])
-
- def test_missing_file(self):
- """DitaFile: read an missing XML file"""
- myObj = DitaFileObj(None, 'foo')
- self.assertEqual(
- myObj.errStrings(False, None),
- [
- 'Failed to open: "%s"' % normalisePath('foo'),
- ]
- )
- self.assertEqual(
- myObj.errStrings(True, None),
- [
- genericStringForErrorCode(400),
- ]
- )
-
- def test_IllFormedFile(self):
- """DitaFile: read an ill-formed XML file"""
- myXml = """<?xml version='1.0' encoding='UTF-8' standalone='no'?>
-<!DOCTYPE cxxClass PUBLIC "-//NOKIA//DTD DITA C++ API Class Reference Type v0.1.0//EN" "dtd/cxxClass.dtd" >
-<cxxClass id="class_big_endian">
-"""
- myFile = StringIO.StringIO(myXml)
- myObj = DitaFileObj(myFile, 'foo')
- self.assertEqual(myObj.identity, normalisePath('foo'))
- self.assertEqual(myObj.doctype, None)
- self.assertEqual(myObj.rootId, None)
- #print myObj.idMap()
- self.assertEqual(myObj.idElemMap(), {})
- self.assertEqual(
- myObj.errStrings(False, None),
- [
- 'Can not parse: "no element found: line 4, column 0"',
- ]
- )
- self.assertEqual(
- myObj.errStrings(True, None),
- [
- genericStringForErrorCode(404),
- ]
- )
-
- def test_missing_root_id(self):
- """DitaFile: read of an XML file with no id on root element"""
- myXml = """<?xml version='1.0' encoding='UTF-8' standalone='no'?>
-<!DOCTYPE cxxClass PUBLIC "-//NOKIA//DTD DITA C++ API Class Reference Type v0.1.0//EN" "dtd/cxxClass.dtd" >
-<cxxClass>
- <xref href="OtherClass">OtherClass</xref>
- <cxxFunction id="class_big_endian_1a9f78fb092e713acf6ffe3e8e11f1626f"/>
-</cxxClass>"""
- myFile = StringIO.StringIO(myXml)
- myObj = DitaFileObj(myFile, 'foo')
- self.assertEqual(myObj.identity, normalisePath('foo'))
- self.assertEqual(myObj.doctype, 'cxxClass')
- self.assertEqual(myObj.rootId, None)
- self.assertEqual(
- myObj.idElemMap(),
- {
- 'class_big_endian_1a9f78fb092e713acf6ffe3e8e11f1626f' : 'cxxFunction',
- }
- )
- self.assertEqual(myObj.errStrings(False, None), [genericStringForErrorCode(402)])
- self.assertEqual(myObj.errStrings(True, None), [genericStringForErrorCode(402)])
-
- def test_duplicate_id(self):
- """DitaFile: duplicate IDs"""
- myXml = """<root id="AnID">
-<elem id="AnID"/>
-</root>"""
- myFile = StringIO.StringIO(myXml)
- myObj = DitaFileObj(myFile, 'spam.xml')
- self.assertEqual(myObj.identity, normalisePath('spam.xml'))
- self.assertEqual(myObj.doctype, 'root')
- self.assertEqual(myObj.rootId, 'AnID')
- self.assertEqual(myObj.idElemMap(), {})
- self.assertEqual(
- myObj.errStrings(False, None),
- [
- 'Multiple id="AnID"',
- ]
- )
- self.assertEqual(myObj.errStrings(True, None), [genericStringForErrorCode(401)])
-
- def test_ismap_00(self):
- """DitaFile: Is a map for <map>."""
- myXml = """<map id="myMap"/>"""
- myFile = StringIO.StringIO(myXml)
- myObj = DitaFileObj(myFile, 'spam.xml')
- self.assertEqual(myObj.isMap, True)
-
- def test_ismap_01(self):
- """DitaFile: Is a map for <cxxAPIMap>."""
- myXml = """<cxxAPIMap id="myMap"/>"""
- myFile = StringIO.StringIO(myXml)
- myObj = DitaFileObj(myFile, 'spam.xml')
- self.assertEqual(myObj.isMap, True)
-
- def test_Basic_01(self):
- """DitaFile: read of an simple XML file with id and xref"""
- myXml = """<?xml version='1.0' encoding='UTF-8' standalone='no'?>
-<!DOCTYPE cxxClass PUBLIC "-//NOKIA//DTD DITA C++ API Class Reference Type v0.1.0//EN" "dtd/cxxClass.dtd" >
-<cxxClass id="class_big_endian">
- <xref href="OtherClass">OtherClass</xref>
- <cxxFunction id="class_big_endian_1a9f78fb092e713acf6ffe3e8e11f1626f"/>
-</cxxClass>"""
- myFile = StringIO.StringIO(myXml)
- myObj = DitaFileObj(myFile, 'foo')
- self.assertEqual(myObj.identity, normalisePath('foo'))
- self.assertEqual(myObj.doctype, 'cxxClass')
- self.assertEqual(myObj.rootId, 'class_big_endian')
- self.assertEqual(myObj.isMap, False)
- self.assertEqual(len(myObj.idS), 2)
- self.assertEqual(len(myObj.refS), 1)
- self.assertEqual(myObj.hasId('class_big_endian'), True)
- self.assertEqual(myObj.hasId('class_big_endian_1a9f78fb092e713acf6ffe3e8e11f1626f'), True)
- self.assertEqual(myObj.hasId('noID'), False)
- self.assertEqual(myObj.idElem('class_big_endian'), 'cxxClass')
- self.assertEqual(myObj.idElem('noID'), None)
- self.assertEqual(
- myObj.idElem('class_big_endian_1a9f78fb092e713acf6ffe3e8e11f1626f'),
- 'cxxFunction'
- )
- #print myObj.idMap()
- self.assertEqual(
- myObj.idElemMap(),
- {
- 'class_big_endian_1a9f78fb092e713acf6ffe3e8e11f1626f' : 'cxxFunction',
- 'class_big_endian' : 'cxxClass',
- }
- )
- self.assertEqual(myObj.errStrings(False, None), [])
- self.assertEqual(myObj.errStrings(True, None), [])
-
-class TestDitaFileSet(unittest.TestCase):
- def setUp(self):
- pass
-
- def tearDown(self):
- pass
-
- def testSetUpTearDown(self):
- """DitaFileSet: test setUp() and tearDown()."""
- pass
-
- def test_None(self):
- """DitaFileSet: read of None."""
- myO = DitaFileSet(None)
- myO.finalise()
- self.assertEqual(myO.errStrings(False, None), ['Not a directory: None'])
- self.assertEqual(myO.errStrings(True, None), ['Not a directory: %s' % GENERIC_STRING, ])
- self.assertEqual(myO.errCountMap, {500 : 1})
-
- def test_basic(self):
- """DitaFileSet: Test reading a map and a couple of files."""
- myO = DitaFileSet(None, procDir=False)
- myO._addFileObj(
- StringIO.StringIO(
-"""<map id="map_00">
- <topicref href="spam.dita" />
- <topicref href="eggs.dita" />
-</map>"""
- ),
- 'map.ditamap'
- )
- myO._addFileObj(StringIO.StringIO('<topic id="spam"/>'), 'spam.dita')
- myO._addFileObj(StringIO.StringIO('<topic id="eggs"/>'), 'eggs.dita')
- myO.finalise()
- #print 'HI'
- #myO.writeErrors(False)
- self.assertEqual(myO.allErrStrings(False, None), [])
- self.assertEqual(myO.allErrStrings(True, None), [])
- self.assertEqual(myO.errCountMap, {})
-
- def test_duplicate_paths(self):
- """DitaFileSet: Test reading a couple of files in duplicate paths."""
- myO = DitaFileSet(None, procDir=False)
- myO._addFileObj(
- StringIO.StringIO(
-"""<map id="map_00">
- <topicref href="spam.dita" />
-</map>"""
- ),
- 'map.ditamap'
- )
- myO._addFileObj(StringIO.StringIO('<topic id="spam"/>'), 'spam.dita')
- myO._addFileObj(StringIO.StringIO('<topic id="eggs"/>'), 'spam.dita')
- myO.finalise()
- self.assertEqual(
- myO.errStrings(False, None),
- [
- 'Duplicate file path: "%s"' % normalisePath('spam.dita'),
- ]
- )
- self.assertEqual(myO.errStrings(True, None), [genericStringForErrorCode(504),])
- self.assertEqual(myO.errCountMap, {504 : 1})
-
- def test_duplicate_ids(self):
- """DitaFileSet: Test reading a map and a couple of files with duplicate IDs."""
- myO = DitaFileSet(None, procDir=False)
- myO._addFileObj(
- StringIO.StringIO(
-"""<map id="map_00">
- <topicref href="spam.dita" />
- <topicref href="eggs.dita" />
- <topicref href="chips.dita" />
-</map>"""
- ),
- 'map.ditamap'
- )
- myO._addFileObj(StringIO.StringIO('<topic id="chips"/>'), 'spam.dita')
- myO._addFileObj(StringIO.StringIO('<topic id="chips"/>'), 'eggs.dita')
- myO._addFileObj(StringIO.StringIO('<topic id="chips"/>'), 'chips.dita')
- myO.finalise()
- #print 'HI'
- #myO.writeErrors(False)
- #pprint.pprint(myO.errStrings(False, None))
- self.assertEqual(
- myO.errStrings(True, None),
- [
- genericStringForErrorCode(505),
- genericStringForErrorCode(501),
- ]
- )
- expErrs = [
- """Duplicate id="chips" in files: ('%s', '%s', '%s')""" \
- % (normalisePath('chips.dita'), normalisePath('eggs.dita'), normalisePath('spam.dita')),
- """Duplicate root id="chips" in files: ('%s', '%s', '%s')""" \
- % (normalisePath('chips.dita'), normalisePath('eggs.dita'), normalisePath('spam.dita')),
- ]
- myErrs = myO.errStrings(False, None)
-#===============================================================================
-# for i in range(2):
-# if myErrs[i] != expErrs[i]:
-# print myErrs[i]
-# print expErrs[i]
-# print
-#===============================================================================
- self.assertEqual(myErrs, expErrs)
- self.assertEqual(myO.errCountMap, {505: 1, 501: 1})
-
- def test_lonely_topics(self):
- """DitaFileSet: Test a couple of lonely topics."""
- myO = DitaFileSet(None, procDir=False)
- myO._addFileObj(StringIO.StringIO('<spam id="spam"/>'), 'spam')
- myO._addFileObj(StringIO.StringIO('<eggs id="eggs"/>'), 'eggs')
- myO.finalise()
- self.assertEqual(
- myO.errStrings(False, None),
- [
- 'Topic id="%s" is not referenced by any map' % normalisePath('eggs'),
- 'Topic id="%s" is not referenced by any map' % normalisePath('spam'),
- ]
- )
- self.assertEqual(
- myO.errStrings(True, None),
- [
- genericStringForErrorCode(600),
- ]
- )
-
- def test_map_cycles_00(self):
- """DitaFileSet: Cyclic references between two maps."""
- myO = DitaFileSet(None, procDir=False)
- myO._addFileObj(
- StringIO.StringIO(
-"""<map id="map_00">
- <topicref href="map_01.ditamap" format="ditamap" />
-</map>"""
- ),
- 'map_00.ditamap'
- )
- myO._addFileObj(
- StringIO.StringIO(
-"""<map id="map_01">
- <topicref href="map_00.ditamap" format="ditamap" />
-</map>"""
- ),
- 'map_01.ditamap'
- )
- myO.finalise()
- #print 'HI test_map_cycles_00()'
- #pprint.pprint(myO._retMapAdjList())
- self.assertEqual(
- myO.errStrings(False, None),
- [
- 'Maps "%s" are in a a cycle.' % str(
- (
- normalisePath('map_00.ditamap'),
- normalisePath('map_01.ditamap'),
- )
- ),
- 'Maps "%s" are in a a cycle.' % str(
- (
- normalisePath('map_01.ditamap'),
- normalisePath('map_00.ditamap'),
- )
- ),
- ]
- )
- #print
- #pprint.pprint(myO.allErrStrings(False, None))
- self.assertEqual(myO.allErrStrings(True, None), [genericStringForErrorCode(701)])
- self.assertEqual(myO.errCountMap, {701 : 4})
-
- def test_map_cycles_01(self):
- """DitaFileSet: Cyclic references between three maps."""
- myO = DitaFileSet(None, procDir=False)
- myO._addFileObj(
- StringIO.StringIO(
-"""<map id="map_00">
- <topicref href="map_01.ditamap" format="ditamap" />
-</map>"""
- ),
- 'map_00.ditamap'
- )
- myO._addFileObj(
- StringIO.StringIO(
-"""<map id="map_01">
- <topicref href="map_02.ditamap" format="ditamap" />
-</map>"""
- ),
- 'map_01.ditamap'
- )
- myO._addFileObj(
- StringIO.StringIO(
-"""<map id="map_02">
- <topicref href="map_00.ditamap" format="ditamap" />
-</map>"""
- ),
- 'map_02.ditamap'
- )
- myO.finalise()
- #print 'HI test_map_cycles_00()'
- #pprint.pprint(myO._retMapAdjList())
- self.assertEqual(
- myO.errStrings(False, None),
- [
- 'Maps "%s" are in a a cycle.' % str(
- (
- normalisePath('map_00.ditamap'),
- normalisePath('map_01.ditamap'),
- normalisePath('map_02.ditamap'),
- )
- ),
- 'Maps "%s" are in a a cycle.' % str(
- (
- normalisePath('map_01.ditamap'),
- normalisePath('map_02.ditamap'),
- normalisePath('map_00.ditamap'),
- )
- ),
- 'Maps "%s" are in a a cycle.' % str(
- (
- normalisePath('map_02.ditamap'),
- normalisePath('map_00.ditamap'),
- normalisePath('map_01.ditamap'),
- )
- ),
- ]
- )
- self.assertEqual(myO.errStrings(True, None), [genericStringForErrorCode(701)])
- self.assertEqual(myO.errCountMap, {701 : 6})
-
- def test_refarc_00(self):
- """DitaFileSet: Test ref arcing - all resolve."""
- myO = DitaFileSet(None, procDir=False)
- myO._addFileObj(
- StringIO.StringIO(
-"""<map id="map_00">
- <topicref href="spam.dita#spam" />
- <topicref href="eggs.dita#eggs" />
-</map>"""
- ),
- 'map.ditamap'
- )
- myO._addFileObj(StringIO.StringIO('<topic id="spam"/>'), 'spam.dita')
- myO._addFileObj(StringIO.StringIO('<topic id="eggs"/>'), 'eggs.dita')
- myO.finalise()
- self.assertEqual(myO.errCountMap, {})
- self.assertEqual(myO.allErrStrings(False, None), [])
- self.assertEqual(myO.allErrStrings(True, None), [])
- self.assertEqual(myO.errStrings(False, None), [])
- self.assertEqual(myO.errStrings(True, None), [])
-
- def test_refarc_fail_00(self):
- """DitaFileSet: Test ref arcing - can't find file."""
- myO = DitaFileSet(None, procDir=False)
- myO._addFileObj(
- StringIO.StringIO(
-"""<map id="map_00">
- <topicref href="spam_.dita" />
- <topicref href="eggs_for_tea.dita" />
-</map>"""
- ),
- 'map.ditamap'
- )
- myO.finalise()
- self.assertEqual(myO.errCountMap, {410: 2})
- #print 'HI'
- #pprint.pprint(myO.allErrStrings(False, None))
- self.assertEqual(
- myO.allErrStrings(False, None),
- [
- 'Can not resolve reference to file "%s"' % normalisePath('eggs_for_tea.dita'),
- 'Can not resolve reference to file "%s"' % normalisePath('spam_.dita'),
- ]
- )
- self.assertEqual(
- myO.allErrStrings(True, None),
- [
- 'Can not resolve reference to file "..."',
- ]
- )
- self.assertEqual(myO.errStrings(False, None), [])
- self.assertEqual(myO.errStrings(True, None), [])
-
- def test_refarc_fail_01(self):
- """DitaFileSet: Test ref arcing - can't find fragment."""
- myO = DitaFileSet(None, procDir=False)
- myO._addFileObj(
- StringIO.StringIO(
-"""<map id="map_00">
- <topicref href="spam.dita#spam_" />
- <topicref href="eggs.dita#eggs_" />
-</map>"""
- ),
- 'map.ditamap'
- )
- myO._addFileObj(StringIO.StringIO('<spam id="spam"/>'), 'spam.dita')
- myO._addFileObj(StringIO.StringIO('<eggs id="eggs"/>'), 'eggs.dita')
- myO.finalise()
- self.assertEqual(myO.errCountMap, {411: 2})
- #print 'HI'
- #pprint.pprint(myO.allErrStrings(False, None))
- self.assertEqual(
- myO.allErrStrings(False, None),
- [
- 'Can resolve reference to file "%s" but not to fragment "eggs_"' % normalisePath('eggs.dita'),
- 'Can resolve reference to file "%s" but not to fragment "spam_"' % normalisePath('spam.dita'),
- ]
- )
- self.assertEqual(
- myO.allErrStrings(True, None),
- [
- 'Can resolve reference to file "%s" but not to fragment "%s"' % (GENERIC_STRING, GENERIC_STRING),
- ]
- )
- self.assertEqual(myO.errStrings(False, None), [])
- self.assertEqual(myO.errStrings(True, None), [])
-
- def test_refarc_url_00(self):
- """DitaFileSet: Test ref arcing - URL."""
- myO = DitaFileSet(None, procDir=False, testExt=True)
- myO._addFileObj(
- StringIO.StringIO(
-"""<map id="map_00">
- <topicref href="spam.dita#spam" />
- <topicref href="eggs.dita#eggs" />
-</map>"""
- ),
- 'map.ditamap'
- )
- myO._addFileObj(StringIO.StringIO("""<topic id="spam">
- <xref href="http://www.nokia.com">Nokia</xref>
-</topic>"""), 'spam.dita')
- myO._addFileObj(StringIO.StringIO("""<topic id="eggs">
- <xref href="http://www.google.com">Google</xref>
-</topic>"""), 'eggs.dita')
- myO.finalise()
- #print 'HI'
- #pprint.pprint(myO.allErrStrings(False, None))
- self.assertEqual(myO.errCountMap, {})
- self.assertEqual(
- myO.allErrStrings(False, None),
- [
- ]
- )
- self.assertEqual(
- myO.allErrStrings(True, None),
- [
- ]
- )
- self.assertEqual(myO.errStrings(False, None), [])
- self.assertEqual(myO.errStrings(True, None), [])
-
-class TestDitaBookmapFileSet(unittest.TestCase):
- def setUp(self):
- pass
-
- def tearDown(self):
- pass
-
- def testSetUpTearDown(self):
- """TestDitaBookmapFileSet: test setUp() and tearDown()."""
- pass
-
- def test_basic(self):
- """TestDitaBookmapFileSet: Test reading a bookmap and a topic."""
- myO = DitaFileSet(None, procDir=False)
- myO._addFileObj(
- StringIO.StringIO(
-"""<?xml version="1.0" encoding="utf-8"?>
-<!DOCTYPE bookmap PUBLIC "-//OASIS//DTD DITA BookMap//EN"
-"bookmap.dtd">
-<bookmap id="GUID-5BDFDB6B-7801-4804-9F41-2BDC5BE53DDF">
- <booktitle>
- <mainbooktitle>My Bookmap</mainbooktitle>
- <booktitlealt>Alternate title</booktitlealt>
- </booktitle>
- <frontmatter id="GUID-DA857913-F826-4CF7-A135-93F2AEB48353">
- <topicref href="GUID-00025EAD-C4B6-5408-96A3-FFDBBBDC7CAB.dita" id="GUID-994B1764-393F-401F-8571-CE0955AB6CA6" />
- </frontmatter>
-</bookmap>
-"""
- ),
- 'bookmap.ditamap'
- )
- myO._addFileObj(StringIO.StringIO("""<?xml version="1.0" encoding="utf-8"?>
-<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
-<concept id="GUID-00025EAD-C4B6-5408-96A3-FFDBBBDC7CAB" xml:lang="en">
- <title>How to read and write a file</title>
-</concept>
-"""), 'GUID-00025EAD-C4B6-5408-96A3-FFDBBBDC7CAB.dita')
- myO.finalise()
- #print
- #myO.debugDump()
- #print 'HI'
- #myO.writeErrors(False)
- self.assertEqual(myO.allErrStrings(False, None), [])
- self.assertEqual(myO.allErrStrings(True, None), [])
- self.assertEqual(myO.errCountMap, {})
-
-class Special(unittest.TestCase):
- pass
-
-def unitTest(theVerbosity=2):
- suite = unittest.TestLoader().loadTestsFromTestCase(NullClass)
- suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestCountDict))
- suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestDitaId))
- suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestDitaRef))
- suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestDitaFile))
- suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestDitaFileSet))
- suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestDitaBookmapFileSet))
- suite.addTests(unittest.TestLoader().loadTestsFromTestCase(Special))
- myResult = unittest.TextTestRunner(verbosity=theVerbosity).run(suite)
- return (myResult.testsRun, len(myResult.errors), len(myResult.failures))
-
-######################################
-# main() stuff
-######################################
-def main():
- print 'CMD: %s' % ' '.join(sys.argv)
- usage = "usage: %prog [options] <Directory of XML content>"
- parser = OptionParser(usage, version='%prog ' + __version__)
- parser.add_option("-d", action="store_true", dest="dump", default=False,
- help="Dump internal representation. [default: %default]")
- parser.add_option(
- "-e", "--errors",
- type="str",
- dest="error_codes",
- default='All',
- help="Only report on certain error codes (space seperated list). [default: \"%default\"]"
- )
- parser.add_option("-f", "--file", dest="file", type="str", default='None',
- help="Report of errors by file either 'None', 'generic', 'specific'. [default: %default]")
- parser.add_option("-g", action="store_true", dest="guid", default=False,
- help="Enforce GUID specification. [default: %default]")
- parser.add_option(
- "-j", "--jobs",
- type="int",
- dest="jobs",
- default=-1,
- help="Max processes when multiprocessing. 0 takes CPUs, -1 no MP. [default: %default]"
- )
- parser.add_option(
- "-l", "--loglevel",
- type="int",
- dest="loglevel",
- default=20,
- help="Log Level (debug=10, info=20, warning=30, [error=40], critical=50) [default: %default]"
- )
- parser.add_option(
- "-p", "--pattern",
- type="str",
- dest="pattern",
- default=FNMATCH_STRING,
- help="Pattern match. [default: \"%default\"]"
- )
- parser.add_option("-r", action="store_true", dest="recursive", default=False,
- help="Recursive. [default: %default]")
- parser.add_option("-s", action="store_true", dest="shelve", default=False,
- help="Use the shelve dBase rather than storing the internal representation in memory. This is slower but is useful for large data sets where a memory error might occur. [default: %default]")
- parser.add_option("-u", action="store_true", dest="unit_test", default=False,
- help="Execute unit tests and exit. [default: %default]")
- parser.add_option("-x", action="store_true", dest="ext_url", default=False,
- help="Test external |URLs. [default: %default]")
- parser.add_option("-?", action="store_true", dest="query_errors", default=False,
- help="Display the error types that are detected. [default: %default]")
- (options, args) = parser.parse_args()
- logging.basicConfig(
- level=options.loglevel,
- format='%(asctime)s %(levelname)-8s %(message)s',
- stream=sys.stdout,
- )
- if options.file not in ('None', 'generic', 'specific'):
- parser.error("--file option must be: 'None' | 'generic' | 'specific'")
- return 1
- if options.unit_test:
- unitTest()
- if options.query_errors:
- writeGenericStringsForErrorCodes()
- if len(args) < 1 and not options.unit_test:
- parser.print_help()
- parser.error("I can't do much without a path to the XML content.")
- return 1
- elif len(args) == 1:
- if options.jobs > -1:
- myObj = retMpDitaFileSetObj(
- args[0],
- options.pattern.split(' '),
- options.recursive,
- options.jobs,
- options.ext_url,
- options.shelve,
- )
- else:
- myObj = DitaFileSet(args[0],
- procDir=True,
- thePatterns=options.pattern.split(' '),
- recursive=options.recursive,
- testExt=options.ext_url,
- useDbase=options.shelve,
- )
- #print 'MyObj:', myObj
- if options.dump:
- myObj.debugDump()
- myObj.writeStatistics()
- myObj.writeErrorSummary()
- #pprint.pprint(myObj.statsMap)
- # TODO: Write out the results in different ways
- errFilter = set(PROBLEM_CODE_FORMAT.keys())
- if options.error_codes != 'All':
- errFilter = set([int(i) for i in options.error_codes.split()])
- if options.file == 'generic':
- print 'Generic problems:'
- myObj.writeErrors(True, errFilter)
- elif options.file == 'specific':
- print 'Specific problems:'
- myObj.writeErrors(False, errFilter)
- elif len(args) > 1:
- parser.error("Too many arguments, I need only one.")
- return 1
- return 0
-
-if __name__ == '__main__':
- multiprocessing.freeze_support()
- sys.exit(main())