FCL/sftools/depl/doctools: comparison mpdot/linkcheck.py

equal deleted inserted replaced

-:d8fccb2cd802
+:468f4c8d3d5b
-# Copyright (c) 2007-2010 Nokia Corporation and/or its subsidiary(-ies) All rights reserved.
-# This component and the accompanying materials are made available under the terms of the License
-# "Eclipse Public License v1.0" which accompanies this distribution,
-# and is available at the URL "http://www.eclipse.org/legal/epl-v10.html".
-#
-# Initial Contributors:
-# Nokia Corporation - initial contribution.
-#
-# Contributors:
-#
-# Description:
-# Checks links in DITA XML and reports issues.
-"""
-Created on 12 Feb 2010
-@author: p2ross
-Definitions
-===========
-Doctype
--------
-See: http://www.w3.org/TR/2008/REC-xml-20081126/#dt-root
-Note: this is sometimes called the Doctype because of http://www.w3.org/TR/2008/REC-xml-20081126/#vc-roottype
-ID
---
-The value of the 'id' attribute of an element.
-Root ID
--------
-The value of the 'id' attribute of the root element.
-Note: A development would allow differently named attributes provided that they
-were ID types. See http://www.w3.org/TR/2008/REC-xml-20081126/#sec-attribute-types
-for validity constraints for ID types.
-Reference
----------
-The value of the href attribute of an element.
-Map
----
-An XML file whose root element name is 'map' or ends with 'Map'.
-Topic
------
-An XML file that is not a Map.
-Lonely topic
-------------
-A topic whose root ID is not referenced by any map.
-Lonely map
-----------
-A map whose root ID is not referenced by any map.
-Map Cycle
----------
-A sequence of map references whose members are not unique.
-"""
-import os
-import unittest
-import sys
-import logging
-import pprint
-import fnmatch
-import re
-import urllib
-import time
-from optparse import OptionParser, check_choice
-try:
-from xml.etree import cElementTree as etree
-except ImportError:
-from xml.etree import ElementTree as etree
-import urlparse
-import multiprocessing
-# used for DitaFileObj persistence
-import shelve
-__version__ = '0.1.5'
class ExceptionLinkCheck(Exception):
    """Exception type declared by the link checker module."""
class CountDict(dict):
    """Dictionary with a default value of 0 for unknown keys.

    Reading an unknown key with ``d[key]`` stores 0 under that key and
    returns it, so ``d[key] += 1`` works without prior initialisation.
    ``get()`` and ``in`` keep their ordinary dict behaviour.
    """

    def __missing__(self, key):
        # dict.__getitem__ calls this hook only when the key is absent, so
        # existing keys take the normal fast path with a single lookup.
        self[key] = 0
        return 0
# Matches stuff like: GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E
RE_GUID = re.compile(
    r'GUID-[0-9A-F]{8}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{12}',
    re.IGNORECASE
)

# Of the form {integer_error_code : (format_string, num_args), ...}
# num_args is the number of '%s' fields in format_string.
PROBLEM_CODE_FORMAT = {
    # 'id_syntax'
    100: ('Character \'#\' not allowed in id="%s"', 1),
    101: ('NMTOKEN character \'%s\' not allowed in id="%s"', 2),
    102: ('GUID specification does not match id="%s"', 1),
    # 'ref_syntax'
    200: ('Multiple \'#\' not allowed in reference "%s"', 1),
    201: ('Reference element "%s" is missing href=... attribute', 1),
    202: ('URL has missing type/format in reference "%s"', 1),
    203: ('GUID specification does not match file reference "%s"', 1),
    204: ('GUID specification does not match fragment reference "%s"', 1),
    # 'ref'
    300: ('Can not resolve URI "%s"', 1),
    # 'file'
    400: ('Failed to open: "%s"', 1),
    401: ('Multiple id="%s"', 1),
    402: ('No id attribute on root element', 0),
    403: ('Root ID in cycle: %s', 1),
    404: ('Can not parse: "%s"', 1),
    410: ('Can not resolve reference to file "%s"', 1),
    411: ('Can resolve reference to file "%s" but not to fragment "%s"', 2),
    412: ('Referencing element "%s" does not match target root element "%s"', 2),
    413: ('Referencing element "%s" does not match target element "%s" for id="%s"', 3),
    414: ('topicref element with format="ditamap" does not match target root element "%s"', 1),
    415: ('topicref to <map> does not have format="ditamap" but format="%s"', 1),
    416: ('topicref element type="%s" does not match target root element "%s"', 2),
    417: ('topicref element type="%s" does not match target element "%s" for id="%s"', 3),
    418: ('Unknown referencing element "%s" does not match target root element "%s"', 2),
    419: ('Unknown referencing element "%s" does not match target element "%s" for id="%s"', 3),
    # 'file_set'
    500: ('Not a directory: %s', 1),
    501: ('Duplicate root id="%s" in files: %s', 2),
    #502 : ('Can not resolve reference to "%s"', 1),
    #503 : ('Reference type "%s" does not match target type "%s" for id="%s"', 3),
    504: ('Duplicate file path: "%s"', 1),
    505: ('Duplicate id="%s" in files: %s', 2),
    # 'topic_set'
    600: ('Topic id="%s" is not referenced by any map', 1),
    # 'map_set'
    700: ('More than one top level map exists: %s', 1),
    701: ('Maps "%s" are in a a cycle.', 1),
}

# Placeholder substituted for every argument when a generic message is wanted
GENERIC_STRING = '...'
# Width of the '=' rules and centred headings in the text reports
PRINT_WIDTH = 75
def genericStringForErrorCode(ec):
    """Return the message for error code ec with every argument replaced
    by GENERIC_STRING.

    ec must be a key of PROBLEM_CODE_FORMAT (checked by assertion)."""
    # 'in' replaces dict.has_key(), which no longer exists in Python 3
    assert ec in PROBLEM_CODE_FORMAT
    theFormat, argCount = PROBLEM_CODE_FORMAT[ec]
    if argCount == 0:
        # Nothing to substitute; applying '%' with an empty tuple is avoided
        return theFormat
    return theFormat % ((GENERIC_STRING,) * argCount)
def writeGenericStringsForErrorCodes(s=sys.stdout):
    """Write a table of all error codes, in ascending order, with their
    generic message text to the stream s."""
    s.write(' All Error Codes '.center(PRINT_WIDTH, '='))
    s.write('\n')
    s.write('%4s  %s\n' % ('Code', 'Error'))
    s.write('%4s  %s\n' % ('----', '-----'))
    # sorted() works whether keys() yields a list or a view
    for ec in sorted(PROBLEM_CODE_FORMAT):
        s.write('%4d  %s\n' % (ec, genericStringForErrorCode(ec)))
    s.write('='*PRINT_WIDTH)
    s.write('\n\n')
def normalisePath(thePath):
    """Return thePath as an absolute path that uses '/' as its only
    separator (any backslash is replaced)."""
    # TODO: How come this does not work?
    #return os.path.abspath(thePath)
    absolutePath = os.path.abspath(thePath)
    return absolutePath.replace('\\', '/')
# File name patterns (fnmatch style) and the same joined with single spaces
FNMATCH_PATTERNS = ['*.xml', '*.dita', '*.ditamap']
FNMATCH_STRING = ' '.join(FNMATCH_PATTERNS)

# These elements descend from topic/xref so can be treated as referencing elements
XREF_DESCENDENTS = set([
    # From the api specialisation
    'apiRelation',
    'apiBaseClassifier',
    'apiOtherClassifier',
    'apiOperationClassifier',
    'apiValueClassifier',
    # From the C++ specialisation
    'cxxfile',
    'cxxclass',
    'cxxstruct',
    'cxxunion',
    'cxxfunction',
    'cxxdefine',
    'cxxtypedef',
    'cxxvariable',
    'cxxenumeration',
    'cxxClassBaseClass',
    'cxxClassBaseStruct',
    'cxxClassBaseUnion',
    'cxxClassNestedClass',
    'cxxClassNestedStruct',
    'cxxClassNestedUnion',
    'cxxClassEnumerationInherited',
    'cxxClassEnumeratorInherited',
    'cxxClassFunctionInherited',
    'cxxClassVariableInherited',
    'cxxDefineReimplemented',
    'cxxEnumerationReimplemented',
    'cxxFunctionReimplemented',
    'cxxStructBaseClass',
    'cxxStructBaseStruct',
    'cxxStructBaseUnion',
    'cxxStructNestedClass',
    'cxxStructNestedStruct',
    'cxxStructNestedUnion',
    'cxxStructEnumerationInherited',
    'cxxStructEnumeratorInherited',
    'cxxStructFunctionInherited',
    'cxxStructVariableInherited',
    'cxxTypedefReimplemented',
    'cxxUnionBaseClass',
    'cxxUnionBaseStruct',
    'cxxUnionBaseUnion',
    'cxxUnionNestedClass',
    'cxxUnionNestedStruct',
    'cxxUnionNestedUnion',
    'cxxUnionEnumerationInherited',
    'cxxUnionFunctionInherited',
    'cxxUnionVariableInherited',
    'cxxVariableReimplemented',
])
class UrlAccessCache(object):
    """Remembers, per URL, whether the URL could be opened and read so
    that each URL is fetched at most once between calls to clear()."""

    def __init__(self):
        # {URL : True/False, ...}
        self._cache = {}

    def clear(self):
        """Forget every cached result."""
        self._cache = {}

    def canAccess(self, theUrl):
        """Return True if theUrl can be opened and read, False if that
        raises IOError. The first answer for a URL is cached and returned
        for later calls."""
        if theUrl not in self._cache:
            try:
                u = urllib.urlopen(theUrl)#, data, proxies)
                try:
                    u.read()
                finally:
                    # Release the connection instead of waiting for
                    # garbage collection of the handle
                    u.close()
                self._cache[theUrl] = True
                logging.debug('URL: %s  for %s' % (True, theUrl))
            except IOError:
                self._cache[theUrl] = False
                logging.debug('URL: %s for %s' % (False, theUrl))
        return self._cache[theUrl]


# Module-wide cache instance
GlobalUrlCache = UrlAccessCache()
class DitaLinkCheckBase(object):
    """Base class that holds some common functionality.

    An instance has an immutable identity, which drives comparison, hashing
    and str(), and a lazily created store of errors of the form
    {error_code : set(argument_tuple, ...), ...}."""

    def __init__(self, theIdentity):#=None):
        self.__identity = theIdentity
        # {error_code : set of argument tuples}, None until the first error
        self._errS = None

    @property
    def identity(self):
        """The value this object is compared, hashed and printed by."""
        return self.__identity

    def __cmp__(self, other):
        assert(self.identity is not None)
        assert(other.identity is not None)
        # Same result as cmp() without depending on that builtin
        return (self.identity > other.identity) - (self.identity < other.identity)

    def __eq__(self, other):
        assert(self.identity is not None)
        assert(other.identity is not None)
        return self.identity == other.identity

    def __ne__(self, other):
        return not self.__eq__(other)

    def __lt__(self, other):
        # Lets sort()/sorted() order instances where __cmp__ is not consulted
        assert(self.identity is not None)
        assert(other.identity is not None)
        return self.identity < other.identity

    def __hash__(self):
        assert(self.identity is not None)
        return hash(self.identity)

    def __str__(self):
        return str(self.__identity)

    def debugDump(self, s=sys.stdout, prefix=''):
        """Dump of IR for debug purposes."""
        raise NotImplementedError

    def addError(self, errCode, argTuple):
        """Record an error. errCode must be a key of PROBLEM_CODE_FORMAT and
        argTuple must hold as many values as that code's format expects
        (both checked by assertion). Identical errors are stored once."""
        assert(errCode in PROBLEM_CODE_FORMAT), 'No error code: %s' % errCode
        assert(PROBLEM_CODE_FORMAT[errCode][1] == len(argTuple)), \
            'Length missmatch for error code %d: %d != %d for %s' \
            % (errCode, PROBLEM_CODE_FORMAT[errCode][1], len(argTuple), str(argTuple))
        if self._errS is None:
            self._errS = {}
        self._errS.setdefault(errCode, set()).add(argTuple)

    def errStrings(self, generic, theFilter):
        """Return a sorted list of error messages without duplicates.

        generic - If true each message has its arguments replaced by the
        generic placeholder.
        theFilter - None for all errors, otherwise a container of the error
        codes to include."""
        if self._errS is None:
            return []
        mySet = set()
        for ec, argTuples in self._errS.items():
            if theFilter is not None and ec not in theFilter:
                continue
            assert(ec in PROBLEM_CODE_FORMAT)
            for tu in argTuples:
                if generic:
                    mySet.add(genericStringForErrorCode(ec))
                else:
                    f, c = PROBLEM_CODE_FORMAT[ec]
                    assert(len(tu) == c)
                    mySet.add(f % tu)
        return sorted(mySet)

    def updateErrorCount(self, theMap):
        """Updates a map of {error_code, : count, ...}.
        Overridden for file and file set.
        theMap must already hold, or supply a default for, each code."""
        if self._errS is not None:
            for e, argTuples in self._errS.items():
                theMap[e] += len(argTuples)

    def writeErrors(self, isGeneric, theFilter, theStream=sys.stdout):
        """Can be overridden in child classes to recurse into
        their data structures."""
        theStream.write('\n'.join(self.errStrings(isGeneric, theFilter)))
class DitaId(DitaLinkCheckBase):
    """Represents a node with an id.

    The identity is the value of the node's 'id' attribute; the element
    name is kept alongside it."""

    def __init__(self, theN):
        idValue = theN.get('id', None)
        assert(idValue is not None)
        super(DitaId, self).__init__(idValue)
        self._elem = theN.tag
        if '#' in self.id:
            self.addError(100, (self.id,))
        # TODO: NMTOKENS

    @property
    def elem(self):
        """The tag of the element that carries the id."""
        return self._elem

    @property
    def id(self):
        """The value of the id attribute (same as identity)."""
        return self.identity

    def checkGuid(self):
        """optionally applies additional checks for GUID requirements."""
        if RE_GUID.match(self.id) is None:
            self.addError(102, (self.id,))

    def debugDump(self, s=sys.stdout, prefix=''):
        """Dump of IR for debug purposes."""
        s.write('%sID:  <%s id="%s" />\n' % (prefix, self.elem, self.id))
class DitaRef(DitaLinkCheckBase):
    """Represents a reference node.

    The identity is '<element name> <href value>'. An element without an
    href attribute is accepted: error 201 is recorded, the URL parts read
    as empty strings and fileFragment() returns (None, None)."""

    def __init__(self, theN):
        self._elem = theN.tag
        self._href = theN.get('href', None)
        super(DitaRef, self).__init__('%s %s' % (self._elem, self._href))
        # This is used when figuring out of the target is the correct element
        # e.g. in Vanilla DITA
        # <topicref href="batcaring.dita" type="task"></topicref>
        self._refType = theN.get('type', None)
        # Format attribute, this can be format="ditamap"
        self._format = theN.get('format', None)
        if self._href is None:
            self.addError(201, (self._elem,))
            self._url = None
        else:
            self._url = urlparse.urlparse(self._href)
            if '#' in self._url.fragment:
                self.addError(200, (self._href,))

    @property
    def elem(self):
        """The tag of the referencing element."""
        return self._elem

    @property
    def href(self):
        """The value of the href attribute."""
        return self._href

    @property
    def refType(self):
        """The value of the type attribute."""
        return self._refType

    @property
    def format(self):
        """The value of the format attribute."""
        return self._format

    @property
    def path(self):
        """The value of the path part of the href attribute, '' if there is
        no href."""
        if self._url is None:
            return ''
        return self._url.path

    @property
    def fragment(self):
        """The value of the fragment part of the href attribute, '' if there
        is no href."""
        if self._url is None:
            return ''
        return self._url.fragment

    @property
    def scheme(self):
        """The URI scheme e.g. 'http' or '' if no scheme (or no href)."""
        if self._url is None:
            return ''
        return self._url.scheme

    def fileFragment(self, theRefFile):
        """The absolute path of the file and the fragment identifier or (None, None).

        theRefFile is the path of the file holding this reference; a
        relative href is resolved against its directory and an href with an
        empty path refers to theRefFile itself. (None, None) is returned when
        the scheme is neither '' nor 'file', or when there is no href."""
        if self._url is None or self.scheme not in ('', 'file'):
            return (None, None)
        if len(self.path) == 0:
            myPath = theRefFile
        else:
            myPath = os.path.join(os.path.dirname(theRefFile), self.path)
        return normalisePath(myPath), self.fragment

    def checkGuid(self):
        """optionally applies additional checks for GUID requirements."""
        if self._url is None:
            # Already reported as error 201, nothing to match against
            return
        if RE_GUID.match(self.path) is None:
            self.addError(203, (self.path,))
        if RE_GUID.match(self.fragment) is None:
            self.addError(204, (self.fragment,))

    def checkUrl(self):
        """If the reference has a scheme, record error 300 when the URL can
        not be accessed according to GlobalUrlCache."""
        if self.scheme:
            myU = urlparse.urlunparse(self._url)
            if not GlobalUrlCache.canAccess(myU):
                self.addError(300, (myU,))

    def debugDump(self, s=sys.stdout, prefix=''):
        """Dump of IR for debug purposes."""
        s.write('%sREF: <%s href="%s" />\n' % (prefix, self.elem, self._href))
class DitaFileObj(DitaLinkCheckBase):
    """Base class for a DITA topic or map.

    The XML is parsed once, in the constructor. The root element name, the
    IDs (as DitaId) and the href references (as DitaRef) are retained;
    problems are recorded with addError()."""

    def __init__(self, theFileObj, theFileName=None):
        """Initialiser with a file object and a file path.

        theFileObj - Object to parse, or None which records error 400.
        theFileName - If given its normalised form is the identity,
        otherwise theFileObj.name is used (None if both are missing)."""
        #print '\nDitaFileObj(%s, %s)' % (theFileObj, theFileName)
        if theFileName is not None:
            super(DitaFileObj, self).__init__(normalisePath(theFileName))
        elif theFileObj is not None:
            super(DitaFileObj, self).__init__(theFileObj.name)
        else:
            super(DitaFileObj, self).__init__(None)
        self._rootId = None
        self._doctype = None
        # Sets of class DitaId
        self._idS = set()
        self._dupeIdS = set()
        # Set of class DitaRef
        self._xrefS = set()
        # Output control
        self._hasWritten = False
        # Size of input
        self._bytes = self._inputSize(theFileObj, theFileName)
        # Process the file object
        if theFileObj is None:
            self.addError(400, (self.identity,))
            return
        try:
            # TODO: use iterparse?
            theTree = etree.parse(theFileObj)
        except SyntaxError as err:
            self.addError(404, (str(err),))
            return
        self._walkTree(theTree)

    @staticmethod
    def _inputSize(theFileObj, theFileName):
        """Best effort size of the input: the size of the named file, else
        the 'len' attribute of the file object, else 0."""
        try:
            return os.path.getsize(theFileName)
        except Exception:
            # Try as if a StringIO
            try:
                return theFileObj.len
            except AttributeError:
                # Give up
                return 0

    def _walkTree(self, theTree):
        """Collect the root element name, the IDs and the references."""
        # getiterator() is the older spelling of iter(); use iter() when
        # the tree provides it
        walk = getattr(theTree, 'iter', None) or theTree.getiterator
        for i, e in enumerate(walk()):
            hasId = e.get('id', None) is not None
            # Element [0] is the root element
            if i == 0:
                assert(self._rootId is None)
                assert(self._doctype is None)
                self._doctype = e.tag
                if hasId:
                    self._rootId = DitaId(e)
                    self._addId(self._rootId)
                else:
                    self.addError(402, ())
            else:
                # NOTE: Elements with id attributes can also have href
                # attributes. For example a <topicref> in a <bookmap>
                # Thus these tests are not exclusive
                if hasId:
                    self._addId(DitaId(e))
                if e.get('href', None) is not None:
                    # TODO: Do we limit ourselves to only a certain set of elements?
                    self._xrefS.add(DitaRef(e))

    def _addId(self, theId):
        """Add theId to the ID set. An ID seen a second time is moved to
        the duplicate set and recorded as error 401; later sightings of
        it are ignored."""
        if theId in self._idS:
            self._idS.remove(theId)
            self._dupeIdS.add(theId)
            self.addError(401, (theId.identity,))
        elif theId not in self._dupeIdS:
            self._idS.add(theId)

    @property
    def bytes(self):
        """Size of the input as found at construction, 0 if unknown."""
        return self._bytes

    @property
    def doctype(self):
        """Name of the root element, None if nothing was parsed."""
        return self._doctype

    @property
    def rootId(self):
        """The id of the root element, None if it has none."""
        if self._rootId is not None:
            return self._rootId.id
        return None

    @property
    def isMap(self):
        """True if the root element is 'map', 'bookmap' or ends with 'Map'."""
        return self.doctype == "map" \
            or self.doctype == 'bookmap' \
            or (self.doctype is not None and self.doctype.endswith('Map'))

    @property
    def idS(self):
        """The set of IDs."""
        return self._idS

    @property
    def refS(self):
        """The set of DitaRef objects."""
        return self._xrefS

    def idElemMap(self):
        """Returns a map {id : elem name, ...}."""
        return dict([(anId.id, anId.elem) for anId in self._idS])

    def idObj(self, theString):
        """Return the DitaId whose id equals theString, or None."""
        for anId in self._idS:
            if theString == anId.id:
                return anId
        return None

    def hasId(self, theString):
        """Return True if a non-duplicated ID equals theString."""
        return self.idObj(theString) is not None

    def idElem(self, theString):
        """Return the element name carrying id theString, or None."""
        found = self.idObj(theString)
        if found is None:
            return None
        return found.elem

    def updateErrorCount(self, theMap):
        """Updates a map of {error_code, : count, ...}."""
        # My own errors, then those of my IDs and references
        super(DitaFileObj, self).updateErrorCount(theMap)
        for idObj in self.idS:
            idObj.updateErrorCount(theMap)
        for refObj in self.refS:
            refObj.updateErrorCount(theMap)

    def writeErrorList(self, theList, theSubHead='', theS=sys.stdout):
        """Write the messages in theList (sorted in place) to theS under an
        optional sub-heading. The 'File: ...' heading is written before the
        first non-empty list only. Nothing is written for an empty list."""
        if len(theList) > 0:
            theList.sort()
            if not self._hasWritten:
                theS.write('File: %s\n' % self.identity)
                self._hasWritten = True
            if len(theSubHead) > 0:
                theS.write('%s [%d]:\n' % (theSubHead, len(theList)))
            theS.write('\n'.join(theList))
            theS.write('\n')

    def writeErrors(self, isGeneric, theFilter, theStream=sys.stdout):
        """Writes out errors for me, my IDs and my Refs."""
        self._hasWritten = False
        # Duplicate IDs appear here as they are recorded as 401 file errors
        self.writeErrorList(self.errStrings(isGeneric, theFilter), 'File errors:', theStream)
        # Now IDs
        for anId in sorted(self.idS):
            self.writeErrorList(anId.errStrings(isGeneric, theFilter), 'ID=%s' % anId.identity, theStream)
        # Now Refs
        for aRef in sorted(self._xrefS):
            self.writeErrorList(aRef.errStrings(isGeneric, theFilter), 'Ref=%s' % aRef.identity, theStream)
        if self._hasWritten:
            theStream.write('\n')

    def debugDump(self, s=sys.stdout, prefix=''):
        """Dump of IR for debug purposes."""
        s.write('%sFile: %s\n' % (prefix, self.identity))
        for anId in self._idS:
            anId.debugDump(s, prefix=prefix+'  ')
        for aRef in self._xrefS:
            aRef.debugDump(s, prefix=prefix+'  ')
class DitaFilePath(DitaFileObj):
    """Base class for a DITA topic or map from the file system."""

    def __init__(self, theFilePath):
        """Initialiser with a file path. A file that can not be opened is
        recorded as error 400 rather than raised."""
        try:
            f = open(theFilePath)
        except IOError:
            f = None
        #print 'DitaFilePath(%s)' % theFilePath
        try:
            super(DitaFilePath, self).__init__(f, theFilePath)
        finally:
            # The parent parses during construction and does not keep the
            # file object, so the handle can be released here
            if f is not None:
                f.close()
        if f is None:
            self.addError(400, (theFilePath,))
class DitaFileMapBase(object):
    """Interface for a store of {file path : class DitaFile, ...}.

    The store itself is supplied by subclasses, for example in memory or
    in a database such as the shelve module. Every method here raises
    NotImplementedError."""

    def keys(self):
        """Unsorted list of the file paths held."""
        raise NotImplementedError()

    def has_key(self, thePath):
        """True if thePath is held."""
        raise NotImplementedError()

    def remove(self, thePath):
        """Delete the entry for thePath, may raise KeyError."""
        raise NotImplementedError()

    def getDitaFileObj(self, thePath):
        """The DitaFileObj stored for thePath, may raise KeyError."""
        raise NotImplementedError()

    def setDitaFileObj(self, thePath, theObj):
        """Store theObj for thePath; also used to write back a DitaFileObj
        that has been mutated."""
        raise NotImplementedError()
class DitaFileMapInMemory(DitaFileMapBase):
    """Holds map of {file path : class DitaFile, ...} in memory."""

    def __init__(self):
        # Map of {file path : class DitaFile, ...}
        self._fileMap = {}

    def keys(self):
        """Returns an unsorted list of keys in the map."""
        # list() so callers always get a real list that they can sort
        return list(self._fileMap)

    def has_key(self, thePath):
        """Return True if the key exists."""
        return thePath in self._fileMap

    def remove(self, thePath):
        """Remove the entry corresponding to thePath, may raise KeyError."""
        del self._fileMap[thePath]

    def getDitaFileObj(self, thePath):
        """Return a DitaFileObj that corresponds to thePath, may raise KeyError."""
        return self._fileMap[thePath]

    def setDitaFileObj(self, thePath, theObj):
        """Load a DitaFileObj or update a mutated DitaFileObj."""
        self._fileMap[thePath] = theObj
class DitaFileMapShelve(DitaFileMapBase):
    """Holds map of {file path : class DitaFile, ...} in a shelve database.

    The database file is created in the current working directory and any
    previous content is discarded. Objects read back are copies, so a
    mutated object must be stored again with setDitaFileObj()."""
    DBASE_FILENAME = 'linkchecker.dbase'

    def __init__(self):
        if os.path.exists(self.DBASE_FILENAME):
            os.remove(self.DBASE_FILENAME)
        # flag='n' always creates a new, empty database. The removal above
        # misses the old data when the dbm backend stores it under a name
        # with an added suffix.
        self._db = shelve.open(self.DBASE_FILENAME, flag='n')
        # Use this as a 'cache' as shelf.keys() is slow
        self._keys = set()

    def keys(self):
        """Returns an unsorted list of keys in the map."""
        return list(self._keys)

    def has_key(self, thePath):
        """Return True if the key exists."""
        return thePath in self._keys

    def remove(self, thePath):
        """Remove the entry corresponding to thePath, may raise KeyError."""
        del self._db[thePath]
        self._keys.remove(thePath)

    def getDitaFileObj(self, thePath):
        """Return a DitaFileObj that corresponds to thePath, may raise KeyError."""
        return self._db[thePath]

    def setDitaFileObj(self, thePath, theObj):
        """Load a DitaFileObj or update a mutated DitaFileObj."""
        self._db[thePath] = theObj
        self._keys.add(thePath)
-class DitaFileSet(DitaLinkCheckBase):
-"""Holds information about a set of DITA files."""
-STATS_KEYS = ('Maps', 'Non-maps', 'Files', 'Bytes', 'IDs', 'Refs')
def __init__(self,
             theDir,
             procDir=True,
             thePatterns=None,
             recursive=False,
             testExt=False,
             useDbase=False):
    """Constructor. theDir is the root directory of DITA XML.
    procDir - If True then process this directory immediately, otherwise
        the directory can be processed independently and
        _addFileObj() or _addDitaFileObj() invoked.
    thePatterns - If supplied this should be a space separated string of
        fnmatch extensions, or a sequence of such patterns.
        The default is FNMATCH_STRING.
    recursive - If True and procDir True the directory is processed recursively.
    testExt - If True then test external URLs.
    useDbase - If True then store all DitaFile objects in an external dbase
        (slower but less memory issues).
    """
    if thePatterns is None:
        thePatterns = FNMATCH_STRING.split(' ')
    elif hasattr(thePatterns, 'split'):
        # A string must be split into patterns, otherwise the directory
        # reader would try each single character as a pattern
        thePatterns = thePatterns.split()
    if theDir is not None:
        theDir = normalisePath(theDir)
    super(DitaFileSet, self).__init__(theDir)
    logging.info('DitaFileSet starting to read...')
    GlobalUrlCache.clear()
    self._testExt = testExt
    # Set up how we store the DitaFile objects
    if useDbase:
        self._fileMap = DitaFileMapShelve()
    else:
        self._fileMap = DitaFileMapInMemory()
    # Map of (str(rootId) : filepath, ...) with no duplicates
    # Keys will be in self._uniqueRootIds
    self._rootIdToFilePathMap = {}
    # Path to the unique DITA map
    self._uniqueMapPath = None
    # Count of {error_code : count, ...}
    self._errCountMap = CountDict()
    # Statistics
    self._statsMap = CountDict()
    # Finalisation control (weak)
    self._hasFinalised = False
    # Timers; _timeRead holds the start time until finalise() turns it
    # into a duration
    self._timeRead = time.clock()
    self._timeAnalyse = 0.0
    if procDir:
        if theDir is not None and os.path.isdir(theDir):
            self._readDir(theDir, thePatterns, recursive)
        else:
            self.addError(500, (theDir,))
        # Finalise and run all the tests
        self.finalise()
@property
def errCountMap(self):
    """The map of {error_code : count, ...}."""
    return self._errCountMap
@property
def statsMap(self):
    """The map of read statistics {name : count, ...}."""
    return self._statsMap
def writeStatistics(self, s=sys.stdout):
    """Writes out read statistics.

    One line per entry of STATS_KEYS, giving the count and the count
    divided by 1024*1024, or 'Not seen' for a key that was never counted,
    followed by the read and analysis times."""
    s.write(' Statistics '.center(PRINT_WIDTH, '='))
    s.write('\n')
    if len(self._statsMap) > 0:
        for k in self.STATS_KEYS:
            # Membership is tested first: indexing the statistics map
            # never raises KeyError, it creates a zero entry instead
            if k in self._statsMap:
                count = self._statsMap[k]
                m = count / (1024.0*1024.0)
                s.write('%20s: %10d [%10.3f M]\n' % (k, count, m))
            else:
                s.write('%20s: %10s \n' % (k, 'Not seen'))
        s.write('%20s: %10.3f (s)\n' % ('Read time', self._timeRead))
        s.write('%20s: %10.3f (s)\n' % ('Analysis time', self._timeAnalyse))
        s.write('='*PRINT_WIDTH)
    else:
        s.write('Nothing processed.')
    s.write('\n')
def writeErrorSummary(self, s=sys.stdout):
    """Writes a table of error code, count and generic message in
    ascending code order, or 'No errors' if nothing was counted."""
    s.write(' Error Summary '.center(PRINT_WIDTH, '='))
    s.write('\n')
    if len(self._errCountMap):
        s.write('%4s %10s %s\n' % ('Code', 'Count', 'Error'))
        s.write('%4s %10s %s\n' % ('----', '-----', '-----'))
        # sorted() works whether keys() yields a list or a view
        for c in sorted(self._errCountMap.keys()):
            s.write('%4d %10d %s\n' \
                    % (c, self._errCountMap[c], genericStringForErrorCode(c)))
    else:
        s.write('No errors\n')
    # NOTE(review): indentation is not visible in the source view; the
    # closing rule is assumed to be written in both cases - confirm.
    s.write('='*PRINT_WIDTH)
    s.write('\n')
def writeErrors(self, isGeneric, theFilter, theStream=sys.stdout):
    """Writes out errors for me and my files, the files in sorted path
    order."""
    theStream.write('\n'.join(self.errStrings(isGeneric, theFilter)))
    # sorted() works whether keys() yields a list or a view
    for aFile in sorted(self._fileMap.keys()):
        # Immutable call so just use get
        self._fileMap.getDitaFileObj(aFile).writeErrors(isGeneric, theFilter, theStream)
def allErrStrings(self, isGeneric, theFilter):
    """Return a sorted list of error messages without duplicates including
    files."""
    retSet = set(self.errStrings(isGeneric, theFilter))
    # Visiting order does not matter as everything lands in a set
    for aFilePath in self._fileMap.keys():
        # Immutable call so just use get
        retSet.update(self._fileMap.getDitaFileObj(aFilePath).errStrings(isGeneric, theFilter))
    return sorted(retSet)
-def _readDir(self, theDir, thePatS, recursive):
-assert(os.path.isdir(theDir))
-for aName in os.listdir(theDir):
-aPath = os.path.join(theDir, aName)
-if os.path.isdir(aPath) and recursive:
-self._readDir(aPath, thePatS, recursive)
-elif os.path.isfile(aPath):
-for aPat in thePatS:
-if fnmatch.fnmatch(aName, aPat):
-assert(not self._fileMap.has_key(aPath))
-logging.debug(' Reading %s' % aPath)
-try:
-f = open(aPath)
-except IOError:
-f = None
-self._addFileObj(f, aPath)
-break
def _addFileObj(self, theFileObj, theFilePath):
    """Build a DitaFileObj from the file object and path and add it."""
    self._addDitaFileObj(DitaFileObj(theFileObj, theFilePath))
-def _addDitaFileObj(self, theDitaFileObj):
-if self._fileMap.has_key(theDitaFileObj.identity):
-self.addError(504, (theDitaFileObj.identity,))
-else:
-# Mutable call so use set
-self._fileMap.setDitaFileObj(theDitaFileObj.identity, theDitaFileObj)
-# Update statistics (files, bytes, ids, refs) etc.
-self._statsMap['Files'] += 1
-self._statsMap['Bytes'] += theDitaFileObj.bytes
-self._statsMap['IDs'] += len(theDitaFileObj.idS)
-self._statsMap['Refs'] += len(theDitaFileObj.refS)
-if theDitaFileObj.isMap:
-self._statsMap['Maps'] += 1
-else:
-self._statsMap['Non-maps'] += 1
def finalise(self):
    """Creates the environment for all checks and then runs them.
    Only the first call does any work; the read and analysis timers are
    turned into durations on that call."""
    logging.info('DitaFileSet.finalise() start...')
    if not self._hasFinalised:
        now = time.clock()
        self._timeRead = now - self._timeRead
        self._timeAnalyse = time.clock()
        # The checks, in the order they must run
        self._initRootIdToFilePathMap()
        self._checkDupeIdS()
        self._setMapCycles()
        self._checkLonely()
        self._checkRefArcs()
        # Recount all errors from scratch
        self._errCountMap = CountDict()
        self.updateErrorCount(self._errCountMap)
        self._hasFinalised = True
        self._timeAnalyse = time.clock() - self._timeAnalyse
    logging.info('DitaFileSet.finalise() done.')
-def _initRootIdToFilePathMap(self):
-# Map of (str(rootId) : filepath, ...) with no duplicates
-self._rootIdToFilePathMap = {}
-# Temporary map of (str(rootId) : [filepath, ...], ...)
-myDupeIdFiles = {}
-for fPath in self._fileMap.keys():
-# fObj is not written to so we don't need to use set
-fObj = self._fileMap.getDitaFileObj(fPath)
-#print 'TRACE: _initRootIdToFilePathMap() fPath:', fPath
-rId = fObj.rootId
-if rId is not None:
-if myDupeIdFiles.has_key(rId):
-#print 'TRACE: _initRootIdToFilePathMap() another dupe:', fPath
-myDupeIdFiles[rId].append(fObj.identity)
-elif self._rootIdToFilePathMap.has_key(rId):
-#print 'TRACE: _initRootIdToFilePathMap() first dupe:', fPath
-# Remove from map and add to myDupeIdFiles
-myFile = self._rootIdToFilePathMap.pop(rId)
-try:
-myDupeIdFiles[rId].append(myFile)
-except KeyError:
-myDupeIdFiles[rId] = [myFile,]
-myDupeIdFiles[rId].append(fPath)
-else:
-#print 'TRACE: _initRootIdToFilePathMap() adding:', fPath
-self._rootIdToFilePathMap[rId] = fObj.identity
-# Set duplicate errors
-for k in myDupeIdFiles.keys():
-myDupeIdFiles[k].sort()
-self.addError(501, (k, tuple(myDupeIdFiles[k])))
-#self.addError(501, (k, str([str(a) for a in myDupeIdFiles[k]])))
-def _checkDupeIdS(self):
-"""Checks if there are any duplicate IDs anywhere."""
-# {ID : [fileS, ...], ...}
-myDupeIdMap = {}
-# Temporary data structure
-# {ID : first file ID is seen in, ...}
-seenIdMap = {}
-for f in self._fileMap.keys():
-# o is not written to so we don't need set...
-o = self._fileMap.getDitaFileObj(f)
-for anId in o.idS:
-if seenIdMap.has_key(anId):
-try:
-myDupeIdMap[anId].append(f)
-except KeyError:
-myDupeIdMap[anId] = [seenIdMap[anId],]
-myDupeIdMap[anId].append(f)
-else:
-seenIdMap[anId] = f
-# Now add to errs as a 505 error message
-# Sort the files in the map
-for k in myDupeIdMap.keys():
-myDupeIdMap[k].sort()
-self.addError(505, (k, tuple(myDupeIdMap[k])))
-#self.addError(505, (k, str([str(a) for a in myDupeIdMap[k]])))
-def _retMapAdjList(self):
-"""Create an adjacency list {file_path : set(refs), ...} (all strings)"""
-adjList = {}
-for f in self._fileMap.keys():
-fObj = self._fileMap.getDitaFileObj(f)
-if fObj.isMap:# and fObj.rootId is not None:
-assert(fObj.identity not in adjList.keys())
-refSet = set()
-for r in fObj.refS:
-refSet.add(r.fileFragment(fObj.identity)[0])
-adjList[fObj.identity] = refSet
-return adjList
-def _setMapCycles(self):
-"""Sets any cyclic references seen in DITA maps."""
-adjList = self._retMapAdjList()
-# A branch
-myBr = []
-myCycles = set()
-for aPath, aSet in adjList.items():
-myBr.append(aPath)
-self._recurseCycles(adjList, myBr, myCycles)
-myBr.pop()
-self._setCycleErrors(myCycles)
-def _recurseCycles(self, a, b, c):
-assert(len(b) > 0)
-try:
-myPath = b[-1]
-for r in a[myPath]:
-#print '_recurseCycles() testing r', r
-#print '_recurseCycles() testing b', b
-if r in b:
-#print 'Adding cycle', tuple(b[b.index(r):])
-c.add(tuple(b[b.index(r):]))
-else:
-b.append(r)
-self._recurseCycles(a, b, c)
-b.pop()
-except KeyError:
-pass
-def _setCycleErrors(self, theC):
-for aT in theC:
-self.addError(701, (str(aT),))
-myL = list(aT)
-assert(len(myL) > 0)
-i = 0
-while i < len(myL):
-myL.append(myL[0])
-# Should this be in the file thus, or in the files set?
-# As we are mutating the file object we need to use both
-# getDitaFileObj() and setDitaFileObj()
-fObj = self._fileMap.getDitaFileObj(myL[0])
-fObj.addError(701, (str(myL),))
-self._fileMap.setDitaFileObj(myL[0], fObj)
-myL.pop()
-myL.append(myL.pop(0))
-i += 1
-def _checkLonely(self):
-self._checkLonelyMaps()
-self._checkLonelyTopics()
-def _checkLonelyMaps(self):
-"""Checks for lonely maps."""
-mapPathSet = set()
-pathSetRemain = set()
-for f in self._fileMap.keys():
-if self._fileMap.getDitaFileObj(f).isMap:
-mapPathSet.add(f)
-pathSetRemain.add(f)
-for aPath in mapPathSet:
-myMapObj = self._fileMap.getDitaFileObj(aPath)
-for r in myMapObj.refS:
-refFile, frag = r.fileFragment(f)
-try:
-pathSetRemain.remove(refFile)
-except KeyError:
-# refFile is a topic or an already seen map
-pass
-if len(pathSetRemain) > 1:
-for aPath in pathSetRemain:
-self.addError(700, (aPath,))
-elif len(pathSetRemain) == 1:
-self._uniqueMapPath = pathSetRemain.pop()
-def _checkLonelyTopics(self):
-"""Checks for topics that are not referenced by any map."""
-mapPathSet = set()
-pathSetRemain = set()
-for f in self._fileMap.keys():
-#print 'TRACE: f:', f
-if self._fileMap.getDitaFileObj(f).isMap:
-mapPathSet.add(f)
-else:
-pathSetRemain.add(f)
-#print 'TRACE: mapPathSet', mapPathSet
-#print 'TRACE: pathSetRemain', pathSetRemain
-for aMapPath in mapPathSet:
-myMapObj = self._fileMap.getDitaFileObj(aMapPath)
-for r in myMapObj.refS:
-refFile, frag = r.fileFragment(aMapPath)
-#print 'TRACE: removing:', refFile
-try:
-pathSetRemain.remove(refFile)
-except KeyError:
-# topic has already been seen in another map
-pass
-if len(pathSetRemain) > 0:
-for aPath in pathSetRemain:
-self.addError(600, (aPath,))
def _checkRefArcs(self):
    """Checks that every reference in every file can be reached.

    References with a URI scheme are only handed to checkUrl(), and only
    when external testing is switched on. All other references are
    resolved to a (file, fragment) pair relative to the referencing file:
      410 - the file is neither in the file map nor on the file system.
      411 - the file is in the file map but has no such fragment ID.
    Element name compatibility is delegated to _checkRefArcElemName().
    A file object that gained an error is written back to the file map.
    """
    for srcPath in self._fileMap.keys():
        srcFile = self._fileMap.getDitaFileObj(srcPath)
        dirty = False
        for ref in srcFile.refS:
            if ref.scheme:
                # External URL, test it only on request.
                # NOTE(review): dirty is not set here, so anything that
                # checkUrl() records on the reference is only written back
                # if another error marks this file as changed - confirm.
                if self._testExt:
                    ref.checkUrl()
                continue
            tgtPath, frag = ref.fileFragment(srcPath)
            assert tgtPath is not None, \
                'fi is None for rObjSrc: %s in file: %s' % (ref, srcPath)
            assert frag is not None, \
                'fr is None for rObjSrc: %s in file: %s' % (ref, srcPath)
            try:
                tgtFile = self._fileMap.getDitaFileObj(tgtPath)
            except KeyError:
                # Not in the IR; it may still be a non-DITA resource that
                # exists on the file system.
                if not os.path.isfile(tgtPath):
                    srcFile.addError(410, (tgtPath,))
                    dirty = True
                continue
            if len(frag) > 0 and not tgtFile.hasId(frag):
                # File found but the fragment is not
                srcFile.addError(411, (tgtPath, frag))
                dirty = True
            if self._checkRefArcElemName(srcFile, ref, tgtFile, frag):
                dirty = True
        if dirty:
            self._fileMap.setDitaFileObj(srcPath, srcFile)
def _checkRefArcElemName(self, fObjSrc, rObjSrc, fObjTgt, frag):
    """Test source and target element names
    e.g. Source <cxxClassRef> should match target <cxxClass>
    And in vanilla DITA:
    <topicref href="batcaring.dita" type="task"></topicref>
    or:
    <topicref href="batcaring.dita" format="ditamap"></topicref>
    Should match target element <task>.

    fObjSrc -- file object that holds the reference; errors are added to it.
    rObjSrc -- the reference; its elem, format and refType are inspected.
    fObjTgt -- file object that the reference resolves to.
    frag -- fragment of the reference; '' means the root element of fObjTgt.

    Errors added to fObjSrc:
      412/413 - <fooRef> does not point at a <foo> (root/fragment target).
      414 - topicref with format="ditamap" whose target is not a <map>.
      415 - topicref to a <map> without format="ditamap".
      416/417 - topicref type attribute differs from the target element
                (root/fragment target).
      418/419 - referencing element is not a known kind (root/fragment
                target).

    Returns True if an error was added to fObjSrc, False otherwise. The
    early exits always return False (never None) so that the result is a
    bool on every path."""
    if len(frag) == 0:
        # The target is the root element of fObjTgt
        if fObjTgt.rootId is None or fObjTgt.idElem(fObjTgt.rootId) is None:
            # Covered by other error codes
            return False
        iObjTgt = fObjTgt.idObj(fObjTgt.rootId)
        isRootTgt = True
    elif fObjTgt.hasId(frag):
        iObjTgt = fObjTgt.idObj(frag)
        isRootTgt = False
    else:
        # frag not found that will be a 411 error (handled by caller).
        return False
    srcElem = rObjSrc.elem
    tgtElem = iObjTgt.elem
    # First case: <fooRef> must point at <foo>
    if srcElem.endswith('Ref'):
        if srcElem[:-3] == tgtElem:
            return False
        if isRootTgt:
            fObjSrc.addError(412, (srcElem, tgtElem))
        else:
            fObjSrc.addError(413, (fObjTgt.idElem(frag), srcElem, frag))
        return True
    # Second case(s) for vanilla DITA
    if srcElem == 'topicref':
        isMapFormat = rObjSrc.format == 'ditamap'
        isMapTgt = tgtElem == 'map'
        if isMapFormat and not isMapTgt:
            # Target must be a root element (actually we don't care)
            fObjSrc.addError(414, (tgtElem,))
            return True
        if isMapTgt and not isMapFormat:
            fObjSrc.addError(415, (rObjSrc.format,))
            return True
        if isMapFormat:
            # A ditamap reference to a <map>, nothing more to check.
            return False
        # Neither side is a map. An absent type attribute is accepted
        # whatever the target is: per the DITA standard for <topicref> the
        # type "should be determined by inspecting the target if possible".
        # Only an explicit type that disagrees with the target is an error.
        refType = rObjSrc.refType
        if refType is None or refType == tgtElem:
            return False
        if isRootTgt:
            fObjSrc.addError(416, (refType, tgtElem,))
        else:
            fObjSrc.addError(417, (refType, tgtElem, frag,))
        return True
    if srcElem != 'xref' and srcElem not in XREF_DESCENDENTS:
        # Unknown referencing element
        if isRootTgt:
            fObjSrc.addError(418, (srcElem, fObjTgt.doctype))
        else:
            fObjSrc.addError(419, (srcElem, fObjTgt.idElem(frag), frag))
        return True
    return False
def updateErrorCount(self, theMap):
    """Adds this file set's error counts to theMap, a {error_code : count, ...}.

    Errors held directly by the set are counted first. Each file object is
    then asked to add its own counts; because that call may change the file
    object, the object is stored back into the file map afterwards.
    theMap must accept += on a code it has not seen yet.
    """
    setErrs = self._errS
    if setErrs is not None:
        for code in setErrs.keys():
            theMap[code] += len(setErrs[code])
    fileMap = self._fileMap
    for path in fileMap.keys():
        ditaFile = fileMap.getDitaFileObj(path)
        ditaFile.updateErrorCount(theMap)
        # Mutable call so need to update
        fileMap.setDitaFileObj(path, ditaFile)
def debugDump(self, s=sys.stdout, prefix=''):
    """Writes a debug dump of the IR to the stream s.

    A banner line opens the dump, every file object then dumps itself
    (in sorted path order, with prefix handed on unchanged) and a closing
    banner followed by a blank line ends it.
    """
    openBanner = ' Debug Dump '.center(PRINT_WIDTH, '+')
    closeBanner = ' END Debug Dump '.center(PRINT_WIDTH, '+')
    s.write(openBanner)
    s.write('\n')
    for path in sorted(self._fileMap.keys()):
        self._fileMap.getDitaFileObj(path).debugDump(s, prefix)
    s.write(closeBanner)
    s.write('\n\n')
-#####################################
-# Multiprocessing code
-#####################################
def retDitaFileObj(thePath):
    """Returns the result of DitaFilePath() for thePath.

    A module level function so that it can be handed to
    multiprocessing.Pool.apply_async() by retMpDitaFileSetObj().
    """
    ditaObj = DitaFilePath(thePath)
    return ditaObj
def genDitaPath(theDir, thePatS, recursive):
    """Generates the paths of the files under theDir that match a pattern.

    theDir -- an existing directory (asserted).
    thePatS -- fnmatch style patterns, tested against the file name only.
    recursive -- if true sub-directories are descended into.

    A file is yielded once however many patterns it matches. The order
    is that of os.listdir().
    """
    assert os.path.isdir(theDir)
    for entry in os.listdir(theDir):
        fullPath = os.path.join(theDir, entry)
        if recursive and os.path.isdir(fullPath):
            for found in genDitaPath(fullPath, thePatS, recursive):
                yield found
            continue
        if not os.path.isfile(fullPath):
            continue
        if any(fnmatch.fnmatch(entry, pat) for pat in thePatS):
            yield fullPath
def retMpDitaFileSetObj(theDir,
                        thePatterns,
                        recursive,
                        numJobs,
                        checkExt,
                        useDb):
    """Returns a finalised DitaFileSet built with a pool of worker processes.

    theDir -- an existing directory (asserted) to search for files.
    thePatterns -- fnmatch patterns selecting the files, see genDitaPath().
    recursive -- if true sub-directories are searched as well.
    numJobs -- number of worker processes, 0 means one per CPU.
    checkExt -- passed to DitaFileSet as testExt.
    useDb -- passed to DitaFileSet as useDbase.

    One task per file is submitted to the pool and the results are added
    to the set in submission order. The pool is closed and joined before
    returning, or terminated and joined if anything goes wrong, so that no
    worker processes are left behind.
    """
    assert os.path.isdir(theDir)
    assert numJobs >= 0
    retObj = DitaFileSet(theDir, procDir=False, testExt=checkExt, useDbase=useDb)
    myNumJobs = numJobs
    if numJobs == 0:
        myNumJobs = multiprocessing.cpu_count()
    logging.info('Set multiprocessing number of jobs to %d' % myNumJobs)
    myPool = multiprocessing.Pool(processes=myNumJobs)
    try:
        # Submit everything first so that the workers are kept busy while
        # the results are collected.
        pending = [
            myPool.apply_async(retDitaFileObj, (f,))
            for f in genDitaPath(theDir, thePatterns, recursive)
        ]
        for result in pending:
            myObj = result.get()
            logging.debug('Got %s' % myObj.identity)
            retObj._addDitaFileObj(myObj)
        myPool.close()
    except BaseException:
        # Do not wait for outstanding tasks when unwinding from an error;
        # join() below needs the pool to be closed or terminated first.
        myPool.terminate()
        raise
    finally:
        myPool.join()
    # Note: finalise() is a serial process
    logging.info('retMpDitaFileSetObj(): finalising')
    retObj.finalise()
    return retObj
-######################################
-# Test code
-######################################
-try:
-import cStringIO as StringIO
-except ImportError:
-import StringIO
class NullClass(unittest.TestCase):
    """A TestCase that defines no tests."""
    pass
class TestCountDict(unittest.TestCase):
    """Tests CountDict: a lookup of a missing key gives 0 and creates it."""

    def setUp(self):
        pass

    def tearDown(self):
        pass

    def testSetUpTearDown(self):
        """TestCountDict: test setUp() and tearDown()."""
        pass

    def test_basic(self):
        """TestCountDict: test basic functionality."""
        key = 'wtf'
        counts = CountDict()
        # The key only exists once it has been read
        self.assertEqual(counts.has_key(key), False)
        self.assertEqual(counts[key], 0)
        self.assertEqual(counts.has_key(key), True)
        counts[key] += 1
        self.assertEqual(counts[key], 1)
class TestDitaId(unittest.TestCase):
    """Tests DitaId objects created from single element XML documents."""

    def setUp(self):
        pass

    def tearDown(self):
        pass

    def _rootOf(self, theXml):
        """Returns the root element of the XML text theXml."""
        return etree.parse(StringIO.StringIO(theXml)).getroot()

    def testSetUpTearDown(self):
        """DitaId: test setUp() and tearDown()."""
        pass

    def test_basic(self):
        """DitaId: basic read of an node with an id"""
        idObj = DitaId(self._rootOf("""<cxxClass id="class_big_endian"/>"""))
        self.assertEqual(idObj.id, 'class_big_endian')
        self.assertEqual(str(idObj), 'class_big_endian')
        for isGeneric in (True, False):
            self.assertEqual(idObj.errStrings(isGeneric, None), [])

    def test_guid_00(self):
        """DitaId: basic read of an node with an GUID id"""
        idObj = DitaId(
            self._rootOf("""<cxxClass id="GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E"/>""")
        )
        self.assertEqual(idObj.id, 'GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E')
        idObj.checkGuid()
        for isGeneric in (True, False):
            self.assertEqual(idObj.errStrings(isGeneric, None), [])

    def test_guid_01(self):
        """DitaId: basic read of an node with an GUID id fails"""
        idObj = DitaId(
            self._rootOf("""<cxxClass id="25825EC4-341F-3EA4-94AA-7DCE380E6D2E"/>""")
        )
        self.assertEqual(idObj.id, '25825EC4-341F-3EA4-94AA-7DCE380E6D2E')
        idObj.checkGuid()
        expSpecific = [
            'GUID specification does not match id="25825EC4-341F-3EA4-94AA-7DCE380E6D2E"'
        ]
        expGeneric = [
            'GUID specification does not match id="%s"' % GENERIC_STRING,
        ]
        self.assertEqual(idObj.errStrings(False, None), expSpecific)
        self.assertEqual(idObj.errStrings(True, None), expGeneric)

    def test_cmp_eq_00(self):
        """DitaId: cmp(), == of two identical nodes"""
        root = self._rootOf("""<cxxClass id="class_big_endian"/>""")
        first = DitaId(root)
        second = DitaId(root)
        self.assertEqual(cmp(first, second), 0)
        self.assertEqual((first == second), True)

    def test_cmp_eq_01(self):
        """DitaId: cmp(), == of two identical nodes from different elements."""
        first = DitaId(self._rootOf("""<cxxClass id="big_endian"/>"""))
        second = DitaId(self._rootOf("""<cxxStruct id="big_endian"/>"""))
        self.assertEqual(cmp(first, second), 0)
        self.assertEqual((first == second), True)

    def test_set(self):
        """DitaId: read of an node with an id several times into a set and check unique,"""
        root = self._rootOf("""<cxxClass id="class_big_endian"/>""")
        seen = set()
        for _ in range(8):
            seen.add(DitaId(root))
        self.assertEqual(len(seen), 1)
        self.assertEqual(DitaId(root) in seen, True)

    def test_map(self):
        """DitaId: read of an node with an id several times into a map and check unique,"""
        root = self._rootOf("""<cxxClass id="class_big_endian"/>""")
        seen = {}
        for _ in range(8):
            seen[DitaId(root)] = 1
        self.assertEqual(len(seen), 1)
        self.assertEqual(seen.has_key(DitaId(root)), True)

    def test_error_hash(self):
        """DitaId: error with a '#' in an id"""
        idObj = DitaId(self._rootOf("""<cxxClass id="class_#big_endian"/>"""))
        self.assertEqual(idObj.id, 'class_#big_endian')
        self.assertEqual(str(idObj), 'class_#big_endian')
        expGeneric = [
            genericStringForErrorCode(100),
        ]
        expSpecific = [
            'Character \'#\' not allowed in id="class_#big_endian"',
        ]
        self.assertEqual(idObj.errStrings(True, None), expGeneric)
        self.assertEqual(idObj.errStrings(False, None), expSpecific)
class TestDitaRef(unittest.TestCase):
    """Tests DitaRef objects created from single <xref> element documents."""

    def setUp(self):
        pass

    def tearDown(self):
        pass

    def _refOf(self, theXml):
        """Returns a DitaRef made from the root element of theXml."""
        return DitaRef(etree.parse(StringIO.StringIO(theXml)).getroot())

    def _assertNoErrors(self, theRef):
        """Asserts that theRef has neither specific nor generic errors."""
        for isGeneric in (False, True):
            self.assertEqual(theRef.errStrings(isGeneric, None), [])

    def _drivePath(self, *theParts):
        """Returns theParts joined below a 'C:' drive root."""
        return os.path.join('C:%s' % os.sep, *theParts)

    def testSetUpTearDown(self):
        """DitaRef: test setUp() and tearDown()."""
        pass

    def test_basic(self):
        """DitaRef: basic read of an xref node, no fragment"""
        ref = self._refOf("""<xref href="class_big_endian"/>""")
        self.assertEqual(ref.href, 'class_big_endian')
        self.assertEqual(ref.path, 'class_big_endian')
        self.assertEqual(ref.elem, 'xref')
        self.assertEqual(str(ref), 'xref class_big_endian')
        self.assertEqual(ref.fragment, '')
        self.assertEqual(ref.scheme, '')
        self._assertNoErrors(ref)

    def test_basic_frag(self):
        """DitaRef: basic read of an xref node, with fragment"""
        ref = self._refOf("""<xref href="class_big_endian.xml#function"/>""")
        self.assertEqual(ref.href, 'class_big_endian.xml#function')
        self.assertEqual(ref.path, 'class_big_endian.xml')
        self.assertEqual(ref.fragment, 'function')
        self.assertEqual(ref.scheme, '')
        self._assertNoErrors(ref)

    def test_file_frag_00(self):
        """DitaRef: accessing an xref node, with a file and a fragment"""
        ref = self._refOf("""<xref href="class_big_endian.xml#function"/>""")
        self.assertEqual(ref.href, 'class_big_endian.xml#function')
        self.assertEqual(ref.path, 'class_big_endian.xml')
        self.assertEqual(ref.fragment, 'function')
        self.assertEqual(ref.scheme, '')
        srcPath = normalisePath(self._drivePath('spam', 'eggs.xml'))
        expPath = normalisePath(self._drivePath('spam', 'class_big_endian.xml'))
        self.assertEqual(ref.fileFragment(srcPath), (expPath, 'function'))
        self._assertNoErrors(ref)

    def test_file_frag_01(self):
        """DitaRef: accessing an xref node, with a file and a fragment and relative path with '\\'."""
        ref = self._refOf("""<xref href="..\\chips\\class_big_endian.xml#function"/>""")
        srcPath = normalisePath(self._drivePath('spam', 'eggs.xml'))
        expPath = normalisePath(self._drivePath('chips', 'class_big_endian.xml'))
        self.assertEqual(ref.fileFragment(srcPath), (expPath, 'function'))
        self._assertNoErrors(ref)

    def test_file_frag_02(self):
        """DitaRef: accessing an xref node, with a file and a fragment and relative path with '/'."""
        ref = self._refOf("""<xref href="../chips/class_big_endian.xml#function"/>""")
        srcPath = normalisePath(self._drivePath('spam', 'eggs.xml'))
        expPath = normalisePath(self._drivePath('chips', 'class_big_endian.xml'))
        self.assertEqual(ref.fileFragment(srcPath), (expPath, 'function'))
        self._assertNoErrors(ref)

    def test_file_frag_03(self):
        """DitaRef: accessing an xref node, with a no file but with a fragment"""
        ref = self._refOf("""<xref href="#function"/>""")
        self.assertEqual(ref.href, '#function')
        self.assertEqual(ref.path, '')
        self.assertEqual(ref.fragment, 'function')
        self.assertEqual(ref.scheme, '')
        # With no file part the reference resolves to the source file itself
        srcPath = normalisePath(self._drivePath('spam', 'eggs.xml'))
        expPath = normalisePath(self._drivePath('spam', 'eggs.xml'))
        self.assertEqual(ref.fileFragment(srcPath), (expPath, 'function'))
        self._assertNoErrors(ref)

    def test_basic_scheme(self):
        """DitaRef: an xref node with a URI scheme"""
        ref = self._refOf(
            """<xref href="http://www.cwi.nl:80/%7Eguido/Python.html#fragment"/>"""
        )
        self.assertEqual(ref.href, 'http://www.cwi.nl:80/%7Eguido/Python.html#fragment')
        self.assertEqual(ref.path, '/%7Eguido/Python.html')
        self.assertEqual(ref.fragment, 'fragment')
        self.assertEqual(ref.scheme, 'http')
        self._assertNoErrors(ref)

    def test_basic_scheme_file_frag(self):
        """DitaRef: an xref node with a URI scheme, invoking fileFragment()"""
        ref = self._refOf(
            """<xref href="http://www.cwi.nl:80/%7Eguido/Python.html#fragment"/>"""
        )
        self.assertEqual(ref.href, 'http://www.cwi.nl:80/%7Eguido/Python.html#fragment')
        self.assertEqual(ref.path, '/%7Eguido/Python.html')
        self.assertEqual(ref.fragment, 'fragment')
        self.assertEqual(ref.scheme, 'http')
        srcPath = self._drivePath('spam', 'eggs.xml')
        self.assertEqual(ref.fileFragment(srcPath), (None, None))
        self._assertNoErrors(ref)

    def test_fail_no_href(self):
        """DitaRef: Fails on an xref node with no href attribute"""
        ref = self._refOf("""<xref />""")
        expSpecific = [
            'Reference element "xref" is missing href=... attribute',
        ]
        expGeneric = [
            'Reference element "%s" is missing href=... attribute' % GENERIC_STRING,
        ]
        self.assertEqual(ref.errStrings(False, None), expSpecific)
        self.assertEqual(ref.errStrings(True, None), expGeneric)

    def test_fail_bad_frag(self):
        """DitaRef: Fails on an xref node with href attribute that has multiple '#' characters"""
        ref = self._refOf("""<xref href="a#b#c" />""")
        expSpecific = [
            'Multiple \'#\' not allowed in reference "a#b#c"',
        ]
        expGeneric = [
            'Multiple \'#\' not allowed in reference "%s"' % GENERIC_STRING,
        ]
        self.assertEqual(ref.errStrings(False, None), expSpecific)
        self.assertEqual(ref.errStrings(True, None), expGeneric)

    def test_guid_00(self):
        """DitaRef: basic read of an node with an GUID file/fragment reference"""
        ref = self._refOf(
            """<xref href="GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E.xml#GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E"/>"""
        )
        self.assertEqual(
            ref.href,
            'GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E.xml#GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E'
        )
        self.assertEqual(ref.path, 'GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E.xml')
        self.assertEqual(ref.elem, 'xref')
        self.assertEqual(
            str(ref),
            'xref GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E.xml#GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E'
        )
        self.assertEqual(ref.fragment, 'GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E')
        self.assertEqual(ref.scheme, '')
        self._assertNoErrors(ref)

    def test_guid_01(self):
        """DitaRef: basic read of an node with an GUID file part fails"""
        ref = self._refOf(
            """<xref href="GUID-.xml#GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E"/>"""
        )
        self.assertEqual(ref.href, 'GUID-.xml#GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E')
        self.assertEqual(ref.path, 'GUID-.xml')
        self.assertEqual(ref.elem, 'xref')
        self.assertEqual(str(ref), 'xref GUID-.xml#GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E')
        self.assertEqual(ref.fragment, 'GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E')
        # No errors until the GUID check has been asked for
        self._assertNoErrors(ref)
        ref.checkGuid()
        expSpecific = [
            'GUID specification does not match file reference "GUID-.xml"'
        ]
        expGeneric = [
            genericStringForErrorCode(203),
        ]
        self.assertEqual(ref.errStrings(False, None), expSpecific)
        self.assertEqual(ref.errStrings(True, None), expGeneric)

    def test_guid_02(self):
        """DitaRef: basic read of an node with an GUID fragment part fails"""
        ref = self._refOf(
            """<xref href="GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E.xml#GUID-25825EC4"/>"""
        )
        self.assertEqual(
            ref.href,
            'GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E.xml#GUID-25825EC4'
        )
        self.assertEqual(ref.path, 'GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E.xml')
        self.assertEqual(ref.elem, 'xref')
        self.assertEqual(
            str(ref),
            'xref GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E.xml#GUID-25825EC4'
        )
        self.assertEqual(ref.fragment, 'GUID-25825EC4')
        # No errors until the GUID check has been asked for
        self._assertNoErrors(ref)
        ref.checkGuid()
        expSpecific = [
            'GUID specification does not match fragment reference "GUID-25825EC4"'
        ]
        expGeneric = [
            genericStringForErrorCode(204),
        ]
        self.assertEqual(ref.errStrings(False, None), expSpecific)
        self.assertEqual(ref.errStrings(True, None), expGeneric)
class TestDitaFile(unittest.TestCase):
    """Tests DitaFileObj objects created from in-memory XML files."""

    def setUp(self):
        pass

    def tearDown(self):
        pass

    def _fileObjOf(self, theXml, thePath):
        """Returns a DitaFileObj that reads theXml as the file thePath."""
        return DitaFileObj(StringIO.StringIO(theXml), thePath)

    def testSetUpTearDown(self):
        """DitaFile: test setUp() and tearDown()."""
        pass

    def test_Basic(self):
        """DitaFile: basic read of an XML file"""
        fileObj = self._fileObjOf(
            """<?xml version='1.0' encoding='UTF-8' standalone='no'?>
<!DOCTYPE cxxClass PUBLIC "-//NOKIA//DTD DITA C++ API Class Reference Type v0.1.0//EN" "dtd/cxxClass.dtd" >
<cxxClass id="class_big_endian">
<apiName>BigEndian</apiName>
<shortdesc/>
<cxxClassDetail>
<cxxClassDefinition>
<cxxClassAccessSpecifier value="public"/>
<cxxClassAPIItemLocation>
<cxxClassDeclarationFile name="filePath" value="K:/sf/os/commsfw/datacommsserver/esockserver/inc/es_sock.h"/>
<cxxClassDeclarationFileLine name="lineNumber" value="1520"/>
<cxxClassDefinitionFile name="filePath" value="K:/sf/os/commsfw/datacommsserver/esockserver/inc/es_sock.h"/>
<cxxClassDefinitionFileLineStart name="lineNumber" value="1516"/>
<cxxClassDefinitionFileLineEnd name="lineNumber" value="1526"/>
</cxxClassAPIItemLocation>
</cxxClassDefinition>
<apiDesc>
<p>Inserts and extracts integers in big-endian format.   </p>
</apiDesc>
</cxxClassDetail>
<cxxFunction id="class_big_endian_1a9f78fb092e713acf6ffe3e8e11f1626f">
</cxxFunction>
<cxxFunction id="class_big_endian_1aedf702f5c0118e4294d1a6d9684f8441">
</cxxFunction>
<cxxFunction id="class_big_endian_1ae266722f7bb965c971155a3315bad484">
</cxxFunction>
<cxxFunction id="class_big_endian_1a497d5248ea259f8490fb40ac4f2aafb2">
</cxxFunction>
</cxxClass>""",
            'foo'
        )
        self.assertEqual(fileObj.identity, normalisePath('foo'))
        self.assertEqual(fileObj.doctype, 'cxxClass')
        self.assertEqual(fileObj.rootId, 'class_big_endian')
        expIdElems = {
            'class_big_endian_1a9f78fb092e713acf6ffe3e8e11f1626f'   : 'cxxFunction',
            'class_big_endian_1aedf702f5c0118e4294d1a6d9684f8441'   : 'cxxFunction',
            'class_big_endian'                                      : 'cxxClass',
            'class_big_endian_1a497d5248ea259f8490fb40ac4f2aafb2'   : 'cxxFunction',
            'class_big_endian_1ae266722f7bb965c971155a3315bad484'   : 'cxxFunction',
        }
        self.assertEqual(fileObj.idElemMap(), expIdElems)
        for isGeneric in (False, True):
            self.assertEqual(fileObj.errStrings(isGeneric, None), [])

    def test_missing_file(self):
        """DitaFile: read an missing XML file"""
        fileObj = DitaFileObj(None, 'foo')
        expSpecific = [
            'Failed to open: "%s"' % normalisePath('foo'),
        ]
        expGeneric = [
            genericStringForErrorCode(400),
        ]
        self.assertEqual(fileObj.errStrings(False, None), expSpecific)
        self.assertEqual(fileObj.errStrings(True, None), expGeneric)

    def test_IllFormedFile(self):
        """DitaFile: read an ill-formed XML file"""
        fileObj = self._fileObjOf(
            """<?xml version='1.0' encoding='UTF-8' standalone='no'?>
<!DOCTYPE cxxClass PUBLIC "-//NOKIA//DTD DITA C++ API Class Reference Type v0.1.0//EN" "dtd/cxxClass.dtd" >
<cxxClass id="class_big_endian">
""",
            'foo'
        )
        self.assertEqual(fileObj.identity, normalisePath('foo'))
        self.assertEqual(fileObj.doctype, None)
        self.assertEqual(fileObj.rootId, None)
        self.assertEqual(fileObj.idElemMap(), {})
        expSpecific = [
            'Can not parse: "no element found: line 4, column 0"',
        ]
        expGeneric = [
            genericStringForErrorCode(404),
        ]
        self.assertEqual(fileObj.errStrings(False, None), expSpecific)
        self.assertEqual(fileObj.errStrings(True, None), expGeneric)

    def test_missing_root_id(self):
        """DitaFile: read of an XML file with no id on root element"""
        fileObj = self._fileObjOf(
            """<?xml version='1.0' encoding='UTF-8' standalone='no'?>
<!DOCTYPE cxxClass PUBLIC "-//NOKIA//DTD DITA C++ API Class Reference Type v0.1.0//EN" "dtd/cxxClass.dtd" >
<cxxClass>
<xref href="OtherClass">OtherClass</xref>
<cxxFunction id="class_big_endian_1a9f78fb092e713acf6ffe3e8e11f1626f"/>
</cxxClass>""",
            'foo'
        )
        self.assertEqual(fileObj.identity, normalisePath('foo'))
        self.assertEqual(fileObj.doctype, 'cxxClass')
        self.assertEqual(fileObj.rootId, None)
        expIdElems = {
            'class_big_endian_1a9f78fb092e713acf6ffe3e8e11f1626f'   : 'cxxFunction',
        }
        self.assertEqual(fileObj.idElemMap(), expIdElems)
        # This error has the same text in specific and generic form
        for isGeneric in (False, True):
            self.assertEqual(
                fileObj.errStrings(isGeneric, None),
                [genericStringForErrorCode(402)]
            )

    def test_duplicate_id(self):
        """DitaFile: duplicate IDs"""
        fileObj = self._fileObjOf(
            """<root id="AnID">
<elem id="AnID"/>
</root>""",
            'spam.xml'
        )
        self.assertEqual(fileObj.identity, normalisePath('spam.xml'))
        self.assertEqual(fileObj.doctype, 'root')
        self.assertEqual(fileObj.rootId, 'AnID')
        self.assertEqual(fileObj.idElemMap(), {})
        expSpecific = [
            'Multiple id="AnID"',
        ]
        self.assertEqual(fileObj.errStrings(False, None), expSpecific)
        self.assertEqual(fileObj.errStrings(True, None), [genericStringForErrorCode(401)])

    def test_ismap_00(self):
        """DitaFile: Is a map for <map>."""
        fileObj = self._fileObjOf("""<map id="myMap"/>""", 'spam.xml')
        self.assertEqual(fileObj.isMap, True)

    def test_ismap_01(self):
        """DitaFile: Is a map for <cxxAPIMap>."""
        fileObj = self._fileObjOf("""<cxxAPIMap id="myMap"/>""", 'spam.xml')
        self.assertEqual(fileObj.isMap, True)

    def test_Basic_01(self):
        """DitaFile: read of an simple XML file with id and xref"""
        fileObj = self._fileObjOf(
            """<?xml version='1.0' encoding='UTF-8' standalone='no'?>
<!DOCTYPE cxxClass PUBLIC "-//NOKIA//DTD DITA C++ API Class Reference Type v0.1.0//EN" "dtd/cxxClass.dtd" >
<cxxClass id="class_big_endian">
<xref href="OtherClass">OtherClass</xref>
<cxxFunction id="class_big_endian_1a9f78fb092e713acf6ffe3e8e11f1626f"/>
</cxxClass>""",
            'foo'
        )
        self.assertEqual(fileObj.identity, normalisePath('foo'))
        self.assertEqual(fileObj.doctype, 'cxxClass')
        self.assertEqual(fileObj.rootId, 'class_big_endian')
        self.assertEqual(fileObj.isMap, False)
        self.assertEqual(len(fileObj.idS), 2)
        self.assertEqual(len(fileObj.refS), 1)
        self.assertEqual(fileObj.hasId('class_big_endian'), True)
        self.assertEqual(
            fileObj.hasId('class_big_endian_1a9f78fb092e713acf6ffe3e8e11f1626f'),
            True
        )
        self.assertEqual(fileObj.hasId('noID'), False)
        self.assertEqual(fileObj.idElem('class_big_endian'), 'cxxClass')
        self.assertEqual(fileObj.idElem('noID'), None)
        self.assertEqual(
            fileObj.idElem('class_big_endian_1a9f78fb092e713acf6ffe3e8e11f1626f'),
            'cxxFunction'
        )
        expIdElems = {
            'class_big_endian_1a9f78fb092e713acf6ffe3e8e11f1626f'   : 'cxxFunction',
            'class_big_endian'                                      : 'cxxClass',
        }
        self.assertEqual(fileObj.idElemMap(), expIdElems)
        for isGeneric in (False, True):
            self.assertEqual(fileObj.errStrings(isGeneric, None), [])
-class TestDitaFileSet(unittest.TestCase):
def setUp(self):
    """Does nothing."""
    return None
def tearDown(self):
    """Does nothing."""
    return None
def testSetUpTearDown(self):
    """DitaFileSet: test setUp() and tearDown()."""
    # Nothing to assert, running this test exercises the fixture methods.
    return None
def test_None(self):
    """DitaFileSet: read of None."""
    fileSet = DitaFileSet(None)
    fileSet.finalise()
    expSpecific = ['Not a directory: None']
    expGeneric = ['Not a directory: %s' % GENERIC_STRING, ]
    self.assertEqual(fileSet.errStrings(False, None), expSpecific)
    self.assertEqual(fileSet.errStrings(True, None), expGeneric)
    self.assertEqual(fileSet.errCountMap, {500 : 1})
def test_basic(self):
    """DitaFileSet: Test reading a map and a couple of files."""
    mapXml = """<map id="map_00">
<topicref href="spam.dita" />
<topicref href="eggs.dita" />
</map>"""
    fileSet = DitaFileSet(None, procDir=False)
    # The map and both of the topics that it refers to
    fileSet._addFileObj(StringIO.StringIO(mapXml), 'map.ditamap')
    fileSet._addFileObj(StringIO.StringIO('<topic id="spam"/>'), 'spam.dita')
    fileSet._addFileObj(StringIO.StringIO('<topic id="eggs"/>'), 'eggs.dita')
    fileSet.finalise()
    for isGeneric in (False, True):
        self.assertEqual(fileSet.allErrStrings(isGeneric, None), [])
    self.assertEqual(fileSet.errCountMap, {})
def test_duplicate_paths(self):
    """DitaFileSet: Test reading a couple of files in duplicate paths."""
    mapXml = """<map id="map_00">
<topicref href="spam.dita" />
</map>"""
    fileSet = DitaFileSet(None, procDir=False)
    fileSet._addFileObj(StringIO.StringIO(mapXml), 'map.ditamap')
    # Two different topics are added under the same path
    fileSet._addFileObj(StringIO.StringIO('<topic id="spam"/>'), 'spam.dita')
    fileSet._addFileObj(StringIO.StringIO('<topic id="eggs"/>'), 'spam.dita')
    fileSet.finalise()
    expSpecific = [
        'Duplicate file path: "%s"' % normalisePath('spam.dita'),
    ]
    self.assertEqual(fileSet.errStrings(False, None), expSpecific)
    self.assertEqual(fileSet.errStrings(True, None), [genericStringForErrorCode(504),])
    self.assertEqual(fileSet.errCountMap, {504 : 1})
def test_duplicate_ids(self):
    """DitaFileSet: Test reading a map and a couple of files with duplicate IDs."""
    mapXml = """<map id="map_00">
<topicref href="spam.dita" />
<topicref href="eggs.dita" />
<topicref href="chips.dita" />
</map>"""
    fileSet = DitaFileSet(None, procDir=False)
    fileSet._addFileObj(StringIO.StringIO(mapXml), 'map.ditamap')
    # Three topics, all of them with the root id "chips"
    fileSet._addFileObj(StringIO.StringIO('<topic id="chips"/>'), 'spam.dita')
    fileSet._addFileObj(StringIO.StringIO('<topic id="chips"/>'), 'eggs.dita')
    fileSet._addFileObj(StringIO.StringIO('<topic id="chips"/>'), 'chips.dita')
    fileSet.finalise()
    expGeneric = [
        genericStringForErrorCode(505),
        genericStringForErrorCode(501),
    ]
    self.assertEqual(fileSet.errStrings(True, None), expGeneric)
    dupPaths = (
        normalisePath('chips.dita'),
        normalisePath('eggs.dita'),
        normalisePath('spam.dita'),
    )
    expSpecific = [
        """Duplicate id="chips" in files: ('%s', '%s', '%s')""" % dupPaths,
        """Duplicate root id="chips" in files: ('%s', '%s', '%s')""" % dupPaths,
    ]
    self.assertEqual(fileSet.errStrings(False, None), expSpecific)
    self.assertEqual(fileSet.errCountMap, {505: 1, 501: 1})
def test_lonely_topics(self):
    """DitaFileSet: Test a couple of lonely topics."""
    fileSet = DitaFileSet(None, procDir=False)
    # Two topics and no map to refer to either of them
    fileSet._addFileObj(StringIO.StringIO('<spam id="spam"/>'), 'spam')
    fileSet._addFileObj(StringIO.StringIO('<eggs id="eggs"/>'), 'eggs')
    fileSet.finalise()
    expSpecific = [
        'Topic id="%s" is not referenced by any map' % normalisePath('eggs'),
        'Topic id="%s" is not referenced by any map' % normalisePath('spam'),
    ]
    expGeneric = [
        genericStringForErrorCode(600),
    ]
    self.assertEqual(fileSet.errStrings(False, None), expSpecific)
    self.assertEqual(fileSet.errStrings(True, None), expGeneric)
def test_map_cycles_00(self):
    """DitaFileSet: Cyclic references between two maps."""
    # Each map refers to the other one
    map00Xml = """<map id="map_00">
<topicref href="map_01.ditamap" format="ditamap" />
</map>"""
    map01Xml = """<map id="map_01">
<topicref href="map_00.ditamap" format="ditamap" />
</map>"""
    fileSet = DitaFileSet(None, procDir=False)
    fileSet._addFileObj(StringIO.StringIO(map00Xml), 'map_00.ditamap')
    fileSet._addFileObj(StringIO.StringIO(map01Xml), 'map_01.ditamap')
    fileSet.finalise()
    path00 = normalisePath('map_00.ditamap')
    path01 = normalisePath('map_01.ditamap')
    # The cycle is reported once for each map that it can start from
    expSpecific = [
        'Maps "%s" are in a a cycle.' % str((path00, path01)),
        'Maps "%s" are in a a cycle.' % str((path01, path00)),
    ]
    self.assertEqual(fileSet.errStrings(False, None), expSpecific)
    self.assertEqual(fileSet.allErrStrings(True, None), [genericStringForErrorCode(701)])
    self.assertEqual(fileSet.errCountMap, {701 : 4})
def test_map_cycles_01(self):
    """DitaFileSet: Cyclic references between three maps."""
    # map_00 -> map_01 -> map_02 -> map_00
    map00Xml = """<map id="map_00">
<topicref href="map_01.ditamap" format="ditamap" />
</map>"""
    map01Xml = """<map id="map_01">
<topicref href="map_02.ditamap" format="ditamap" />
</map>"""
    map02Xml = """<map id="map_02">
<topicref href="map_00.ditamap" format="ditamap" />
</map>"""
    fileSet = DitaFileSet(None, procDir=False)
    fileSet._addFileObj(StringIO.StringIO(map00Xml), 'map_00.ditamap')
    fileSet._addFileObj(StringIO.StringIO(map01Xml), 'map_01.ditamap')
    fileSet._addFileObj(StringIO.StringIO(map02Xml), 'map_02.ditamap')
    fileSet.finalise()
    path00 = normalisePath('map_00.ditamap')
    path01 = normalisePath('map_01.ditamap')
    path02 = normalisePath('map_02.ditamap')
    # The cycle is reported once for each map that it can start from
    expSpecific = [
        'Maps "%s" are in a a cycle.' % str((path00, path01, path02)),
        'Maps "%s" are in a a cycle.' % str((path01, path02, path00)),
        'Maps "%s" are in a a cycle.' % str((path02, path00, path01)),
    ]
    self.assertEqual(fileSet.errStrings(False, None), expSpecific)
    self.assertEqual(fileSet.errStrings(True, None), [genericStringForErrorCode(701)])
    self.assertEqual(fileSet.errCountMap, {701 : 6})
def test_refarc_00(self):
    """DitaFileSet: Test ref arcing - all resolve."""
    mapXml = """<map id="map_00">
<topicref href="spam.dita#spam" />
<topicref href="eggs.dita#eggs" />
</map>"""
    fileSet = DitaFileSet(None, procDir=False)
    fileSet._addFileObj(StringIO.StringIO(mapXml), 'map.ditamap')
    # Both the files and the fragments that the map refers to exist
    fileSet._addFileObj(StringIO.StringIO('<topic id="spam"/>'), 'spam.dita')
    fileSet._addFileObj(StringIO.StringIO('<topic id="eggs"/>'), 'eggs.dita')
    fileSet.finalise()
    self.assertEqual(fileSet.errCountMap, {})
    for isGeneric in (False, True):
        self.assertEqual(fileSet.allErrStrings(isGeneric, None), [])
    for isGeneric in (False, True):
        self.assertEqual(fileSet.errStrings(isGeneric, None), [])
def test_refarc_fail_00(self):
    """DitaFileSet: Test ref arcing - can't find file."""
    # The map is the only file added, so neither href has a target file.
    mapXml = (
        '<map id="map_00">\n'
        '<topicref href="spam_.dita" />\n'
        '<topicref href="eggs_for_tea.dita" />\n'
        '</map>'
    )
    fileSet = DitaFileSet(None, procDir=False)
    fileSet._addFileObj(StringIO.StringIO(mapXml), 'map.ditamap')
    fileSet.finalise()
    self.assertEqual(fileSet.errCountMap, {410: 2})
    messageFormat = 'Can not resolve reference to file "%s"'
    expectedSpecific = [
        messageFormat % normalisePath(fileName)
        for fileName in ('eggs_for_tea.dita', 'spam_.dita')
    ]
    self.assertEqual(fileSet.allErrStrings(False, None), expectedSpecific)
    # Both failures collapse to a single message in the generic report.
    expectedGeneric = ['Can not resolve reference to file "..."']
    self.assertEqual(fileSet.allErrStrings(True, None), expectedGeneric)
    for isGeneric in (False, True):
        self.assertEqual(fileSet.errStrings(isGeneric, None), [])
def test_refarc_fail_01(self):
    """DitaFileSet: Test ref arcing - can't find fragment."""
    # Both referenced files are present but the fragments asked for
    # ("spam_", "eggs_") do not match the IDs they declare.
    sources = (
        (
            '<map id="map_00">\n'
            '<topicref href="spam.dita#spam_" />\n'
            '<topicref href="eggs.dita#eggs_" />\n'
            '</map>',
            'map.ditamap',
        ),
        ('<spam id="spam"/>', 'spam.dita'),
        ('<eggs id="eggs"/>', 'eggs.dita'),
    )
    fileSet = DitaFileSet(None, procDir=False)
    for xmlText, fileName in sources:
        fileSet._addFileObj(StringIO.StringIO(xmlText), fileName)
    fileSet.finalise()
    self.assertEqual(fileSet.errCountMap, {411: 2})
    expectedSpecific = [
        'Can resolve reference to file "%s" but not to fragment "eggs_"'
        % normalisePath('eggs.dita'),
        'Can resolve reference to file "%s" but not to fragment "spam_"'
        % normalisePath('spam.dita'),
    ]
    self.assertEqual(fileSet.allErrStrings(False, None), expectedSpecific)
    expectedGeneric = [
        'Can resolve reference to file "%s" but not to fragment "%s"'
        % (GENERIC_STRING, GENERIC_STRING),
    ]
    self.assertEqual(fileSet.allErrStrings(True, None), expectedGeneric)
    for isGeneric in (False, True):
        self.assertEqual(fileSet.errStrings(isGeneric, None), [])
def test_refarc_url_00(self):
    """DitaFileSet: Test ref arcing - URL."""
    # Same layout as the "all resolve" case, but each topic also carries an
    # xref to an http:// URL and the file set is created with testExt=True.
    sources = (
        (
            '<map id="map_00">\n'
            '<topicref href="spam.dita#spam" />\n'
            '<topicref href="eggs.dita#eggs" />\n'
            '</map>',
            'map.ditamap',
        ),
        (
            '<topic id="spam">\n'
            '<xref href="http://www.nokia.com">Nokia</xref>\n'
            '</topic>',
            'spam.dita',
        ),
        (
            '<topic id="eggs">\n'
            '<xref href="http://www.google.com">Google</xref>\n'
            '</topic>',
            'eggs.dita',
        ),
    )
    fileSet = DitaFileSet(None, procDir=False, testExt=True)
    for xmlText, fileName in sources:
        fileSet._addFileObj(StringIO.StringIO(xmlText), fileName)
    fileSet.finalise()
    self.assertEqual(fileSet.errCountMap, {})
    # No problem of any kind is expected in either report.
    for report in (fileSet.allErrStrings, fileSet.errStrings):
        for isGeneric in (False, True):
            self.assertEqual(report(isGeneric, None), [])
class TestDitaBookmapFileSet(unittest.TestCase):
    """Tests DitaFileSet with a bookmap as the root map."""

    def setUp(self):
        """No fixture is required."""

    def tearDown(self):
        """Nothing to clean up."""

    def testSetUpTearDown(self):
        """TestDitaBookmapFileSet: test setUp() and tearDown()."""

    def test_basic(self):
        """TestDitaBookmapFileSet: Test reading a bookmap and a topic."""
        # The bookmap's only topicref points at the concept file added
        # below, so the finalised set should report no problems.
        bookmapXml = (
            '<?xml version="1.0" encoding="utf-8"?>\n'
            '<!DOCTYPE bookmap PUBLIC "-//OASIS//DTD DITA BookMap//EN"\n'
            '"bookmap.dtd">\n'
            '<bookmap id="GUID-5BDFDB6B-7801-4804-9F41-2BDC5BE53DDF">\n'
            '<booktitle>\n'
            '<mainbooktitle>My Bookmap</mainbooktitle>\n'
            '<booktitlealt>Alternate title</booktitlealt>\n'
            '</booktitle>\n'
            '<frontmatter id="GUID-DA857913-F826-4CF7-A135-93F2AEB48353">\n'
            '<topicref href="GUID-00025EAD-C4B6-5408-96A3-FFDBBBDC7CAB.dita" id="GUID-994B1764-393F-401F-8571-CE0955AB6CA6" />\n'
            '</frontmatter>\n'
            '</bookmap>\n'
        )
        conceptXml = (
            '<?xml version="1.0" encoding="utf-8"?>\n'
            '<!DOCTYPE concept  PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">\n'
            '<concept id="GUID-00025EAD-C4B6-5408-96A3-FFDBBBDC7CAB" xml:lang="en">\n'
            '<title>How to read and write a file</title>\n'
            '</concept>\n'
        )
        fileSet = DitaFileSet(None, procDir=False)
        fileSet._addFileObj(StringIO.StringIO(bookmapXml), 'bookmap.ditamap')
        fileSet._addFileObj(
            StringIO.StringIO(conceptXml),
            'GUID-00025EAD-C4B6-5408-96A3-FFDBBBDC7CAB.dita',
        )
        fileSet.finalise()
        for isGeneric in (False, True):
            self.assertEqual(fileSet.allErrStrings(isGeneric, None), [])
        self.assertEqual(fileSet.errCountMap, {})
class Special(unittest.TestCase):
    """Empty test case; it defines no test methods of its own."""
def unitTest(theVerbosity=2):
    """Run every test case class in this module with a text test runner.

    theVerbosity is handed to unittest.TextTestRunner.

    Returns a tuple of (tests run, number of errors, number of failures).
    """
    # Order matters only for the order in which results are written out.
    testCaseClasses = (
        NullClass,
        TestCountDict,
        TestDitaId,
        TestDitaRef,
        TestDitaFile,
        TestDitaFileSet,
        TestDitaBookmapFileSet,
        Special,
    )
    suite = unittest.TestLoader().loadTestsFromTestCase(testCaseClasses[0])
    for testCaseClass in testCaseClasses[1:]:
        suite.addTests(unittest.TestLoader().loadTestsFromTestCase(testCaseClass))
    runner = unittest.TextTestRunner(verbosity=theVerbosity)
    outcome = runner.run(suite)
    return (outcome.testsRun, len(outcome.errors), len(outcome.failures))
-######################################
-# main() stuff
-######################################
def main():
    """Command line entry point: parse the options, check links and report.

    Exactly one positional argument, the directory of XML content, is
    expected. It may be omitted when only -u (unit tests) and/or -?
    (list the error types) is requested.

    Returns 0 on success; the caller passes this to sys.exit(). Usage
    problems are reported with OptionParser.error() which writes the message
    to stderr and exits the process with status 2.
    """
    print('CMD: %s' % ' '.join(sys.argv))
    usage = "usage: %prog [options] <Directory of XML content>"
    parser = OptionParser(usage, version='%prog ' + __version__)
    parser.add_option("-d", action="store_true", dest="dump", default=False,
                      help="Dump internal representation. [default: %default]")
    parser.add_option(
        "-e", "--errors",
        type="str",
        dest="error_codes",
        default='All',
        help="Only report on certain error codes (space seperated list). [default: \"%default\"]"
    )
    parser.add_option("-f", "--file", dest="file", type="str", default='None',
                      help="Report of errors by file either 'None', 'generic', 'specific'. [default: %default]")
    parser.add_option("-g", action="store_true", dest="guid", default=False,
                      help="Enforce GUID specification. [default: %default]")
    parser.add_option(
        "-j", "--jobs",
        type="int",
        dest="jobs",
        default=-1,
        help="Max processes when multiprocessing. 0 takes CPUs, -1 no MP. [default: %default]"
    )
    parser.add_option(
        "-l", "--loglevel",
        type="int",
        dest="loglevel",
        default=20,
        help="Log Level (debug=10, info=20, warning=30, [error=40], critical=50) [default: %default]"
    )
    parser.add_option(
        "-p", "--pattern",
        type="str",
        dest="pattern",
        default=FNMATCH_STRING,
        help="Pattern match. [default: \"%default\"]"
    )
    parser.add_option("-r", action="store_true", dest="recursive", default=False,
                      help="Recursive. [default: %default]")
    parser.add_option("-s", action="store_true", dest="shelve", default=False,
                      help="Use the shelve dBase rather than storing the internal representation in memory. This is slower but is useful for large data sets where a memory error might occur. [default: %default]")
    parser.add_option("-u", action="store_true", dest="unit_test", default=False,
                      help="Execute unit tests and exit. [default: %default]")
    parser.add_option("-x", action="store_true", dest="ext_url", default=False,
                      help="Test external |URLs. [default: %default]")
    parser.add_option("-?", action="store_true", dest="query_errors", default=False,
                      help="Display the error types that are detected. [default: %default]")
    (options, args) = parser.parse_args()
    logging.basicConfig(
        level=options.loglevel,
        format='%(asctime)s %(levelname)-8s %(message)s',
        stream=sys.stdout,
    )
    if options.file not in ('None', 'generic', 'specific'):
        parser.error("--file option must be: 'None' | 'generic' | 'specific'")
        # Only reached if error() has been overridden so that it returns.
        return 1
    if options.unit_test:
        unitTest()
    if options.query_errors:
        writeGenericStringsForErrorCodes()
    if len(args) > 1:
        parser.error("Too many arguments, I need only one.")
        return 1
    if len(args) < 1:
        # -u and -? are complete requests in their own right, so a missing
        # content directory is only a usage error when neither was given.
        if options.unit_test or options.query_errors:
            return 0
        parser.print_help()
        parser.error("I can't do much without a path to the XML content.")
        return 1
    # Work out which error codes to report before the (possibly lengthy)
    # processing so that a malformed --errors value is rejected immediately
    # as a usage error rather than as a ValueError traceback at the end.
    if options.error_codes == 'All':
        errFilter = set(PROBLEM_CODE_FORMAT.keys())
    else:
        try:
            errFilter = set([int(i) for i in options.error_codes.split()])
        except ValueError:
            parser.error(
                "--errors option must be 'All' or a space separated list"
                " of integer error codes, not \"%s\"" % options.error_codes
            )
            return 1
    if options.jobs > -1:
        # Multiprocessing variant.
        myObj = retMpDitaFileSetObj(
            args[0],
            options.pattern.split(' '),
            options.recursive,
            options.jobs,
            options.ext_url,
            options.shelve,
        )
    else:
        myObj = DitaFileSet(
            args[0],
            procDir=True,
            thePatterns=options.pattern.split(' '),
            recursive=options.recursive,
            testExt=options.ext_url,
            useDbase=options.shelve,
        )
    if options.dump:
        myObj.debugDump()
    myObj.writeStatistics()
    myObj.writeErrorSummary()
    # TODO: Write out the results in different ways
    if options.file == 'generic':
        print('Generic problems:')
        myObj.writeErrors(True, errFilter)
    elif options.file == 'specific':
        print('Specific problems:')
        myObj.writeErrors(False, errFilter)
    return 0
if __name__ == '__main__':
    # freeze_support() is called first, before any other work is done.
    multiprocessing.freeze_support()
    exitStatus = main()
    sys.exit(exitStatus)

changeset 4	468f4c8d3d5b
parent 3	d8fccb2cd802