1 # Copyright (c) 2007-2010 Nokia Corporation and/or its subsidiary(-ies) All rights reserved. |
|
2 # This component and the accompanying materials are made available under the terms of the License |
|
3 # "Eclipse Public License v1.0" which accompanies this distribution, |
|
4 # and is available at the URL "http://www.eclipse.org/legal/epl-v10.html". |
|
5 # |
|
6 # Initial Contributors: |
|
7 # Nokia Corporation - initial contribution. |
|
8 # |
|
9 # Contributors: |
|
10 # |
|
11 # Description: |
|
12 # Checks links in DITA XML and reports issues. |
|
13 """ |
|
14 Created on 12 Feb 2010 |
|
15 |
|
16 @author: p2ross |
|
17 |
|
18 Definitions |
|
19 =========== |
|
20 Doctype |
|
21 ------- |
|
22 See: http://www.w3.org/TR/2008/REC-xml-20081126/#dt-root |
|
23 Note: this is sometimes called the Doctype because of http://www.w3.org/TR/2008/REC-xml-20081126/#vc-roottype |
|
24 |
|
25 ID |
|
26 -- |
|
27 The value of the 'id' attribute of an element. |
|
28 |
|
29 Root ID |
|
30 ------- |
|
31 The value of the 'id' attribute of the root element. |
|
32 Note: A development would allow differently named attributes provided that they |
|
33 were ID types. See http://www.w3.org/TR/2008/REC-xml-20081126/#sec-attribute-types |
|
34 for validity constraints for ID types. |
|
35 |
|
36 Reference |
|
37 --------- |
|
38 The value of the href attribute of an element. |
|
39 |
|
40 Map |
|
41 --- |
|
42 An XML file whose root element name is 'map' or 'bookmap', or ends with 'Map'. |
|
43 |
|
44 Topic |
|
45 ----- |
|
46 An XML file that is not a Map. |
|
47 |
|
48 Lonely topic |
|
49 ------------ |
|
50 A topic whose root ID is not referenced by any map. |
|
51 |
|
52 Lonely map |
|
53 ---------- |
|
54 A map whose root ID is not referenced by any map. |
|
55 |
|
56 Map Cycle |
|
57 --------- |
|
58 A sequence of map references whose members are not unique. |
|
59 |
|
60 """ |
|
61 |
|
62 import os |
|
63 import unittest |
|
64 import sys |
|
65 import logging |
|
66 import pprint |
|
67 import fnmatch |
|
68 import re |
|
69 import urllib |
|
70 import time |
|
71 from optparse import OptionParser, check_choice |
|
72 try: |
|
73 from xml.etree import cElementTree as etree |
|
74 except ImportError: |
|
75 from xml.etree import ElementTree as etree |
|
76 import urlparse |
|
77 import multiprocessing |
|
78 # used for DitaFileObj persistence |
|
79 import shelve |
|
80 |
|
81 __version__ = '0.1.5' |
|
82 |
|
class ExceptionLinkCheck(Exception):
    """Exception class defined by the link checker; adds nothing to Exception."""
|
85 |
|
class CountDict(dict):
    """A dict whose [] lookup creates a missing key with the value 0."""
    def __getitem__(self, key):
        # Reading an absent key first stores 0 under it, so the key becomes
        # a real member of the dictionary (and d[k] += 1 just works).
        self.setdefault(key, 0)
        return self.get(key)
|
92 |
|
# A GUID looks like: GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E
# i.e. 'GUID-' then hexadecimal groups of 8-4-4-4-12 digits, in any case.
RE_GUID = re.compile(
    r'GUID-[0-9A-F]{8}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{12}',
    re.IGNORECASE
)

# Error table: {integer_error_code : (format_string, num_args), ...}
# num_args must equal the number of '%s' slots in format_string.
PROBLEM_CODE_FORMAT = {
    # 1xx - 'id_syntax'
    100: ('Character \'#\' not allowed in id="%s"', 1),
    101: ('NMTOKEN character \'%s\' not allowed in id="%s"', 2),
    102: ('GUID specification does not match id="%s"', 1),
    # 2xx - 'ref_syntax'
    200: ('Multiple \'#\' not allowed in reference "%s"', 1),
    201: ('Reference element "%s" is missing href=... attribute', 1),
    202: ('URL has missing type/format in reference "%s"', 1),
    203: ('GUID specification does not match file reference "%s"', 1),
    204: ('GUID specification does not match fragment reference "%s"', 1),
    # 3xx - 'ref'
    300: ('Can not resolve URI "%s"', 1),
    # 4xx - 'file'
    400: ('Failed to open: "%s"', 1),
    401: ('Multiple id="%s"', 1),
    402: ('No id attribute on root element', 0),
    403: ('Root ID in cycle: %s', 1),
    404: ('Can not parse: "%s"', 1),
    410: ('Can not resolve reference to file "%s"', 1),
    411: ('Can resolve reference to file "%s" but not to fragment "%s"', 2),
    412: ('Referencing element "%s" does not match target root element "%s"', 2),
    413: ('Referencing element "%s" does not match target element "%s" for id="%s"', 3),
    414: ('topicref element with format="ditamap" does not match target root element "%s"', 1),
    415: ('topicref to <map> does not have format="ditamap" but format="%s"', 1),
    416: ('topicref element type="%s" does not match target root element "%s"', 2),
    417: ('topicref element type="%s" does not match target element "%s" for id="%s"', 3),
    418: ('Unknown referencing element "%s" does not match target root element "%s"', 2),
    419: ('Unknown referencing element "%s" does not match target element "%s" for id="%s"', 3),
    # 5xx - 'file_set'
    500: ('Not a directory: %s', 1),
    501: ('Duplicate root id="%s" in files: %s', 2),
    # Codes 502 and 503 are disabled:
    #502 : ('Can not resolve reference to "%s"', 1),
    #503 : ('Reference type "%s" does not match target type "%s" for id="%s"', 3),
    504: ('Duplicate file path: "%s"', 1),
    505: ('Duplicate id="%s" in files: %s', 2),
    # 6xx - 'topic_set'
    600: ('Topic id="%s" is not referenced by any map', 1),
    # 7xx - 'map_set'
    700: ('More than one top level map exists: %s', 1),
    701: ('Maps "%s" are in a a cycle.', 1),
}

# Placeholder substituted for every argument when a generic message is wanted.
GENERIC_STRING = '...'
# Width in characters of the banner lines written around reports.
PRINT_WIDTH = 75
|
142 |
|
def genericStringForErrorCode(ec):
    """Return the message for error code ec with each of its argument
    slots filled by GENERIC_STRING.

    ec must be a key of PROBLEM_CODE_FORMAT (checked by assertion)."""
    assert ec in PROBLEM_CODE_FORMAT
    template, argCount = PROBLEM_CODE_FORMAT[ec]
    if argCount != 0:
        return template % ((GENERIC_STRING,) * argCount)
    # No argument slots: the format string is already the message.
    return template
|
149 |
|
def writeGenericStringsForErrorCodes(s=sys.stdout):
    """Write to s a table of every error code, in ascending order, with
    its generic message, framed by banner lines of PRINT_WIDTH characters."""
    rowFormat = '%4s %s\n'
    s.write(' All Error Codes '.center(PRINT_WIDTH, '='))
    s.write('\n')
    s.write(rowFormat % ('Code', 'Error'))
    s.write(rowFormat % ('----', '-----'))
    for ec in sorted(PROBLEM_CODE_FORMAT.keys()):
        s.write('%4d %s\n' % (ec, genericStringForErrorCode(ec)))
    s.write('=' * PRINT_WIDTH)
    s.write('\n\n')
|
161 |
|
def normalisePath(thePath):
    """Return thePath made absolute, with every backslash replaced by '/'."""
    absolutePath = os.path.abspath(thePath)
    # TODO: the original author found that os.path.abspath() alone
    # "does not work" here; the reason is not recorded. Backslashes are
    # converted so that the result only ever contains '/' separators.
    return absolutePath.replace('\\', '/')
|
166 |
|
# fnmatch style file name patterns, as a list...
FNMATCH_PATTERNS = ['*.xml', '*.dita', '*.ditamap']
# ...and the same patterns as one space separated string.
FNMATCH_STRING = ' '.join(FNMATCH_PATTERNS)

# Element names that descend from topic/xref and so can be treated as
# referencing elements.
XREF_DESCENDENTS = set([
    # From the api specialisation
    'apiRelation',
    'apiBaseClassifier',
    'apiOtherClassifier',
    'apiOperationClassifier',
    'apiValueClassifier',
    # From the C++ specialisation
    'cxxfile',
    'cxxclass',
    'cxxstruct',
    'cxxunion',
    'cxxfunction',
    'cxxdefine',
    'cxxtypedef',
    'cxxvariable',
    'cxxenumeration',
    'cxxClassBaseClass',
    'cxxClassBaseStruct',
    'cxxClassBaseUnion',
    'cxxClassNestedClass',
    'cxxClassNestedStruct',
    'cxxClassNestedUnion',
    'cxxClassEnumerationInherited',
    'cxxClassEnumeratorInherited',
    'cxxClassFunctionInherited',
    'cxxClassVariableInherited',
    'cxxDefineReimplemented',
    'cxxEnumerationReimplemented',
    'cxxFunctionReimplemented',
    'cxxStructBaseClass',
    'cxxStructBaseStruct',
    'cxxStructBaseUnion',
    'cxxStructNestedClass',
    'cxxStructNestedStruct',
    'cxxStructNestedUnion',
    'cxxStructEnumerationInherited',
    'cxxStructEnumeratorInherited',
    'cxxStructFunctionInherited',
    'cxxStructVariableInherited',
    'cxxTypedefReimplemented',
    'cxxUnionBaseClass',
    'cxxUnionBaseStruct',
    'cxxUnionBaseUnion',
    'cxxUnionNestedClass',
    'cxxUnionNestedStruct',
    'cxxUnionNestedUnion',
    'cxxUnionEnumerationInherited',
    'cxxUnionFunctionInherited',
    'cxxUnionVariableInherited',
    'cxxVariableReimplemented',
])
|
225 |
|
class UrlAccessCache(object):
    """Remembers, for each URL asked about, whether it could be opened
    and read, so that a URL is fetched at most once between clear() calls."""
    def __init__(self):
        # {URL : True/False, ...}
        self._cache = {}

    def clear(self):
        """Forget every cached result."""
        self._cache = {}

    def canAccess(self, theUrl):
        """Return True if theUrl can be opened and read, False if doing so
        raises IOError. The answer is cached; any other exception propagates
        and nothing is cached for that URL."""
        if theUrl not in self._cache:
            try:
                u = urllib.urlopen(theUrl)#, data, proxies)
                try:
                    u.read()
                finally:
                    # Always release the connection; previously the handle
                    # was left for the garbage collector.
                    u.close()
            except IOError:
                accessible = False
            else:
                accessible = True
            self._cache[theUrl] = accessible
            logging.debug('URL: %s for %s' % (accessible, theUrl))
        return self._cache[theUrl]

# Module wide cache shared by all reference checks.
GlobalUrlCache = UrlAccessCache()
|
247 |
|
class DitaLinkCheckBase(object):
    """Common behaviour of the link check objects: an identity that drives
    comparison, hashing and str(), plus storage and reporting of errors."""
    def __init__(self, theIdentity):#=None):
        self.__identity = theIdentity
        # {error_code : set of argument tuples, ...}, None until the first
        # error is added.
        self._errS = None

    @property
    def identity(self):
        """The identity given to the constructor."""
        return self.__identity

    def __cmp__(self, other):
        """Order by identity; neither identity may be None."""
        assert self.identity is not None
        assert other.identity is not None
        return cmp(self.identity, other.identity)

    def __eq__(self, other):
        """Equal when the identities are equal; neither may be None."""
        assert self.identity is not None
        assert other.identity is not None
        return self.identity == other.identity

    def __hash__(self):
        """Hash of the identity, which must not be None."""
        assert self.identity is not None
        return hash(self.identity)

    def __str__(self):
        return str(self.__identity)

    def debugDump(self, s=sys.stdout, prefix=''):
        """Dump of IR for debug purposes; child classes must implement it."""
        raise NotImplementedError

    def addError(self, errCode, argTuple):
        """Record an error. errCode must be a key of PROBLEM_CODE_FORMAT and
        argTuple must have as many members as that code's format needs.
        Identical (errCode, argTuple) pairs are stored once."""
        assert errCode in PROBLEM_CODE_FORMAT, 'No error code: %s' % errCode
        expectedLen = PROBLEM_CODE_FORMAT[errCode][1]
        assert expectedLen == len(argTuple), \
            'Length missmatch for error code %d: %d != %d for %s' \
            % (errCode, expectedLen, len(argTuple), str(argTuple))
        if self._errS is None:
            self._errS = {}
        argSet = self._errS.get(errCode)
        if argSet is None:
            self._errS[errCode] = set([argTuple])
        else:
            argSet.add(argTuple)

    def errStrings(self, generic, theFilter):
        """Return a sorted list of error messages without duplicates.
        generic - if true every argument is shown as GENERIC_STRING.
        theFilter - container of error codes to include, or None for all."""
        if self._errS is None:
            return []
        messages = set()
        for code, argTuples in self._errS.items():
            if theFilter is not None and code not in theFilter:
                continue
            assert code in PROBLEM_CODE_FORMAT
            for args in argTuples:
                if generic:
                    messages.add(genericStringForErrorCode(code))
                else:
                    fmt, argCount = PROBLEM_CODE_FORMAT[code]
                    assert len(args) == argCount
                    messages.add(fmt % args)
        return sorted(messages)

    def updateErrorCount(self, theMap):
        """Adds my error counts to a map of {error_code, : count, ...}.
        Overridden for file and file set."""
        if self._errS is not None:
            for code, argTuples in self._errS.items():
                theMap[code] += len(argTuples)

    def writeErrors(self, isGeneric, theFilter, theStream=sys.stdout):
        """Write my error messages, newline separated, to theStream.
        Can be overridden in child classes to recurse into their
        data structures."""
        theStream.write('\n'.join(self.errStrings(isGeneric, theFilter)))
|
322 |
|
class DitaId(DitaLinkCheckBase):
    """An element that has an id attribute; the id value is the identity."""
    def __init__(self, theN):
        """theN is an element node that must have an 'id' attribute.
        Error 100 is recorded if the id contains a '#'."""
        idValue = theN.get('id', None)
        assert idValue is not None
        super(DitaId, self).__init__(idValue)
        self._elem = theN.tag
        if '#' in self.id:
            self.addError(100, (self.id,))
        # TODO: NMTOKENS

    @property
    def elem(self):
        """The tag of the element carrying the id."""
        return self._elem

    @property
    def id(self):
        """The value of the id attribute (same as the identity)."""
        return self.identity

    def checkGuid(self):
        """Optional extra check: records error 102 unless the id starts
        with text matching RE_GUID."""
        if RE_GUID.match(self.id) is None:
            self.addError(102, (self.id,))

    def debugDump(self, s=sys.stdout, prefix=''):
        """Dump of IR for debug purposes."""
        s.write('%sID: <%s id="%s" />\n' % (prefix, self.elem, self.id))
|
349 |
|
class DitaRef(DitaLinkCheckBase):
    """Represents a reference node, i.e. an element read for its href.

    The identity is '<element tag> <href value>'. When the element has no
    href attribute error 201 is recorded, the URL parts (path, fragment,
    scheme) are None and the check methods do nothing further."""
    def __init__(self, theN):
        """theN is the element node. Error 200 is recorded when the part
        of the href after the first '#' contains another '#'."""
        self._elem = theN.tag
        self._href = theN.get('href', None)
        super(DitaRef, self).__init__('%s %s' % (self._elem, self._href))
        # This is used when figuring out of the target is the correct element
        # e.g. in Vanilla DITA
        # <topicref href="batcaring.dita" type="task"></topicref>
        self._refType = theN.get('type', None)
        # Format attribute, this can be format="ditamap"
        self._format = theN.get('format', None)
        if self._href is None:
            self.addError(201, (self._elem,))
            # No URL to parse; every accessor below must cope with this.
            self._url = None
        else:
            self._url = urlparse.urlparse(self._href)
            if '#' in self._url.fragment:
                self.addError(200, (self._href,))

    @property
    def elem(self):
        """The tag of the referencing element."""
        return self._elem

    @property
    def href(self):
        """The value of the href attribute."""
        return self._href

    @property
    def refType(self):
        """The value of the type attribute."""
        return self._refType

    @property
    def format(self):
        """The value of the format attribute."""
        return self._format

    @property
    def path(self):
        """The value of the path part of the href attribute, None if
        there is no href."""
        if self._url is None:
            return None
        return self._url.path

    @property
    def fragment(self):
        """The value of the fragment part of the href attribute, None if
        there is no href."""
        if self._url is None:
            return None
        return self._url.fragment

    @property
    def scheme(self):
        """The URI scheme e.g. 'http' or '' if no scheme, None if there
        is no href."""
        if self._url is None:
            return None
        return self._url.scheme

    def fileFragment(self, theRefFile):
        """The absolute path of the file and the fragment identifier or (None, None).

        theRefFile is the path of the file holding this reference: an href
        with an empty path resolves to theRefFile itself, otherwise the href
        path is joined to theRefFile's directory. (None, None) is returned
        for a missing href or a scheme other than '' or 'file'."""
        if self._url is None or self.scheme not in ('', 'file'):
            return (None, None)
        if len(self.path) == 0:
            myPath = theRefFile
        else:
            myPath = os.path.join(os.path.dirname(theRefFile), self.path)
        return normalisePath(myPath), self.fragment

    def checkGuid(self):
        """Optionally applies additional checks for GUID requirements:
        error 203 unless the path starts with a GUID, error 204 unless the
        fragment does. Nothing is checked when there is no href."""
        if self._url is None:
            # Error 201 was recorded by the constructor, nothing to match.
            return
        if RE_GUID.match(self.path) is None:
            self.addError(203, (self.path,))
        if RE_GUID.match(self.fragment) is None:
            self.addError(204, (self.fragment,))

    def checkUrl(self):
        """For a reference that has a scheme, records error 300 when the
        global URL cache reports the URL as not accessible."""
        if self._url is not None and self.scheme:
            myU = urlparse.urlunparse(self._url)
            if not GlobalUrlCache.canAccess(myU):
                self.addError(300, (myU,))

    def debugDump(self, s=sys.stdout, prefix=''):
        """Dump of IR for debug purposes."""
        s.write('%sREF: <%s href="%s" />\n' % (prefix, self.elem, self._href))
|
430 |
|
class DitaFileObj(DitaLinkCheckBase):
    """A DITA topic or map read from a file-like object: holds the root
    element tag, the root ID, the set of IDs and the set of references."""
    def __init__(self, theFileObj, theFileName=None):
        """Initialiser with a file object and a file path.

        The identity is the normalised theFileName if given, otherwise
        theFileObj.name, otherwise None. A theFileObj of None records
        error 400; XML that fails to parse records error 404."""
        if theFileName is not None:
            myIdentity = normalisePath(theFileName)
        elif theFileObj is not None:
            myIdentity = theFileObj.name
        else:
            myIdentity = None
        super(DitaFileObj, self).__init__(myIdentity)
        self._rootId = None
        self._doctype = None
        # IDs seen exactly once, and IDs seen more than once (class DitaId)
        self._idS = set()
        self._dupeIdS = set()
        # Set of class DitaRef
        self._xrefS = set()
        # Output control: has the 'File:' heading been written yet?
        self._hasWritten = False
        # Size of input: from the file system if possible, otherwise from
        # a 'len' attribute (as if a StringIO), otherwise give up with 0.
        try:
            self._bytes = os.path.getsize(theFileName)
        except Exception:
            self._bytes = getattr(theFileObj, 'len', 0)
        # Process the file object
        if theFileObj is None:
            self.addError(400, (self.identity,))
        else:
            try:
                # TODO: use iterparse?
                theTree = etree.parse(theFileObj)
            except SyntaxError as err:
                self.addError(404, (str(err),))
            else:
                # Walk the tree, element [0] is the root element
                for i, e in enumerate(theTree.getiterator()):
                    hasId = e.get('id', None) is not None
                    if i == 0:
                        assert self._rootId is None
                        assert self._doctype is None
                        self._doctype = e.tag
                        if hasId:
                            self._rootId = DitaId(e)
                            self._addId(self._rootId)
                        else:
                            self.addError(402, ())
                        # An href on the root element is not recorded.
                        continue
                    # NOTE: Elements with id attributes can also have href
                    # attributes. For example a <topicref> in a <bookmap>
                    # Thus these tests are not exclusive
                    if hasId:
                        self._addId(DitaId(e))
                    if e.get('href', None) is not None:
                        # TODO: Do we limit ourselves to only a certain set of elements?
                        self._xrefS.add(DitaRef(e))

    def _addId(self, theId):
        """Add a DitaId. The second sighting of an ID moves it from the
        ID set to the duplicate set and records error 401; later
        sightings are ignored."""
        if theId in self._idS:
            self._idS.remove(theId)
            self._dupeIdS.add(theId)
            self.addError(401, (theId.identity,))
            return
        if theId in self._dupeIdS:
            return
        self._idS.add(theId)

    @property
    def bytes(self):
        """The size of the input, 0 if it could not be found."""
        return self._bytes

    @property
    def doctype(self):
        """The tag of the root element, None if nothing was parsed."""
        return self._doctype

    @property
    def rootId(self):
        """The id value of the root element, None if there is not one."""
        if self._rootId is None:
            return None
        return self._rootId.id

    @property
    def isMap(self):
        """True if the root element is 'map', 'bookmap' or ends with 'Map'."""
        myType = self.doctype
        if myType is None:
            return False
        return myType in ('map', 'bookmap') or myType.endswith('Map')

    @property
    def idS(self):
        """The set of IDs."""
        return self._idS

    @property
    def refS(self):
        """The set of DitaRef objects."""
        return self._xrefS

    def idElemMap(self):
        """Returns a map {id : elem name, ...}."""
        return dict((anId.id, anId.elem) for anId in self._idS)

    def hasId(self, theString):
        """True if theString is the id of a member of the ID set."""
        return any(theString == anId.id for anId in self._idS)

    def idElem(self, theString):
        """The element tag of the ID equal to theString, or None."""
        found = self.idObj(theString)
        if found is None:
            return None
        return found.elem

    def idObj(self, theString):
        """The DitaId whose id equals theString, or None."""
        for anId in self._idS:
            if theString == anId.id:
                return anId
        return None

    def updateErrorCount(self, theMap):
        """Updates a map of {error_code, : count, ...} with my errors and
        those of my IDs and my Refs."""
        super(DitaFileObj, self).updateErrorCount(theMap)
        for idObj in self.idS:
            idObj.updateErrorCount(theMap)
        for refObj in self.refS:
            refObj.updateErrorCount(theMap)

    def writeErrorList(self, theList, theSubHead='', theS=sys.stdout):
        """Write a non-empty list of messages to theS, sorting theList in
        place. The 'File:' heading is written once per writeErrors() call,
        before the first list that has any content."""
        if len(theList) == 0:
            return
        theList.sort()
        if not self._hasWritten:
            theS.write('File: %s\n' % self.identity)
            self._hasWritten = True
        if len(theSubHead) > 0:
            theS.write('%s [%d]:\n' % (theSubHead, len(theList)))
        theS.write('\n'.join(theList))
        theS.write('\n')

    def writeErrors(self, isGeneric, theFilter, theStream=sys.stdout):
        """Writes out errors for me, my IDs and my Refs."""
        self._hasWritten = False
        self.writeErrorList(self.errStrings(isGeneric, theFilter), 'File errors:', theStream)
        # Duplicate IDs get no section of their own; they are among the
        # file errors as error 401.
        # Now IDs
        for anId in sorted(self.idS):
            self.writeErrorList(anId.errStrings(isGeneric, theFilter), 'ID=%s' % anId.identity, theStream)
        # Now Refs
        for aRef in sorted(self._xrefS):
            self.writeErrorList(aRef.errStrings(isGeneric, theFilter), 'Ref=%s' % aRef.identity, theStream)
        if self._hasWritten:
            theStream.write('\n')

    def debugDump(self, s=sys.stdout, prefix=''):
        """Dump of IR for debug purposes."""
        s.write('%sFile: %s\n' % (prefix, self.identity))
        childPrefix = prefix+' '
        for anId in self._idS:
            anId.debugDump(s, prefix=childPrefix)
        for aRef in self._xrefS:
            aRef.debugDump(s, prefix=childPrefix)
|
614 |
|
class DitaFilePath(DitaFileObj):
    """A DITA topic or map read from a path on the file system."""
    def __init__(self, theFilePath):
        """Initialiser with a file path. The file is opened, read by the
        base class and closed again; a file that can not be opened
        records error 400."""
        try:
            f = open(theFilePath)
        except IOError:
            f = None
        try:
            super(DitaFilePath, self).__init__(f, theFilePath)
        finally:
            # The base initialiser has parsed everything it needs, so the
            # handle must not be left open.
            if f is not None:
                f.close()
        if f is None:
            # NOTE(review): the base initialiser has already recorded error
            # 400 against the normalised path, so this adds a second entry
            # whenever theFilePath is not already in normalised form.
            self.addError(400, (theFilePath,))
|
627 |
|
628 |
|
class DitaFileMapBase(object):
    """Interface of a store holding {file path : class DitaFile, ...}.
    Every method here raises NotImplementedError; an implementation can
    be in-memory or backed by a database e.g. the shelve module."""
    def keys(self):
        """Return the keys (file paths) of the map as an unsorted list."""
        raise NotImplementedError()

    def has_key(self, thePath):
        """Return True if thePath is a key of the map."""
        raise NotImplementedError()

    def remove(self, thePath):
        """Delete the entry for thePath, may raise KeyError."""
        raise NotImplementedError()

    def getDitaFileObj(self, thePath):
        """Return the DitaFileObj stored for thePath, may raise KeyError."""
        raise NotImplementedError()

    def setDitaFileObj(self, thePath, theObj):
        """Store theObj for thePath, either as a new entry or to write
        back a DitaFileObj that has been mutated."""
        raise NotImplementedError()
|
652 |
|
class DitaFileMapInMemory(DitaFileMapBase):
    """Keeps the map of {file path : class DitaFile, ...} in a plain dict."""
    def __init__(self):
        # Map of {file path : class DitaFile, ...}
        self._fileMap = {}

    def keys(self):
        """Return the keys (file paths) of the map as an unsorted list."""
        return list(self._fileMap)

    def has_key(self, thePath):
        """Return True if thePath is a key of the map."""
        return thePath in self._fileMap

    def remove(self, thePath):
        """Delete the entry for thePath, may raise KeyError."""
        del self._fileMap[thePath]

    def getDitaFileObj(self, thePath):
        """Return the DitaFileObj stored for thePath, may raise KeyError."""
        return self._fileMap[thePath]

    def setDitaFileObj(self, thePath, theObj):
        """Store theObj for thePath, replacing any previous entry."""
        self._fileMap[thePath] = theObj
|
678 |
|
class DitaFileMapShelve(DitaFileMapBase):
    """Keeps the map of {file path : class DitaFile, ...} in a shelve
    database file named DBASE_FILENAME."""
    DBASE_FILENAME = 'linkchecker.dbase'
    def __init__(self):
        """Open a fresh database, deleting any file already at
        DBASE_FILENAME."""
        dbPath = self.DBASE_FILENAME
        if os.path.exists(dbPath):
            os.remove(dbPath)
        self._db = shelve.open(dbPath)
        # Our own record of the keys, used as a 'cache' as shelf.keys() is slow
        self._keys = set()

    def keys(self):
        """Return the keys (file paths) of the map as an unsorted list."""
        return list(self._keys)

    def has_key(self, thePath):
        """Return True if thePath is a key of the map."""
        return thePath in self._keys

    def remove(self, thePath):
        """Delete the entry for thePath, may raise KeyError."""
        del self._db[thePath]
        self._keys.remove(thePath)

    def getDitaFileObj(self, thePath):
        """Return the DitaFileObj stored for thePath, may raise KeyError."""
        return self._db[thePath]

    def setDitaFileObj(self, thePath, theObj):
        """Store theObj for thePath. This must be called again after theObj
        is mutated, as that is how the change is written to the database."""
        self._db[thePath] = theObj
        self._keys.add(thePath)
|
710 |
|
711 class DitaFileSet(DitaLinkCheckBase): |
|
712 """Holds information about a set of DITA files.""" |
|
713 STATS_KEYS = ('Maps', 'Non-maps', 'Files', 'Bytes', 'IDs', 'Refs') |
|
714 def __init__(self, |
|
715 theDir, |
|
716 procDir=True, |
|
717 thePatterns=None, |
|
718 recursive=False, |
|
719 testExt=False, |
|
720 useDbase=False): |
|
721 """Constructor. theDir is the root directory of DITA XML. |
|
722 procDir - If True then process this directory immediately, otherwise |
|
723 the directory can be processed independently and |
|
724 _addFileObj() or _addDitaFileObj() invoked. |
|
725 thePatterns - If supplied this should be a space separated string of |
|
726 fnmatch extensions. |
|
727 recursive - If True and procDir True the directory is processed recursively. |
|
728 testExt - If True then test external URLs. |
|
729 useDbase - If True then store all DitaFile objects in an external dbase |
|
730 (slower but less memory issues). |
|
731 """ |
|
732 if thePatterns is None: |
|
733 thePatterns = FNMATCH_STRING.split(' ') |
|
734 if theDir is not None: |
|
735 theDir = normalisePath(theDir) |
|
736 super(DitaFileSet, self).__init__(theDir) |
|
737 logging.info('DitaFileSet starting to read...') |
|
738 GlobalUrlCache.clear() |
|
739 self._testExt = testExt |
|
740 # Set up how we store the DitaFile objects |
|
741 if useDbase: |
|
742 self._fileMap = DitaFileMapShelve() |
|
743 else: |
|
744 self._fileMap = DitaFileMapInMemory() |
|
745 # Map of (str(rootId) : filepath, ...) with no duplicates |
|
746 # Keys will be in self._uniqueRootIds |
|
747 self._rootIdToFilePathMap = {} |
|
748 # Path to the unique DITA map |
|
749 self._uniqueMapPath = None |
|
750 # Count of {error_code : count, ...} |
|
751 self._errCountMap = CountDict() |
|
752 # Statistics |
|
753 self._statsMap = CountDict() |
|
754 ## and initialise |
|
755 #for k in self.STATS_KEYS: |
|
756 # self._statsMap[k] |
|
757 # Finalisation control (weak) |
|
758 self._hasFinalised = False |
|
759 # Timers |
|
760 self._timeRead = time.clock() |
|
761 self._timeAnalyse = 0.0 |
|
762 if procDir: |
|
763 if theDir is not None and os.path.isdir(theDir): |
|
764 self._readDir(theDir, thePatterns, recursive) |
|
765 else: |
|
766 self.addError(500, (theDir,)) |
|
767 # Finalise and run all the tests |
|
768 self.finalise() |
|
769 |
|
770 @property |
|
771 def errCountMap(self): |
|
772 return self._errCountMap |
|
773 |
|
774 @property |
|
775 def statsMap(self): |
|
776 return self._statsMap |
|
777 |
|
778 def writeStatistics(self, s=sys.stdout): |
|
779 """Writes out read statistics.""" |
|
780 s.write(' Statistics '.center(PRINT_WIDTH, '=')) |
|
781 s.write('\n') |
|
782 if len(self._statsMap) > 0: |
|
783 o = self.STATS_KEYS |
|
784 #assert(set(o) == set(self._statsMap.keys())), \ |
|
785 # '%s != %s' % (o, self._statsMap.keys()) |
|
786 for k in o: |
|
787 try: |
|
788 m = self._statsMap[k] / (1024.0*1024.0) |
|
789 s.write('%20s: %10d [%10.3f M]\n' % (k, self._statsMap[k], m)) |
|
790 except KeyError: |
|
791 s.write('%20s: %10s \n' % (k, 'Not seen')) |
|
792 s.write('%20s: %10.3f (s)\n' % ('Read time', self._timeRead)) |
|
793 s.write('%20s: %10.3f (s)\n' % ('Analysis time', self._timeAnalyse)) |
|
794 s.write('='*PRINT_WIDTH) |
|
795 else: |
|
796 s.write('Nothing processed.') |
|
797 s.write('\n') |
|
798 |
|
799 def writeErrorSummary(self, s=sys.stdout): |
|
800 s.write(' Error Summary '.center(PRINT_WIDTH, '=')) |
|
801 s.write('\n') |
|
802 if len(self._errCountMap): |
|
803 s.write('%4s %10s %s\n' % ('Code', 'Count', 'Error')) |
|
804 s.write('%4s %10s %s\n' % ('----', '-----', '-----')) |
|
805 errCodeS = self._errCountMap.keys() |
|
806 errCodeS.sort() |
|
807 for c in errCodeS: |
|
808 s.write('%4d %10d %s\n' \ |
|
809 % (c, self._errCountMap[c], genericStringForErrorCode(c))) |
|
810 else: |
|
811 s.write('No errors\n') |
|
812 s.write('='*PRINT_WIDTH) |
|
813 s.write('\n') |
|
814 |
|
815 def writeErrors(self, isGeneric, theFilter, theStream=sys.stdout): |
|
816 """Writes out errors for me and my files.""" |
|
817 theStream.write('\n'.join(self.errStrings(isGeneric, theFilter))) |
|
818 fileS = self._fileMap.keys() |
|
819 fileS.sort() |
|
820 for aFile in fileS: |
|
821 # Immutable call so just use get |
|
822 self._fileMap.getDitaFileObj(aFile).writeErrors(isGeneric, theFilter, theStream) |
|
823 |
|
824 def allErrStrings(self, isGeneric, theFilter): |
|
825 """Return a sorted list of error messages without duplicates including |
|
826 files.""" |
|
827 retSet = set(self.errStrings(isGeneric, theFilter)) |
|
828 fileS = self._fileMap.keys() |
|
829 fileS.sort() |
|
830 for aFilePath in self._fileMap.keys(): |
|
831 # Immutable call so just use get |
|
832 for anErr in self._fileMap.getDitaFileObj(aFilePath).errStrings(isGeneric, theFilter): |
|
833 retSet.add(anErr) |
|
834 retList = list(retSet) |
|
835 retList.sort() |
|
836 return retList |
|
837 |
|
838 def _readDir(self, theDir, thePatS, recursive): |
|
839 assert(os.path.isdir(theDir)) |
|
840 for aName in os.listdir(theDir): |
|
841 aPath = os.path.join(theDir, aName) |
|
842 if os.path.isdir(aPath) and recursive: |
|
843 self._readDir(aPath, thePatS, recursive) |
|
844 elif os.path.isfile(aPath): |
|
845 for aPat in thePatS: |
|
846 if fnmatch.fnmatch(aName, aPat): |
|
847 assert(not self._fileMap.has_key(aPath)) |
|
848 logging.debug(' Reading %s' % aPath) |
|
849 try: |
|
850 f = open(aPath) |
|
851 except IOError: |
|
852 f = None |
|
853 self._addFileObj(f, aPath) |
|
854 break |
|
855 |
|
856 def _addFileObj(self, theFileObj, theFilePath): |
|
857 myObj = DitaFileObj(theFileObj, theFilePath) |
|
858 self._addDitaFileObj(myObj) |
|
859 |
|
860 def _addDitaFileObj(self, theDitaFileObj): |
|
861 if self._fileMap.has_key(theDitaFileObj.identity): |
|
862 self.addError(504, (theDitaFileObj.identity,)) |
|
863 else: |
|
864 # Mutable call so use set |
|
865 self._fileMap.setDitaFileObj(theDitaFileObj.identity, theDitaFileObj) |
|
866 # Update statistics (files, bytes, ids, refs) etc. |
|
867 self._statsMap['Files'] += 1 |
|
868 self._statsMap['Bytes'] += theDitaFileObj.bytes |
|
869 self._statsMap['IDs'] += len(theDitaFileObj.idS) |
|
870 self._statsMap['Refs'] += len(theDitaFileObj.refS) |
|
871 if theDitaFileObj.isMap: |
|
872 self._statsMap['Maps'] += 1 |
|
873 else: |
|
874 self._statsMap['Non-maps'] += 1 |
|
875 |
|
def finalise(self):
    """Creates the environment for all checks and then runs them.

    The work is done once only, guarded by self._hasFinalised. On the first
    call the read timer is converted to an elapsed time, each cross-file
    check is run in turn, the error counts are totalled into a new CountDict
    and the elapsed analysis time is recorded. Start and done messages are
    logged on every call.
    """
    logging.info('DitaFileSet.finalise() start...')
    if not self._hasFinalised:
        self._timeRead = time.clock() - self._timeRead
        self._timeAnalyse = time.clock()
        # Order matters: the root ID map is built before the other checks.
        for aCheck in (
                self._initRootIdToFilePathMap,
                self._checkDupeIdS,
                self._setMapCycles,
                self._checkLonely,
                self._checkRefArcs,
            ):
            aCheck()
        self._errCountMap = CountDict()
        self.updateErrorCount(self._errCountMap)
        self._hasFinalised = True
        self._timeAnalyse = time.clock() - self._timeAnalyse
    logging.info('DitaFileSet.finalise() done.')
|
892 |
|
def _initRootIdToFilePathMap(self):
    """Build self._rootIdToFilePathMap and report duplicate root IDs.

    On return self._rootIdToFilePathMap is {root ID : file identity, ...}
    holding only root IDs that occur in exactly one file. Every root ID that
    occurs in more than one file is removed from that map and reported as a
    single 501 error carrying the ID and the sorted tuple of files.
    Files whose root ID is None are ignored.
    """
    # Map of (str(rootId) : filepath, ...) with no duplicates
    uniqueMap = {}
    self._rootIdToFilePathMap = uniqueMap
    # Temporary map of (str(rootId) : [filepath, ...], ...)
    dupeFiles = {}
    for fPath in self._fileMap.keys():
        # fObj is not written to so we don't need to use set
        fObj = self._fileMap.getDitaFileObj(fPath)
        rId = fObj.rootId
        if rId is None:
            continue
        if rId in dupeFiles:
            # Third or later sighting
            dupeFiles[rId].append(fObj.identity)
        elif rId in uniqueMap:
            # Second sighting: move the first file out of the unique map
            dupeFiles[rId] = [uniqueMap.pop(rId), fPath]
        else:
            uniqueMap[rId] = fObj.identity
    # Set duplicate errors
    for rId, files in dupeFiles.items():
        files.sort()
        self.addError(501, (rId, tuple(files)))
|
924 |
|
def _checkDupeIdS(self):
    """Checks if there are any duplicate IDs anywhere.

    Any ID that is seen in a file after it has already been seen is reported
    as one 505 error carrying the ID and the sorted tuple of the files it
    was seen in.
    """
    # {ID : first file ID is seen in, ...}
    firstSeenIn = {}
    # {ID : [fileS, ...], ...}
    dupeIdMap = {}
    for fPath in self._fileMap.keys():
        # Read only access so there is no need to set the object back
        fObj = self._fileMap.getDitaFileObj(fPath)
        for anId in fObj.idS:
            if anId not in firstSeenIn:
                firstSeenIn[anId] = fPath
            elif anId in dupeIdMap:
                dupeIdMap[anId].append(fPath)
            else:
                dupeIdMap[anId] = [firstSeenIn[anId], fPath]
    # Now add to errs as a 505 error message with the files sorted
    for anId, files in dupeIdMap.items():
        files.sort()
        self.addError(505, (anId, tuple(files)))
|
950 |
|
def _retMapAdjList(self):
    """Create an adjacency list {file_path : set(refs), ...} (all strings)

    Only files that are maps get an entry. The set for a map holds the file
    part of every reference in that map, resolved against the map's own
    identity by fileFragment().
    """
    adjList = {}
    for fPath in self._fileMap.keys():
        fObj = self._fileMap.getDitaFileObj(fPath)
        if not fObj.isMap:
            continue
        srcId = fObj.identity
        assert(srcId not in adjList)
        adjList[srcId] = set(r.fileFragment(srcId)[0] for r in fObj.refS)
    return adjList
|
963 |
|
def _setMapCycles(self):
    """Sets any cyclic references seen in DITA maps.

    Every map in the adjacency list is used in turn as the start of a
    branch; the cycles found from all the starting points are collected in
    one set and then reported via _setCycleErrors().
    """
    adjList = self._retMapAdjList()
    cycles = set()
    for startPath in adjList:
        # _recurseCycles() leaves the branch as it found it, so a fresh
        # single element branch per start is the same as push/recurse/pop.
        self._recurseCycles(adjList, [startPath], cycles)
    self._setCycleErrors(cycles)
|
975 |
|
def _recurseCycles(self, a, b, c):
    """Depth first search for cycles from the end of the branch b.

    a -- Adjacency list {path : iterable of referenced paths, ...}.
    b -- The current branch, a non-empty list of paths. It is extended
         while recursing and is restored before returning.
    c -- Set that receives each cycle found as a tuple of paths.

    A path that has no entry in a is a leaf and ends the search quietly.
    """
    assert(len(b) > 0)
    try:
        successors = a[b[-1]]
    except KeyError:
        # Tip of the branch is not in the adjacency list; nothing to follow
        return
    for r in successors:
        if r in b:
            # Back edge: the cycle is the branch from r to the tip
            c.add(tuple(b[b.index(r):]))
        else:
            b.append(r)
            self._recurseCycles(a, b, c)
            b.pop()
|
992 |
|
def _setCycleErrors(self, theC):
    """Record a 701 error for every cycle in theC.

    theC is an iterable of cycles, each a non-empty tuple of file paths.
    For each cycle one 701 error is added to the file set itself. Then, for
    every member of the cycle, a 701 error is added to that member's file
    object whose argument is the cycle written out starting and ending at
    that member.
    """
    for aT in theC:
        self.addError(701, (str(aT),))
        members = list(aT)
        assert(len(members) > 0)
        for i in range(len(members)):
            # Rotate so that member i leads, then close the loop
            rotation = members[i:] + members[:i]
            head = rotation[0]
            # As we are mutating the file object we need to use both
            # getDitaFileObj() and setDitaFileObj()
            fObj = self._fileMap.getDitaFileObj(head)
            fObj.addError(701, (str(rotation + [head]),))
            self._fileMap.setDitaFileObj(head, fObj)
|
1010 |
|
def _checkLonely(self):
    """Runs the lonely map check and then the lonely topic check."""
    self._checkLonelyMaps()
    self._checkLonelyTopics()
|
1014 |
|
def _checkLonelyMaps(self):
    """Checks for lonely maps.

    A map is lonely if no map references it. Every map starts as a
    candidate and is struck off when some map's reference resolves to it.
    What is left decides the outcome:
    - More than one map: a 700 error is added for each of them.
    - Exactly one map: it is stored in self._uniqueMapPath, no error.
    - No maps: nothing is recorded.
    """
    mapPathSet = set()
    pathSetRemain = set()
    for f in self._fileMap.keys():
        if self._fileMap.getDitaFileObj(f).isMap:
            mapPathSet.add(f)
            pathSetRemain.add(f)
    for aPath in mapPathSet:
        myMapObj = self._fileMap.getDitaFileObj(aPath)
        for r in myMapObj.refS:
            # Resolve against the map that holds the reference. This used
            # to resolve against f, the last key left over from the loop
            # above, which sent relative references to the wrong directory.
            refFile = r.fileFragment(aPath)[0]
            # No-op if refFile is a topic or an already seen map
            pathSetRemain.discard(refFile)
    if len(pathSetRemain) > 1:
        for aPath in pathSetRemain:
            self.addError(700, (aPath,))
    elif len(pathSetRemain) == 1:
        self._uniqueMapPath = pathSetRemain.pop()
|
1037 |
|
def _checkLonelyTopics(self):
    """Checks for topics that are not referenced by any map.

    Every non-map file starts as a candidate. Each reference in each map is
    resolved against that map and the resulting file is struck off the
    candidates. A 600 error is added for every candidate left over.
    """
    mapPaths = set()
    unreferenced = set()
    for fPath in self._fileMap.keys():
        if self._fileMap.getDitaFileObj(fPath).isMap:
            mapPaths.add(fPath)
        else:
            unreferenced.add(fPath)
    for aMapPath in mapPaths:
        for r in self._fileMap.getDitaFileObj(aMapPath).refS:
            refFile, _frag = r.fileFragment(aMapPath)
            # No-op when the target is not a candidate, for example a topic
            # that has already been seen in another map
            unreferenced.discard(refFile)
    for aPath in unreferenced:
        self.addError(600, (aPath,))
|
1063 |
|
def _checkRefArcs(self):
    """Checks all references are reachable.

    For every reference in every file:
    - A reference with a URI scheme is external. It is tested with
      checkUrl() only if self._testExt is set, otherwise it is skipped.
    - Otherwise the reference is resolved to (file, fragment). If the file
      is not in the file map and does not exist on the file system a 410
      error is added to the source file. If the file is known but a
      non-empty fragment is not one of its IDs a 411 error is added.
      Known targets then have their element names compared by
      _checkRefArcElemName().
    A source file that has had errors added is written back to the file map.
    """
    for fPath in self._fileMap.keys():
        fObjSrc = self._fileMap.getDitaFileObj(fPath)
        hasMutated = False
        for rObjSrc in fObjSrc.refS:
            if rObjSrc.scheme:
                # Decide whether to test an external URL
                if self._testExt:
                    rObjSrc.checkUrl()
                continue
            # For a URL fileFragment() returns (None, None) but those have
            # been dealt with above, hence the assertions.
            fi, fr = rObjSrc.fileFragment(fPath)
            assert(fi is not None), 'fi is None for rObjSrc: %s in file: %s' % (rObjSrc, fPath)
            assert(fr is not None), 'fr is None for rObjSrc: %s in file: %s' % (rObjSrc, fPath)
            try:
                fObjTgt = self._fileMap.getDitaFileObj(fi)
            except KeyError:
                # Target file can not be found in the IR
                # check the file system to see if it is a non-DITA resource
                if not os.path.isfile(fi):
                    fObjSrc.addError(410, (fi,))
                    hasMutated = True
                continue
            # Target file is found, test fragment
            if len(fr) > 0 and not fObjTgt.hasId(fr):
                # Fragment not found
                fObjSrc.addError(411, (fi, fr))
                hasMutated = True
            if self._checkRefArcElemName(fObjSrc, rObjSrc, fObjTgt, fr):
                hasMutated = True
        if hasMutated:
            self._fileMap.setDitaFileObj(fPath, fObjSrc)
|
1104 |
|
def _checkRefArcElemName(self, fObjSrc, rObjSrc, fObjTgt, frag):
    """Test source and target element names
    e.g. Source <cxxClassRef> should match target <cxxClass>
    And in vanilla DITA:
    <topicref href="batcaring.dita" type="task"></topicref>
    or:
    <topicref href="batcaring.dita" format="ditamap"></topicref>
    Should match target element <task>.

    fObjSrc -- File object holding the reference; errors are added to it.
    rObjSrc -- The reference being checked.
    fObjTgt -- File object that the reference resolves to.
    frag    -- Fragment part of the reference. An empty fragment means the
               target is the root element of fObjTgt.

    Returns True if an error was added to fObjSrc, False otherwise. False is
    also returned when there is no target to compare with (no usable root
    ID, or the fragment is not an ID in fObjTgt).
    """
    hasMutated = False
    isRootTgt = len(frag) == 0
    if isRootTgt:
        # iObjTgt is the root element of fObjTgt
        if fObjTgt.rootId is None or fObjTgt.idElem(fObjTgt.rootId) is None:
            # Covered by other error codes
            return False
        iObjTgt = fObjTgt.idObj(fObjTgt.rootId)
    elif fObjTgt.hasId(frag):
        iObjTgt = fObjTgt.idObj(frag)
    else:
        # frag not found that will be a 411 error (handled by caller).
        return False
    # Have an rObjSrc + iObjTgt so check elements
    # First case: <fooRef> must point at <foo>
    if rObjSrc.elem.endswith('Ref'):
        if rObjSrc.elem[:-3] != iObjTgt.elem:
            if isRootTgt:
                fObjSrc.addError(412, (rObjSrc.elem, iObjTgt.elem))
            else:
                fObjSrc.addError(413, (fObjTgt.idElem(frag), rObjSrc.elem, frag))
            hasMutated = True
    # Second case(s) for vanilla DITA
    elif rObjSrc.elem == 'topicref':
        # Check DITA map links
        if rObjSrc.format == 'ditamap' and iObjTgt.elem != 'map':
            # Target must be a root element (actually we don't care)
            fObjSrc.addError(414, (iObjTgt.elem,))
            hasMutated = True
        elif iObjTgt.elem == 'map' and rObjSrc.format != 'ditamap':
            fObjSrc.addError(415, (rObjSrc.format,))
            hasMutated = True
        elif not (rObjSrc.format == 'ditamap' and iObjTgt.elem == 'map'):
            # An unspecified type attribute is accepted whatever the target
            # is. From the DITA standard on the type attribute:
            # "When the type attribute is unspecified, it should be
            # determined by inspecting the target if possible. If the
            # target cannot be inspected for some reason, the value
            # should default to "topic".
            # Note: DITA 1.2 takes a different view...
            if rObjSrc.refType is not None and rObjSrc.refType != iObjTgt.elem:
                if isRootTgt:
                    fObjSrc.addError(416, (rObjSrc.refType, iObjTgt.elem,))
                else:
                    fObjSrc.addError(417, (rObjSrc.refType, iObjTgt.elem, frag,))
                hasMutated = True
        # Otherwise topicref looks OK
    elif rObjSrc.elem != 'xref' and rObjSrc.elem not in XREF_DESCENDENTS:
        # Unknown referencing element
        if isRootTgt:
            fObjSrc.addError(418, (rObjSrc.elem, fObjTgt.doctype))
        else:
            fObjSrc.addError(419, (rObjSrc.elem, fObjTgt.idElem(frag), frag))
        hasMutated = True
    return hasMutated
|
1174 |
|
def updateErrorCount(self, theMap):
    """Updates a map of {error_code, : count, ...}.

    theMap is incremented in place by the number of errors held against each
    error code on this file set, if it has any, and is then passed to every
    file object so that it can add its own counts. theMap must tolerate
    += on a code it has not seen before.
    """
    setErrors = self._errS
    if setErrors is not None:
        for aCode in setErrors.keys():
            theMap[aCode] += len(setErrors[aCode])
    for fPath in self._fileMap.keys():
        # Treated as a mutating call so the object is written back
        fObj = self._fileMap.getDitaFileObj(fPath)
        fObj.updateErrorCount(theMap)
        self._fileMap.setDitaFileObj(fPath, fObj)
|
1185 |
|
def debugDump(self, s=sys.stdout, prefix=''):
    """Dump of IR for debug purposes.

    Writes a banner line to s, then the debugDump() of every file object in
    order of sorted file map key, then a closing banner and a blank line.
    prefix is passed through to each file object.
    """
    s.write(' Debug Dump '.center(PRINT_WIDTH, '+'))
    s.write('\n')
    for aPath in sorted(self._fileMap.keys()):
        self._fileMap.getDitaFileObj(aPath).debugDump(s, prefix)
    s.write(' END Debug Dump '.center(PRINT_WIDTH, '+'))
    s.write('\n\n')
|
1196 |
|
1197 ##################################### |
|
1198 # Multiprocessing code |
|
1199 ##################################### |
|
def retDitaFileObj(thePath):
    """Returns a DitaFilePath constructed from thePath.

    This is the task that retMpDitaFileSetObj() submits to its worker pool,
    once per file."""
    return DitaFilePath(thePath)
|
1202 |
|
def genDitaPath(theDir, thePatS, recursive):
    """Generate the path of each file in theDir that matches a pattern.

    theDir    -- Path of an existing directory (asserted).
    thePatS   -- Iterable of fnmatch patterns, tested against the file name
                 only. A file is yielded once however many patterns match.
    recursive -- If true then sub-directories are searched as well.
    """
    assert(os.path.isdir(theDir))
    for entry in os.listdir(theDir):
        fullPath = os.path.join(theDir, entry)
        if recursive and os.path.isdir(fullPath):
            for subPath in genDitaPath(fullPath, thePatS, recursive):
                yield subPath
        elif os.path.isfile(fullPath):
            if any(fnmatch.fnmatch(entry, aPat) for aPat in thePatS):
                yield fullPath
|
1216 |
|
def retMpDitaFileSetObj(theDir,
                        thePatterns,
                        recursive,
                        numJobs,
                        checkExt,
                        useDb):
    """Returns a finalised DitaFileSet whose files were read in parallel.

    theDir      -- Path of an existing directory (asserted).
    thePatterns -- fnmatch patterns selecting the files to read.
    recursive   -- If true then sub-directories are read as well.
    numJobs     -- Number of worker processes, must be >= 0. Zero means use
                   one worker per CPU.
    checkExt    -- Passed to DitaFileSet as testExt.
    useDb       -- Passed to DitaFileSet as useDbase.

    Each file is turned into an object by retDitaFileObj() in a worker
    process. The objects are added to the set in submission order and the
    set is then finalised in this process.
    """
    assert(os.path.isdir(theDir))
    assert(numJobs >= 0)
    retObj = DitaFileSet(theDir, procDir=False, testExt=checkExt, useDbase=useDb)
    myNumJobs = numJobs
    if numJobs == 0:
        myNumJobs = multiprocessing.cpu_count()
    logging.info('Set multiprocessing number of jobs to %d' % myNumJobs)
    myPool = multiprocessing.Pool(processes=myNumJobs)
    try:
        for result in [
                myPool.apply_async(retDitaFileObj, (f,))
                for f in genDitaPath(theDir, thePatterns, recursive)
            ]:
            myObj = result.get()
            logging.debug('Got %s' % myObj.identity)
            retObj._addDitaFileObj(myObj)
    finally:
        # Shut the workers down as soon as reading is over rather than
        # leaving them alive through the serial finalise() below and
        # relying on garbage collection to reap them.
        myPool.close()
        myPool.join()
    # Note: finalise() is a serial process
    logging.info('retMpDitaFileSetObj(): finalising')
    retObj.finalise()
    return retObj
|
1242 |
|
1243 ###################################### |
|
1244 # Test code |
|
1245 ###################################### |
|
1246 try: |
|
1247 import cStringIO as StringIO |
|
1248 except ImportError: |
|
1249 import StringIO |
|
1250 |
|
class NullClass(unittest.TestCase):
    """An empty test case; it defines no tests of its own."""
|
1253 |
|
class TestCountDict(unittest.TestCase):
    """Tests CountDict, a map in which reading a missing key yields 0."""

    def setUp(self):
        """No fixture is needed."""
        pass

    def tearDown(self):
        """Nothing to clean up."""
        pass

    def testSetUpTearDown(self):
        """TestCountDict: test setUp() and tearDown()."""
        pass

    def test_basic(self):
        """TestCountDict: test basic functionality."""
        counts = CountDict()
        key = 'wtf'
        # Absent until it is first read...
        self.assertEqual(counts.has_key(key), False)
        # ...reading it gives zero and creates the entry
        self.assertEqual(counts[key], 0)
        self.assertEqual(counts.has_key(key), True)
        counts[key] += 1
        self.assertEqual(counts[key], 1)
|
1273 |
|
class TestDitaId(unittest.TestCase):
    """Tests DitaId: reading the id, GUID checking, comparison, hashing and
    the error strings it reports."""

    def setUp(self):
        """No fixture is needed."""
        pass

    def tearDown(self):
        """Nothing to clean up."""
        pass

    @staticmethod
    def _rootOf(theXml):
        """Parses theXml and returns the root element."""
        return etree.parse(StringIO.StringIO(theXml)).getroot()

    def _assertNoErrors(self, theObj):
        """Asserts that theObj has neither generic nor specific errors."""
        self.assertEqual(theObj.errStrings(True, None), [])
        self.assertEqual(theObj.errStrings(False, None), [])

    def testSetUpTearDown(self):
        """DitaId: test setUp() and tearDown()."""
        pass

    def test_basic(self):
        """DitaId: basic read of a node with an id"""
        myObj = DitaId(self._rootOf("""<cxxClass id="class_big_endian"/>"""))
        self.assertEqual(myObj.id, 'class_big_endian')
        self.assertEqual(str(myObj), 'class_big_endian')
        self._assertNoErrors(myObj)

    def test_guid_00(self):
        """DitaId: basic read of a node with a GUID id"""
        myObj = DitaId(
            self._rootOf("""<cxxClass id="GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E"/>""")
        )
        self.assertEqual(myObj.id, 'GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E')
        myObj.checkGuid()
        self._assertNoErrors(myObj)

    def test_guid_01(self):
        """DitaId: basic read of a node with a GUID id fails"""
        myObj = DitaId(
            self._rootOf("""<cxxClass id="25825EC4-341F-3EA4-94AA-7DCE380E6D2E"/>""")
        )
        self.assertEqual(myObj.id, '25825EC4-341F-3EA4-94AA-7DCE380E6D2E')
        myObj.checkGuid()
        # The id lacks the GUID- prefix so the check must complain
        self.assertEqual(
            myObj.errStrings(False, None),
            [
                'GUID specification does not match id="25825EC4-341F-3EA4-94AA-7DCE380E6D2E"'
            ])
        self.assertEqual(
            myObj.errStrings(True, None),
            [
                'GUID specification does not match id="%s"' % GENERIC_STRING,
            ])

    def test_cmp_eq_00(self):
        """DitaId: cmp(), == of two identical nodes"""
        myRoot = self._rootOf("""<cxxClass id="class_big_endian"/>""")
        myObj_00 = DitaId(myRoot)
        myObj_01 = DitaId(myRoot)
        self.assertEqual(cmp(myObj_00, myObj_01), 0)
        self.assertEqual((myObj_00 == myObj_01), True)

    def test_cmp_eq_01(self):
        """DitaId: cmp(), == of two identical nodes from different elements."""
        # Same id, different element names: still equal
        myObj_00 = DitaId(self._rootOf("""<cxxClass id="big_endian"/>"""))
        myObj_01 = DitaId(self._rootOf("""<cxxStruct id="big_endian"/>"""))
        self.assertEqual(cmp(myObj_00, myObj_01), 0)
        self.assertEqual((myObj_00 == myObj_01), True)

    def test_set(self):
        """DitaId: read of a node with an id several times into a set and check unique,"""
        myRoot = self._rootOf("""<cxxClass id="class_big_endian"/>""")
        s = set()
        for _i in range(8):
            s.add(DitaId(myRoot))
        self.assertEqual(len(s), 1)
        self.assertEqual(DitaId(myRoot) in s, True)

    def test_map(self):
        """DitaId: read of a node with an id several times into a map and check unique,"""
        myRoot = self._rootOf("""<cxxClass id="class_big_endian"/>""")
        m = {}
        for _i in range(8):
            m[DitaId(myRoot)] = 1
        self.assertEqual(len(m), 1)
        self.assertEqual(m.has_key(DitaId(myRoot)), True)

    def test_error_hash(self):
        """DitaId: error with a '#' in an id"""
        myObj = DitaId(self._rootOf("""<cxxClass id="class_#big_endian"/>"""))
        self.assertEqual(myObj.id, 'class_#big_endian')
        self.assertEqual(str(myObj), 'class_#big_endian')
        self.assertEqual(
            myObj.errStrings(True, None),
            [
                genericStringForErrorCode(100),
            ]
        )
        self.assertEqual(
            myObj.errStrings(False, None),
            [
                'Character \'#\' not allowed in id="class_#big_endian"',
            ]
        )
|
1386 |
|
1387 |
|
1388 |
|
class TestDitaRef(unittest.TestCase):
    """Tests DitaRef: splitting an href into scheme, path and fragment,
    resolving it against a source file, GUID checking and the error strings
    it reports."""

    def setUp(self):
        """No fixture is needed."""
        pass

    def tearDown(self):
        """Nothing to clean up."""
        pass

    @staticmethod
    def _refFromXml(theXml):
        """Parses theXml and returns a DitaRef made from the root element."""
        return DitaRef(etree.parse(StringIO.StringIO(theXml)).getroot())

    @staticmethod
    def _drivePath(*theParts):
        """Returns theParts joined beneath 'C:<sep>', not normalised."""
        return os.path.join('C:%s' % os.sep, *theParts)

    def _assertNoErrors(self, theObj):
        """Asserts that theObj has neither specific nor generic errors."""
        self.assertEqual(theObj.errStrings(False, None), [])
        self.assertEqual(theObj.errStrings(True, None), [])

    def _assertFileFragment(self, theObj, theSrcParts, theExpParts, theFrag):
        """Asserts that theObj, resolved against the normalised source path,
        gives the normalised expected path and theFrag."""
        srcPath = normalisePath(self._drivePath(*theSrcParts))
        expPath = normalisePath(self._drivePath(*theExpParts))
        self.assertEqual(
            theObj.fileFragment(srcPath),
            (expPath, theFrag)
        )

    def testSetUpTearDown(self):
        """DitaRef: test setUp() and tearDown()."""
        pass

    def test_basic(self):
        """DitaRef: basic read of an xref node, no fragment"""
        myObj = self._refFromXml("""<xref href="class_big_endian"/>""")
        self.assertEqual(myObj.href, 'class_big_endian')
        self.assertEqual(myObj.path, 'class_big_endian')
        self.assertEqual(myObj.elem, 'xref')
        self.assertEqual(str(myObj), 'xref class_big_endian')
        self.assertEqual(myObj.fragment, '')
        self.assertEqual(myObj.scheme, '')
        self._assertNoErrors(myObj)

    def test_basic_frag(self):
        """DitaRef: basic read of an xref node, with fragment"""
        myObj = self._refFromXml("""<xref href="class_big_endian.xml#function"/>""")
        self.assertEqual(myObj.href, 'class_big_endian.xml#function')
        self.assertEqual(myObj.path, 'class_big_endian.xml')
        self.assertEqual(myObj.fragment, 'function')
        self.assertEqual(myObj.scheme, '')
        self._assertNoErrors(myObj)

    def test_file_frag_00(self):
        """DitaRef: accessing an xref node, with a file and a fragment"""
        myObj = self._refFromXml("""<xref href="class_big_endian.xml#function"/>""")
        self.assertEqual(myObj.href, 'class_big_endian.xml#function')
        self.assertEqual(myObj.path, 'class_big_endian.xml')
        self.assertEqual(myObj.fragment, 'function')
        self.assertEqual(myObj.scheme, '')
        # A bare file name resolves to a sibling of the source file
        self._assertFileFragment(
            myObj,
            ('spam', 'eggs.xml'),
            ('spam', 'class_big_endian.xml'),
            'function',
        )
        self._assertNoErrors(myObj)

    def test_file_frag_01(self):
        """DitaRef: accessing an xref node, with a file and a fragment and relative path with '\\'."""
        myObj = self._refFromXml("""<xref href="..\\chips\\class_big_endian.xml#function"/>""")
        self._assertFileFragment(
            myObj,
            ('spam', 'eggs.xml'),
            ('chips', 'class_big_endian.xml'),
            'function',
        )
        self._assertNoErrors(myObj)

    def test_file_frag_02(self):
        """DitaRef: accessing an xref node, with a file and a fragment and relative path with '/'."""
        myObj = self._refFromXml("""<xref href="../chips/class_big_endian.xml#function"/>""")
        self._assertFileFragment(
            myObj,
            ('spam', 'eggs.xml'),
            ('chips', 'class_big_endian.xml'),
            'function',
        )
        self._assertNoErrors(myObj)

    def test_file_frag_03(self):
        """DitaRef: accessing an xref node, with no file but with a fragment"""
        myObj = self._refFromXml("""<xref href="#function"/>""")
        self.assertEqual(myObj.href, '#function')
        self.assertEqual(myObj.path, '')
        self.assertEqual(myObj.fragment, 'function')
        self.assertEqual(myObj.scheme, '')
        # With no file part the reference resolves to the source file itself
        self._assertFileFragment(
            myObj,
            ('spam', 'eggs.xml'),
            ('spam', 'eggs.xml'),
            'function',
        )
        self._assertNoErrors(myObj)

    def test_basic_scheme(self):
        """DitaRef: an xref node with a URI scheme"""
        myObj = self._refFromXml(
            """<xref href="http://www.cwi.nl:80/%7Eguido/Python.html#fragment"/>"""
        )
        self.assertEqual(myObj.href, 'http://www.cwi.nl:80/%7Eguido/Python.html#fragment')
        self.assertEqual(myObj.path, '/%7Eguido/Python.html')
        self.assertEqual(myObj.fragment, 'fragment')
        self.assertEqual(myObj.scheme, 'http')
        self._assertNoErrors(myObj)

    def test_basic_scheme_file_frag(self):
        """DitaRef: an xref node with a URI scheme, invoking fileFragment()"""
        myObj = self._refFromXml(
            """<xref href="http://www.cwi.nl:80/%7Eguido/Python.html#fragment"/>"""
        )
        self.assertEqual(myObj.href, 'http://www.cwi.nl:80/%7Eguido/Python.html#fragment')
        self.assertEqual(myObj.path, '/%7Eguido/Python.html')
        self.assertEqual(myObj.fragment, 'fragment')
        self.assertEqual(myObj.scheme, 'http')
        # A URL has no file or fragment in the file system sense
        srcPath = self._drivePath('spam', 'eggs.xml')
        self.assertEqual(
            myObj.fileFragment(srcPath),
            (None, None)
        )
        self._assertNoErrors(myObj)

    def test_fail_no_href(self):
        """DitaRef: Fails on an xref node with no href attribute"""
        myObj = self._refFromXml("""<xref />""")
        self.assertEqual(
            myObj.errStrings(False, None),
            [
                'Reference element "xref" is missing href=... attribute',
            ]
        )
        self.assertEqual(
            myObj.errStrings(True, None),
            [
                'Reference element "%s" is missing href=... attribute' % GENERIC_STRING,
            ]
        )

    def test_fail_bad_frag(self):
        """DitaRef: Fails on an xref node with href attribute that has multiple '#' characters"""
        myObj = self._refFromXml("""<xref href="a#b#c" />""")
        self.assertEqual(
            myObj.errStrings(False, None),
            [
                'Multiple \'#\' not allowed in reference "a#b#c"',
            ]
        )
        self.assertEqual(
            myObj.errStrings(True, None),
            [
                'Multiple \'#\' not allowed in reference "%s"' % GENERIC_STRING,
            ]
        )

    def test_guid_00(self):
        """DitaRef: basic read of a node with a GUID file/fragment reference"""
        myObj = self._refFromXml(
            """<xref href="GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E.xml#GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E"/>"""
        )
        self.assertEqual(myObj.href, 'GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E.xml#GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E')
        self.assertEqual(myObj.path, 'GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E.xml')
        self.assertEqual(myObj.elem, 'xref')
        self.assertEqual(str(myObj), 'xref GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E.xml#GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E')
        self.assertEqual(myObj.fragment, 'GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E')
        self.assertEqual(myObj.scheme, '')
        self._assertNoErrors(myObj)

    def test_guid_01(self):
        """DitaRef: basic read of a node with a GUID file part fails"""
        myObj = self._refFromXml(
            """<xref href="GUID-.xml#GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E"/>"""
        )
        self.assertEqual(myObj.href, 'GUID-.xml#GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E')
        self.assertEqual(myObj.path, 'GUID-.xml')
        self.assertEqual(myObj.elem, 'xref')
        self.assertEqual(str(myObj), 'xref GUID-.xml#GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E')
        self.assertEqual(myObj.fragment, 'GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E')
        # No errors until the GUID check is asked for
        self._assertNoErrors(myObj)
        myObj.checkGuid()
        self.assertEqual(
            myObj.errStrings(False, None),
            [
                'GUID specification does not match file reference "GUID-.xml"'
            ])
        self.assertEqual(
            myObj.errStrings(True, None),
            [
                genericStringForErrorCode(203),
            ]
        )

    def test_guid_02(self):
        """DitaRef: basic read of a node with a GUID fragment part fails"""
        myObj = self._refFromXml(
            """<xref href="GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E.xml#GUID-25825EC4"/>"""
        )
        self.assertEqual(myObj.href, 'GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E.xml#GUID-25825EC4')
        self.assertEqual(myObj.path, 'GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E.xml')
        self.assertEqual(myObj.elem, 'xref')
        self.assertEqual(str(myObj), 'xref GUID-25825EC4-341F-3EA4-94AA-7DCE380E6D2E.xml#GUID-25825EC4')
        self.assertEqual(myObj.fragment, 'GUID-25825EC4')
        # No errors until the GUID check is asked for
        self._assertNoErrors(myObj)
        myObj.checkGuid()
        self.assertEqual(
            myObj.errStrings(False, None),
            [
                'GUID specification does not match fragment reference "GUID-25825EC4"'
            ])
        self.assertEqual(
            myObj.errStrings(True, None),
            [
                genericStringForErrorCode(204),
            ]
        )
|
1618 |
|
class TestDitaFile(unittest.TestCase):
    """Tests DitaFileObj against small XML documents held in memory.

    The cases cover a well-formed file, a missing file, an ill-formed file,
    a root element with no id, duplicate IDs and detection of maps.
    """

    def setUp(self):
        # Nothing to prepare: every test builds its own input.
        pass

    def tearDown(self):
        # Nothing to release.
        pass

    def _retDitaFileObj(self, theXml, thePath):
        """Returns the DitaFileObj created from a StringIO holding theXml,
        with thePath passed as its second argument."""
        return DitaFileObj(StringIO.StringIO(theXml), thePath)

    def testSetUpTearDown(self):
        """DitaFile: test setUp() and tearDown()."""
        pass

    def test_Basic(self):
        """DitaFile: basic read of an XML file"""
        xmlText = """<?xml version='1.0' encoding='UTF-8' standalone='no'?>
<!DOCTYPE cxxClass PUBLIC "-//NOKIA//DTD DITA C++ API Class Reference Type v0.1.0//EN" "dtd/cxxClass.dtd" >
<cxxClass id="class_big_endian">
<apiName>BigEndian</apiName>
<shortdesc/>
<cxxClassDetail>
<cxxClassDefinition>
<cxxClassAccessSpecifier value="public"/>
<cxxClassAPIItemLocation>
<cxxClassDeclarationFile name="filePath" value="K:/sf/os/commsfw/datacommsserver/esockserver/inc/es_sock.h"/>
<cxxClassDeclarationFileLine name="lineNumber" value="1520"/>
<cxxClassDefinitionFile name="filePath" value="K:/sf/os/commsfw/datacommsserver/esockserver/inc/es_sock.h"/>
<cxxClassDefinitionFileLineStart name="lineNumber" value="1516"/>
<cxxClassDefinitionFileLineEnd name="lineNumber" value="1526"/>
</cxxClassAPIItemLocation>
</cxxClassDefinition>
<apiDesc>
<p>Inserts and extracts integers in big-endian format. </p>
</apiDesc>
</cxxClassDetail>
<cxxFunction id="class_big_endian_1a9f78fb092e713acf6ffe3e8e11f1626f">
</cxxFunction>
<cxxFunction id="class_big_endian_1aedf702f5c0118e4294d1a6d9684f8441">
</cxxFunction>
<cxxFunction id="class_big_endian_1ae266722f7bb965c971155a3315bad484">
</cxxFunction>
<cxxFunction id="class_big_endian_1a497d5248ea259f8490fb40ac4f2aafb2">
</cxxFunction>
</cxxClass>"""
        ditaObj = self._retDitaFileObj(xmlText, 'foo')
        self.assertEqual(ditaObj.identity, normalisePath('foo'))
        self.assertEqual(ditaObj.doctype, 'cxxClass')
        self.assertEqual(ditaObj.rootId, 'class_big_endian')
        # Every id in the document maps to the name of the element carrying it.
        idToElem = ditaObj.idElemMap()
        self.assertEqual(
            idToElem,
            {
                'class_big_endian' : 'cxxClass',
                'class_big_endian_1a9f78fb092e713acf6ffe3e8e11f1626f' : 'cxxFunction',
                'class_big_endian_1aedf702f5c0118e4294d1a6d9684f8441' : 'cxxFunction',
                'class_big_endian_1ae266722f7bb965c971155a3315bad484' : 'cxxFunction',
                'class_big_endian_1a497d5248ea259f8490fb40ac4f2aafb2' : 'cxxFunction',
            }
        )
        # No problems expected, in either the specific or the generic form.
        for isGeneric in (False, True):
            self.assertEqual(ditaObj.errStrings(isGeneric, None), [])

    def test_missing_file(self):
        """DitaFile: read an missing XML file"""
        # None stands in for a file that could not be opened.
        ditaObj = DitaFileObj(None, 'foo')
        specificErrs = ditaObj.errStrings(False, None)
        self.assertEqual(
            specificErrs,
            ['Failed to open: "%s"' % normalisePath('foo'), ]
        )
        genericErrs = ditaObj.errStrings(True, None)
        self.assertEqual(genericErrs, [genericStringForErrorCode(400), ])

    def test_IllFormedFile(self):
        """DitaFile: read an ill-formed XML file"""
        # The root element is never closed and the text ends right after a
        # newline, which is what the expected "line 4, column 0" relies on.
        xmlText = """<?xml version='1.0' encoding='UTF-8' standalone='no'?>
<!DOCTYPE cxxClass PUBLIC "-//NOKIA//DTD DITA C++ API Class Reference Type v0.1.0//EN" "dtd/cxxClass.dtd" >
<cxxClass id="class_big_endian">
"""
        ditaObj = self._retDitaFileObj(xmlText, 'foo')
        self.assertEqual(ditaObj.identity, normalisePath('foo'))
        self.assertEqual(ditaObj.doctype, None)
        self.assertEqual(ditaObj.rootId, None)
        self.assertEqual(ditaObj.idElemMap(), {})
        specificErrs = ditaObj.errStrings(False, None)
        self.assertEqual(
            specificErrs,
            ['Can not parse: "no element found: line 4, column 0"', ]
        )
        genericErrs = ditaObj.errStrings(True, None)
        self.assertEqual(genericErrs, [genericStringForErrorCode(404), ])

    def test_missing_root_id(self):
        """DitaFile: read of an XML file with no id on root element"""
        xmlText = """<?xml version='1.0' encoding='UTF-8' standalone='no'?>
<!DOCTYPE cxxClass PUBLIC "-//NOKIA//DTD DITA C++ API Class Reference Type v0.1.0//EN" "dtd/cxxClass.dtd" >
<cxxClass>
<xref href="OtherClass">OtherClass</xref>
<cxxFunction id="class_big_endian_1a9f78fb092e713acf6ffe3e8e11f1626f"/>
</cxxClass>"""
        ditaObj = self._retDitaFileObj(xmlText, 'foo')
        self.assertEqual(ditaObj.identity, normalisePath('foo'))
        self.assertEqual(ditaObj.doctype, 'cxxClass')
        self.assertEqual(ditaObj.rootId, None)
        # The child id is still recorded although the root has none.
        idToElem = ditaObj.idElemMap()
        self.assertEqual(
            idToElem,
            {'class_big_endian_1a9f78fb092e713acf6ffe3e8e11f1626f' : 'cxxFunction', }
        )
        # The same string is expected for both the specific and generic form.
        for isGeneric in (False, True):
            errs = ditaObj.errStrings(isGeneric, None)
            self.assertEqual(errs, [genericStringForErrorCode(402)])

    def test_duplicate_id(self):
        """DitaFile: duplicate IDs"""
        xmlText = """<root id="AnID">
<elem id="AnID"/>
</root>"""
        ditaObj = self._retDitaFileObj(xmlText, 'spam.xml')
        self.assertEqual(ditaObj.identity, normalisePath('spam.xml'))
        self.assertEqual(ditaObj.doctype, 'root')
        self.assertEqual(ditaObj.rootId, 'AnID')
        # The clashing id is expected to be absent from the id/element map.
        self.assertEqual(ditaObj.idElemMap(), {})
        specificErrs = ditaObj.errStrings(False, None)
        self.assertEqual(specificErrs, ['Multiple id="AnID"', ])
        genericErrs = ditaObj.errStrings(True, None)
        self.assertEqual(genericErrs, [genericStringForErrorCode(401)])

    def test_ismap_00(self):
        """DitaFile: Is a map for <map>."""
        ditaObj = self._retDitaFileObj("""<map id="myMap"/>""", 'spam.xml')
        self.assertEqual(ditaObj.isMap, True)

    def test_ismap_01(self):
        """DitaFile: Is a map for <cxxAPIMap>."""
        ditaObj = self._retDitaFileObj("""<cxxAPIMap id="myMap"/>""", 'spam.xml')
        self.assertEqual(ditaObj.isMap, True)

    def test_Basic_01(self):
        """DitaFile: read of an simple XML file with id and xref"""
        xmlText = """<?xml version='1.0' encoding='UTF-8' standalone='no'?>
<!DOCTYPE cxxClass PUBLIC "-//NOKIA//DTD DITA C++ API Class Reference Type v0.1.0//EN" "dtd/cxxClass.dtd" >
<cxxClass id="class_big_endian">
<xref href="OtherClass">OtherClass</xref>
<cxxFunction id="class_big_endian_1a9f78fb092e713acf6ffe3e8e11f1626f"/>
</cxxClass>"""
        rootId = 'class_big_endian'
        funcId = 'class_big_endian_1a9f78fb092e713acf6ffe3e8e11f1626f'
        ditaObj = self._retDitaFileObj(xmlText, 'foo')
        self.assertEqual(ditaObj.identity, normalisePath('foo'))
        self.assertEqual(ditaObj.doctype, 'cxxClass')
        self.assertEqual(ditaObj.rootId, rootId)
        self.assertEqual(ditaObj.isMap, False)
        # Two ids (root and function) and one reference (the xref).
        self.assertEqual(len(ditaObj.idS), 2)
        self.assertEqual(len(ditaObj.refS), 1)
        # Membership queries for known and unknown ids.
        self.assertEqual(ditaObj.hasId(rootId), True)
        self.assertEqual(ditaObj.hasId(funcId), True)
        self.assertEqual(ditaObj.hasId('noID'), False)
        # Element name lookup by id; an unknown id gives None.
        self.assertEqual(ditaObj.idElem(rootId), 'cxxClass')
        self.assertEqual(ditaObj.idElem('noID'), None)
        self.assertEqual(ditaObj.idElem(funcId), 'cxxFunction')
        idToElem = ditaObj.idElemMap()
        self.assertEqual(
            idToElem,
            {
                rootId : 'cxxClass',
                funcId : 'cxxFunction',
            }
        )
        for isGeneric in (False, True):
            self.assertEqual(ditaObj.errStrings(isGeneric, None), [])
|
1812 |
|
class TestDitaFileSet(unittest.TestCase):
    """Tests DitaFileSet using XML documents held in memory.

    Most tests create the set with procDir=False, add the documents with
    _addFileObj(), call finalise() and then check the error strings and the
    error count map.
    """

    # A map with a single reference, declared as format="ditamap", to
    # another file. Filled in with (map id, href).
    _MAP_REF_XML = """<map id="%s">
<topicref href="%s" format="ditamap" />
</map>"""

    def setUp(self):
        # Nothing to prepare: every test builds its own file set.
        pass

    def tearDown(self):
        # Nothing to release.
        pass

    def _retFileSet(self, theFiles, **theKwargs):
        """Returns a finalised DitaFileSet created with procDir=False (plus
        any theKwargs) after adding each (xml string, path) pair of theFiles,
        in order, with _addFileObj()."""
        fileSet = DitaFileSet(None, procDir=False, **theKwargs)
        for xmlText, path in theFiles:
            fileSet._addFileObj(StringIO.StringIO(xmlText), path)
        fileSet.finalise()
        return fileSet

    def testSetUpTearDown(self):
        """DitaFileSet: test setUp() and tearDown()."""
        pass

    def test_None(self):
        """DitaFileSet: read of None."""
        fileSet = DitaFileSet(None)
        fileSet.finalise()
        specificErrs = fileSet.errStrings(False, None)
        self.assertEqual(specificErrs, ['Not a directory: None'])
        genericErrs = fileSet.errStrings(True, None)
        self.assertEqual(genericErrs, ['Not a directory: %s' % GENERIC_STRING, ])
        self.assertEqual(fileSet.errCountMap, {500 : 1})

    def test_basic(self):
        """DitaFileSet: Test reading a map and a couple of files."""
        mapXml = """<map id="map_00">
<topicref href="spam.dita" />
<topicref href="eggs.dita" />
</map>"""
        fileSet = self._retFileSet(
            [
                (mapXml, 'map.ditamap'),
                ('<topic id="spam"/>', 'spam.dita'),
                ('<topic id="eggs"/>', 'eggs.dita'),
            ]
        )
        for isGeneric in (False, True):
            self.assertEqual(fileSet.allErrStrings(isGeneric, None), [])
        self.assertEqual(fileSet.errCountMap, {})

    def test_duplicate_paths(self):
        """DitaFileSet: Test reading a couple of files in duplicate paths."""
        mapXml = """<map id="map_00">
<topicref href="spam.dita" />
</map>"""
        # Two different topics are added under the same path.
        fileSet = self._retFileSet(
            [
                (mapXml, 'map.ditamap'),
                ('<topic id="spam"/>', 'spam.dita'),
                ('<topic id="eggs"/>', 'spam.dita'),
            ]
        )
        specificErrs = fileSet.errStrings(False, None)
        self.assertEqual(
            specificErrs,
            ['Duplicate file path: "%s"' % normalisePath('spam.dita'), ]
        )
        genericErrs = fileSet.errStrings(True, None)
        self.assertEqual(genericErrs, [genericStringForErrorCode(504),])
        self.assertEqual(fileSet.errCountMap, {504 : 1})

    def test_duplicate_ids(self):
        """DitaFileSet: Test reading a map and a couple of files with duplicate IDs."""
        mapXml = """<map id="map_00">
<topicref href="spam.dita" />
<topicref href="eggs.dita" />
<topicref href="chips.dita" />
</map>"""
        # All three topics carry the same root id.
        chipsXml = '<topic id="chips"/>'
        fileSet = self._retFileSet(
            [
                (mapXml, 'map.ditamap'),
                (chipsXml, 'spam.dita'),
                (chipsXml, 'eggs.dita'),
                (chipsXml, 'chips.dita'),
            ]
        )
        genericErrs = fileSet.errStrings(True, None)
        self.assertEqual(
            genericErrs,
            [
                genericStringForErrorCode(505),
                genericStringForErrorCode(501),
            ]
        )
        # The specific messages list the files as chips, eggs, spam.
        fileTriple = (
            normalisePath('chips.dita'),
            normalisePath('eggs.dita'),
            normalisePath('spam.dita'),
        )
        expectedErrs = [
            """Duplicate id="chips" in files: ('%s', '%s', '%s')""" % fileTriple,
            """Duplicate root id="chips" in files: ('%s', '%s', '%s')""" % fileTriple,
        ]
        specificErrs = fileSet.errStrings(False, None)
        self.assertEqual(specificErrs, expectedErrs)
        self.assertEqual(fileSet.errCountMap, {505: 1, 501: 1})

    def test_lonely_topics(self):
        """DitaFileSet: Test a couple of lonely topics."""
        # No map is added, so neither topic is referenced by one.
        fileSet = self._retFileSet(
            [
                ('<spam id="spam"/>', 'spam'),
                ('<eggs id="eggs"/>', 'eggs'),
            ]
        )
        specificErrs = fileSet.errStrings(False, None)
        self.assertEqual(
            specificErrs,
            [
                'Topic id="%s" is not referenced by any map' % normalisePath(name)
                for name in ('eggs', 'spam')
            ]
        )
        genericErrs = fileSet.errStrings(True, None)
        self.assertEqual(genericErrs, [genericStringForErrorCode(600), ])

    def test_map_cycles_00(self):
        """DitaFileSet: Cyclic references between two maps."""
        # map_00 -> map_01 -> map_00
        fileSet = self._retFileSet(
            [
                (self._MAP_REF_XML % ('map_00', 'map_01.ditamap'), 'map_00.ditamap'),
                (self._MAP_REF_XML % ('map_01', 'map_00.ditamap'), 'map_01.ditamap'),
            ]
        )
        specificErrs = fileSet.errStrings(False, None)
        mapPaths = [
            normalisePath('map_00.ditamap'),
            normalisePath('map_01.ditamap'),
        ]
        # One message per rotation of the cycle: (00, 01) then (01, 00).
        expectedErrs = [
            'Maps "%s" are in a a cycle.' % str(tuple(mapPaths[i:] + mapPaths[:i]))
            for i in range(len(mapPaths))
        ]
        self.assertEqual(specificErrs, expectedErrs)
        genericErrs = fileSet.allErrStrings(True, None)
        self.assertEqual(genericErrs, [genericStringForErrorCode(701)])
        self.assertEqual(fileSet.errCountMap, {701 : 4})

    def test_map_cycles_01(self):
        """DitaFileSet: Cyclic references between three maps."""
        # map_00 -> map_01 -> map_02 -> map_00
        fileSet = self._retFileSet(
            [
                (self._MAP_REF_XML % ('map_00', 'map_01.ditamap'), 'map_00.ditamap'),
                (self._MAP_REF_XML % ('map_01', 'map_02.ditamap'), 'map_01.ditamap'),
                (self._MAP_REF_XML % ('map_02', 'map_00.ditamap'), 'map_02.ditamap'),
            ]
        )
        specificErrs = fileSet.errStrings(False, None)
        mapPaths = [
            normalisePath('map_00.ditamap'),
            normalisePath('map_01.ditamap'),
            normalisePath('map_02.ditamap'),
        ]
        # One message per rotation: (00, 01, 02), (01, 02, 00), (02, 00, 01).
        expectedErrs = [
            'Maps "%s" are in a a cycle.' % str(tuple(mapPaths[i:] + mapPaths[:i]))
            for i in range(len(mapPaths))
        ]
        self.assertEqual(specificErrs, expectedErrs)
        genericErrs = fileSet.errStrings(True, None)
        self.assertEqual(genericErrs, [genericStringForErrorCode(701)])
        self.assertEqual(fileSet.errCountMap, {701 : 6})

    def test_refarc_00(self):
        """DitaFileSet: Test ref arcing - all resolve."""
        mapXml = """<map id="map_00">
<topicref href="spam.dita#spam" />
<topicref href="eggs.dita#eggs" />
</map>"""
        fileSet = self._retFileSet(
            [
                (mapXml, 'map.ditamap'),
                ('<topic id="spam"/>', 'spam.dita'),
                ('<topic id="eggs"/>', 'eggs.dita'),
            ]
        )
        self.assertEqual(fileSet.errCountMap, {})
        for isGeneric in (False, True):
            self.assertEqual(fileSet.allErrStrings(isGeneric, None), [])
        for isGeneric in (False, True):
            self.assertEqual(fileSet.errStrings(isGeneric, None), [])

    def test_refarc_fail_00(self):
        """DitaFileSet: Test ref arcing - can't find file."""
        # Neither referenced file is added to the set.
        mapXml = """<map id="map_00">
<topicref href="spam_.dita" />
<topicref href="eggs_for_tea.dita" />
</map>"""
        fileSet = self._retFileSet([(mapXml, 'map.ditamap'), ])
        self.assertEqual(fileSet.errCountMap, {410: 2})
        specificErrs = fileSet.allErrStrings(False, None)
        self.assertEqual(
            specificErrs,
            [
                'Can not resolve reference to file "%s"' % normalisePath(name)
                for name in ('eggs_for_tea.dita', 'spam_.dita')
            ]
        )
        genericErrs = fileSet.allErrStrings(True, None)
        self.assertEqual(
            genericErrs,
            ['Can not resolve reference to file "..."', ]
        )
        # The failures are reported by allErrStrings() only.
        for isGeneric in (False, True):
            self.assertEqual(fileSet.errStrings(isGeneric, None), [])

    def test_refarc_fail_01(self):
        """DitaFileSet: Test ref arcing - can't find fragment."""
        # The files exist but their ids are "spam"/"eggs", not "spam_"/"eggs_".
        mapXml = """<map id="map_00">
<topicref href="spam.dita#spam_" />
<topicref href="eggs.dita#eggs_" />
</map>"""
        fileSet = self._retFileSet(
            [
                (mapXml, 'map.ditamap'),
                ('<spam id="spam"/>', 'spam.dita'),
                ('<eggs id="eggs"/>', 'eggs.dita'),
            ]
        )
        self.assertEqual(fileSet.errCountMap, {411: 2})
        specificErrs = fileSet.allErrStrings(False, None)
        self.assertEqual(
            specificErrs,
            [
                'Can resolve reference to file "%s" but not to fragment "eggs_"' % normalisePath('eggs.dita'),
                'Can resolve reference to file "%s" but not to fragment "spam_"' % normalisePath('spam.dita'),
            ]
        )
        genericErrs = fileSet.allErrStrings(True, None)
        self.assertEqual(
            genericErrs,
            [
                'Can resolve reference to file "%s" but not to fragment "%s"' % (GENERIC_STRING, GENERIC_STRING),
            ]
        )
        # The failures are reported by allErrStrings() only.
        for isGeneric in (False, True):
            self.assertEqual(fileSet.errStrings(isGeneric, None), [])

    def test_refarc_url_00(self):
        """DitaFileSet: Test ref arcing - URL."""
        mapXml = """<map id="map_00">
<topicref href="spam.dita#spam" />
<topicref href="eggs.dita#eggs" />
</map>"""
        spamXml = """<topic id="spam">
<xref href="http://www.nokia.com">Nokia</xref>
</topic>"""
        eggsXml = """<topic id="eggs">
<xref href="http://www.google.com">Google</xref>
</topic>"""
        # testExt=True is the only difference in how the set is created.
        fileSet = self._retFileSet(
            [
                (mapXml, 'map.ditamap'),
                (spamXml, 'spam.dita'),
                (eggsXml, 'eggs.dita'),
            ],
            testExt=True
        )
        self.assertEqual(fileSet.errCountMap, {})
        for isGeneric in (False, True):
            self.assertEqual(fileSet.allErrStrings(isGeneric, None), [])
        for isGeneric in (False, True):
            self.assertEqual(fileSet.errStrings(isGeneric, None), [])
|
2164 |
|
class TestDitaBookmapFileSet(unittest.TestCase):
    """Tests DitaFileSet with a bookmap whose topicref points at a concept
    topic, both held in memory."""

    def setUp(self):
        # Nothing to prepare: the test builds its own file set.
        pass

    def tearDown(self):
        # Nothing to release.
        pass

    def testSetUpTearDown(self):
        """TestDitaBookmapFileSet: test setUp() and tearDown()."""
        pass

    def test_basic(self):
        """TestDitaBookmapFileSet: Test reading a bookmap and a topic."""
        topicPath = 'GUID-00025EAD-C4B6-5408-96A3-FFDBBBDC7CAB.dita'
        bookmapXml = """<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE bookmap PUBLIC "-//OASIS//DTD DITA BookMap//EN"
"bookmap.dtd">
<bookmap id="GUID-5BDFDB6B-7801-4804-9F41-2BDC5BE53DDF">
<booktitle>
<mainbooktitle>My Bookmap</mainbooktitle>
<booktitlealt>Alternate title</booktitlealt>
</booktitle>
<frontmatter id="GUID-DA857913-F826-4CF7-A135-93F2AEB48353">
<topicref href="GUID-00025EAD-C4B6-5408-96A3-FFDBBBDC7CAB.dita" id="GUID-994B1764-393F-401F-8571-CE0955AB6CA6" />
</frontmatter>
</bookmap>
"""
        conceptXml = """<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
<concept id="GUID-00025EAD-C4B6-5408-96A3-FFDBBBDC7CAB" xml:lang="en">
<title>How to read and write a file</title>
</concept>
"""
        fileSet = DitaFileSet(None, procDir=False)
        # The bookmap is added first, then the topic it refers to.
        for xmlText, path in (
                (bookmapXml, 'bookmap.ditamap'),
                (conceptXml, topicPath),
            ):
            fileSet._addFileObj(StringIO.StringIO(xmlText), path)
        fileSet.finalise()
        # No problems expected, in either the specific or the generic form.
        for isGeneric in (False, True):
            self.assertEqual(fileSet.allErrStrings(isGeneric, None), [])
        self.assertEqual(fileSet.errCountMap, {})
|
2211 |
|
class Special(unittest.TestCase):
    """Placeholder test case that defines no tests of its own."""
|
2214 |
|
def unitTest(theVerbosity=2):
    """Runs every test case class of this module with a TextTestRunner.

    theVerbosity is passed to the runner as its verbosity.

    Returns a tuple of (tests run, number of errors, number of failures).
    """
    loader = unittest.TestLoader()
    allTests = unittest.TestSuite()
    # The classes are loaded, and so run, in this order.
    for testClass in (
            NullClass,
            TestCountDict,
            TestDitaId,
            TestDitaRef,
            TestDitaFile,
            TestDitaFileSet,
            TestDitaBookmapFileSet,
            Special,
        ):
        allTests.addTests(loader.loadTestsFromTestCase(testClass))
    runner = unittest.TextTestRunner(verbosity=theVerbosity)
    result = runner.run(allTests)
    return (result.testsRun, len(result.errors), len(result.failures))
|
2226 |
|
2227 ###################################### |
|
2228 # main() stuff |
|
2229 ###################################### |
|
def _retOptionParser():
    """Returns the OptionParser declaring every command line option of this
    script."""
    usage = "usage: %prog [options] <Directory of XML content>"
    parser = OptionParser(usage, version='%prog ' + __version__)
    parser.add_option("-d", action="store_true", dest="dump", default=False,
                      help="Dump internal representation. [default: %default]")
    parser.add_option(
            "-e", "--errors",
            type="str",
            dest="error_codes",
            default='All',
            help="Only report on certain error codes (space seperated list). [default: \"%default\"]"
        )
    parser.add_option("-f", "--file", dest="file", type="str", default='None',
                      help="Report of errors by file either 'None', 'generic', 'specific'. [default: %default]")
    parser.add_option("-g", action="store_true", dest="guid", default=False,
                      help="Enforce GUID specification. [default: %default]")
    parser.add_option(
            "-j", "--jobs",
            type="int",
            dest="jobs",
            default=-1,
            help="Max processes when multiprocessing. 0 takes CPUs, -1 no MP. [default: %default]"
        )
    parser.add_option(
            "-l", "--loglevel",
            type="int",
            dest="loglevel",
            default=20,
            help="Log Level (debug=10, info=20, warning=30, [error=40], critical=50) [default: %default]"
        )
    parser.add_option(
            "-p", "--pattern",
            type="str",
            dest="pattern",
            default=FNMATCH_STRING,
            help="Pattern match. [default: \"%default\"]"
        )
    parser.add_option("-r", action="store_true", dest="recursive", default=False,
                      help="Recursive. [default: %default]")
    parser.add_option("-s", action="store_true", dest="shelve", default=False,
                      help="Use the shelve dBase rather than storing the internal representation in memory. This is slower but is useful for large data sets where a memory error might occur. [default: %default]")
    parser.add_option("-u", action="store_true", dest="unit_test", default=False,
                      help="Execute unit tests and exit. [default: %default]")
    parser.add_option("-x", action="store_true", dest="ext_url", default=False,
                      help="Test external |URLs. [default: %default]")
    parser.add_option("-?", action="store_true", dest="query_errors", default=False,
                      help="Display the error types that are detected. [default: %default]")
    return parser

def main():
    """Command line entry point.

    Parses the options, optionally runs the unit tests (-u) and lists the
    detected error types (-?), then, given exactly one directory argument,
    builds the file set and writes its statistics, error summary and,
    depending on --file, the generic or specific problems.

    Returns 0 on success, or 1 if the unit tests reported errors or
    failures. Invalid command lines are reported through parser.error(),
    which for the standard optparse.OptionParser prints the usage and exits
    the process with status 2 rather than returning.
    """
    # Single-argument print(...) behaves the same as the print statement.
    print('CMD: %s' % ' '.join(sys.argv))
    parser = _retOptionParser()
    (options, args) = parser.parse_args()
    logging.basicConfig(
        level=options.loglevel,
        format='%(asctime)s %(levelname)-8s %(message)s',
        stream=sys.stdout,
    )
    if options.file not in ('None', 'generic', 'specific'):
        parser.error("--file option must be: 'None' | 'generic' | 'specific'")
    # Validate the error code filter up front so that a bad value is
    # reported as a usage error before any content has been processed,
    # rather than as a ValueError traceback afterwards.
    # None means "no filter given": every known code is reported.
    errFilter = None
    if options.error_codes != 'All':
        try:
            errFilter = set([int(i) for i in options.error_codes.split()])
        except ValueError:
            parser.error(
                "--errors option must be 'All' or a space separated list of integers, not: \"%s\""
                % options.error_codes
            )
    exitStatus = 0
    if options.unit_test:
        # Reflect test errors and failures in the exit status.
        _testsRun, numErrors, numFailures = unitTest()
        if numErrors or numFailures:
            exitStatus = 1
    if options.query_errors:
        writeGenericStringsForErrorCodes()
    if len(args) > 1:
        parser.error("Too many arguments, I need only one.")
    if len(args) < 1:
        # -u and -? are complete requests on their own; anything else
        # needs a directory to work on.
        if not (options.unit_test or options.query_errors):
            parser.print_help()
            parser.error("I can't do much without a path to the XML content.")
        return exitStatus
    # Exactly one argument: the directory of XML content.
    if options.jobs > -1:
        myObj = retMpDitaFileSetObj(
            args[0],
            options.pattern.split(' '),
            options.recursive,
            options.jobs,
            options.ext_url,
            options.shelve,
        )
    else:
        myObj = DitaFileSet(args[0],
                            procDir=True,
                            thePatterns=options.pattern.split(' '),
                            recursive=options.recursive,
                            testExt=options.ext_url,
                            useDbase=options.shelve,
                            )
    if options.dump:
        myObj.debugDump()
    myObj.writeStatistics()
    myObj.writeErrorSummary()
    # TODO: Write out the results in different ways
    if errFilter is None:
        errFilter = set(PROBLEM_CODE_FORMAT.keys())
    if options.file == 'generic':
        print('Generic problems:')
        myObj.writeErrors(True, errFilter)
    elif options.file == 'specific':
        print('Specific problems:')
        myObj.writeErrors(False, errFilter)
    return exitStatus
|
2333 |
|
if __name__ == '__main__':
    # freeze_support() is called before anything else when run as a script.
    multiprocessing.freeze_support()
    # The value returned by main() becomes the exit status of the process.
    exitStatus = main()
    sys.exit(exitStatus)
|