python-2.5.2/win32/Lib/xml/dom/pulldom.py
changeset 0 ae805ac0140d
equal deleted inserted replaced
-1:000000000000 0:ae805ac0140d
       
     1 import xml.sax
       
     2 import xml.sax.handler
       
     3 import types
       
     4 
       
# Types that parse() treats as a file name to open rather than as an
# already-open stream.  types.UnicodeType is looked up inside the try
# block so that an interpreter lacking it falls back to types.StringType
# alone.
try:
    _StringTypes = [types.StringType, types.UnicodeType]
except AttributeError:
    _StringTypes = [types.StringType]

# Event tokens: the first item of every (token, node) tuple queued by
# PullDOM and handed out by DOMEventStream.
START_ELEMENT = "START_ELEMENT"
END_ELEMENT = "END_ELEMENT"
COMMENT = "COMMENT"
START_DOCUMENT = "START_DOCUMENT"
END_DOCUMENT = "END_DOCUMENT"
PROCESSING_INSTRUCTION = "PROCESSING_INSTRUCTION"
IGNORABLE_WHITESPACE = "IGNORABLE_WHITESPACE"
CHARACTERS = "CHARACTERS"
       
    18 
       
class PullDOM(xml.sax.ContentHandler):
    """SAX content handler that queues DOM nodes as (token, node) events.

    Events are kept in a singly linked list whose cells are two-item
    lists ``[event, next_cell]``.  ``firstEvent`` is a dummy head cell
    (its own event slot stays None) and ``lastEvent`` is the cell new
    events are linked after.  Comments and processing instructions that
    arrive before the document exists are parked in ``pending_events``
    and linked into the queue by buildDocument().
    """
    # Locator supplied by the parser through setDocumentLocator().
    _locator = None
    # Document node; stays None until buildDocument() creates it.
    document = None

    def __init__(self, documentFactory=None):
        """Set up an empty event queue, element stack and namespace context.

        documentFactory -- object whose createDocument(uri, tagname, None)
            builds the document; when None, startDocument() substitutes
            the minidom implementation.
        """
        from xml.dom import XML_NAMESPACE
        self.documentFactory = documentFactory
        # Dummy head cell of the event list; real events hang off index 1.
        self.firstEvent = [None, None]
        self.lastEvent = self.firstEvent
        self.elementStack = []
        # Bind the list methods directly on the instance as push/pop.
        self.push = self.elementStack.append
        try:
            self.pop = self.elementStack.pop
        except AttributeError:
            # use class' pop instead
            pass
        self._ns_contexts = [{XML_NAMESPACE:'xml'}] # contains uri -> prefix dicts
        self._current_context = self._ns_contexts[-1]
        # Events seen before the document node exists (see buildDocument).
        self.pending_events = []

    def pop(self):
        """Remove and return the top of the element stack.

        Only used when __init__ could not bind elementStack.pop on the
        instance.
        """
        result = self.elementStack[-1]
        del self.elementStack[-1]
        return result

    def setDocumentLocator(self, locator):
        """Store the locator object handed over by the parser."""
        self._locator = locator

    def startPrefixMapping(self, prefix, uri):
        """Record a namespace declaration and update the uri -> prefix map."""
        if not hasattr(self, '_xmlns_attrs'):
            self._xmlns_attrs = []
        # Remembered so the next startElementNS() can add it as an
        # xmlns attribute; an empty prefix is stored as 'xmlns'.
        self._xmlns_attrs.append((prefix or 'xmlns', uri))
        # Save a snapshot of the current map; endPrefixMapping() pops it
        # back in as the current context.
        self._ns_contexts.append(self._current_context.copy())
        self._current_context[uri] = prefix or None

    def endPrefixMapping(self, prefix):
        """Make the most recently saved namespace map the current one."""
        self._current_context = self._ns_contexts.pop()

    def startElementNS(self, name, tagName , attrs):
        """Create a namespace-aware element node and queue START_ELEMENT.

        name -- (uri, localname) pair.
        tagName -- qualified name, or None to have one derived from the
            current namespace context.
        attrs -- attribute collection keyed by (uri, localname) pairs;
            pending xmlns declarations are written into its _attrs
            mapping.
        """
        # Retrieve xml namespace declaration attributes.
        xmlns_uri = 'http://www.w3.org/2000/xmlns/'
        xmlns_attrs = getattr(self, '_xmlns_attrs', None)
        if xmlns_attrs is not None:
            for aname, value in xmlns_attrs:
                attrs._attrs[(xmlns_uri, aname)] = value
            # The declarations are consumed; start collecting afresh.
            self._xmlns_attrs = []
        uri, localname = name
        if uri:
            # When using namespaces, the reader may or may not
            # provide us with the original name. If not, create
            # *a* valid tagName from the current context.
            if tagName is None:
                prefix = self._current_context[uri]
                if prefix:
                    tagName = prefix + ":" + localname
                else:
                    tagName = localname
            if self.document:
                node = self.document.createElementNS(uri, tagName)
            else:
                # First element seen: creating the document yields the node.
                node = self.buildDocument(uri, tagName)
        else:
            # When the tagname is not prefixed, it just appears as
            # localname
            if self.document:
                node = self.document.createElement(localname)
            else:
                node = self.buildDocument(None, localname)

        for aname,value in attrs.items():
            a_uri, a_localname = aname
            if a_uri == xmlns_uri:
                # Namespace declaration: 'xmlns' or 'xmlns:<prefix>'.
                if a_localname == 'xmlns':
                    qname = a_localname
                else:
                    qname = 'xmlns:' + a_localname
                attr = self.document.createAttributeNS(a_uri, qname)
                node.setAttributeNodeNS(attr)
            elif a_uri:
                # Namespaced attribute: rebuild its qualified name from
                # the prefix currently mapped to its uri.
                prefix = self._current_context[a_uri]
                if prefix:
                    qname = prefix + ":" + a_localname
                else:
                    qname = a_localname
                attr = self.document.createAttributeNS(a_uri, qname)
                node.setAttributeNodeNS(attr)
            else:
                attr = self.document.createAttribute(a_localname)
                node.setAttributeNode(attr)
            attr.value = value

        # Link the new event after the tail and advance the tail.
        self.lastEvent[1] = [(START_ELEMENT, node), None]
        self.lastEvent = self.lastEvent[1]
        self.push(node)

    def endElementNS(self, name, tagName):
        """Queue END_ELEMENT for the node popped off the element stack."""
        self.lastEvent[1] = [(END_ELEMENT, self.pop()), None]
        self.lastEvent = self.lastEvent[1]

    def startElement(self, name, attrs):
        """Create a non-namespace element node and queue START_ELEMENT."""
        if self.document:
            node = self.document.createElement(name)
        else:
            # First element seen: creating the document yields the node.
            node = self.buildDocument(None, name)

        for aname,value in attrs.items():
            attr = self.document.createAttribute(aname)
            attr.value = value
            node.setAttributeNode(attr)

        self.lastEvent[1] = [(START_ELEMENT, node), None]
        self.lastEvent = self.lastEvent[1]
        self.push(node)

    def endElement(self, name):
        """Queue END_ELEMENT for the node popped off the element stack."""
        self.lastEvent[1] = [(END_ELEMENT, self.pop()), None]
        self.lastEvent = self.lastEvent[1]

    def comment(self, s):
        """Queue a COMMENT event, or park it until the document exists."""
        if self.document:
            node = self.document.createComment(s)
            self.lastEvent[1] = [(COMMENT, node), None]
            self.lastEvent = self.lastEvent[1]
        else:
            # No document to create the node with yet: keep the raw text.
            event = [(COMMENT, s), None]
            self.pending_events.append(event)

    def processingInstruction(self, target, data):
        """Queue a PROCESSING_INSTRUCTION event, or park it until the
        document exists."""
        if self.document:
            node = self.document.createProcessingInstruction(target, data)
            self.lastEvent[1] = [(PROCESSING_INSTRUCTION, node), None]
            self.lastEvent = self.lastEvent[1]
        else:
            # No document to create the node with yet: keep target and data.
            event = [(PROCESSING_INSTRUCTION, target, data), None]
            self.pending_events.append(event)

    def ignorableWhitespace(self, chars):
        """Queue an IGNORABLE_WHITESPACE event carrying a new text node."""
        node = self.document.createTextNode(chars)
        self.lastEvent[1] = [(IGNORABLE_WHITESPACE, node), None]
        self.lastEvent = self.lastEvent[1]

    def characters(self, chars):
        """Queue a CHARACTERS event carrying a new text node."""
        node = self.document.createTextNode(chars)
        self.lastEvent[1] = [(CHARACTERS, node), None]
        self.lastEvent = self.lastEvent[1]

    def startDocument(self):
        """Fall back to the minidom implementation if no factory was given."""
        if self.documentFactory is None:
            import xml.dom.minidom
            self.documentFactory = xml.dom.minidom.Document.implementation

    def buildDocument(self, uri, tagname):
        """Create the document, queue START_DOCUMENT and replay pending events.

        Returns the first child of the new document; the startElement*
        callers use it as the element node (presumably the root element
        made by createDocument -- confirm against the factory in use).
        """
        # Can't do that in startDocument, since we need the tagname
        # XXX: obtain DocumentType
        node = self.documentFactory.createDocument(uri, tagname, None)
        self.document = node
        self.lastEvent[1] = [(START_DOCUMENT, node), None]
        self.lastEvent = self.lastEvent[1]
        self.push(node)
        # Put everything we have seen so far into the document
        for e in self.pending_events:
            # Swap the raw payload stored earlier for a real node, then
            # link the very same cell into the event list.
            if e[0][0] == PROCESSING_INSTRUCTION:
                _,target,data = e[0]
                n = self.document.createProcessingInstruction(target, data)
                e[0] = (PROCESSING_INSTRUCTION, n)
            elif e[0][0] == COMMENT:
                n = self.document.createComment(e[0][1])
                e[0] = (COMMENT, n)
            else:
                raise AssertionError("Unknown pending event ",e[0][0])
            self.lastEvent[1] = e
            self.lastEvent = e
        self.pending_events = None
        return node.firstChild

    def endDocument(self):
        """Queue END_DOCUMENT and pop the top of the element stack."""
        # Unlike the other handlers, lastEvent is not advanced here.
        self.lastEvent[1] = [(END_DOCUMENT, self.document), None]
        self.pop()

    def clear(self):
        "clear(): Explicitly release parsing structures"
        self.document = None
       
   201 
       
   202 class ErrorHandler:
       
   203     def warning(self, exception):
       
   204         print exception
       
   205     def error(self, exception):
       
   206         raise exception
       
   207     def fatalError(self, exception):
       
   208         raise exception
       
   209 
       
   210 class DOMEventStream:
       
   211     def __init__(self, stream, parser, bufsize):
       
   212         self.stream = stream
       
   213         self.parser = parser
       
   214         self.bufsize = bufsize
       
   215         if not hasattr(self.parser, 'feed'):
       
   216             self.getEvent = self._slurp
       
   217         self.reset()
       
   218 
       
   219     def reset(self):
       
   220         self.pulldom = PullDOM()
       
   221         # This content handler relies on namespace support
       
   222         self.parser.setFeature(xml.sax.handler.feature_namespaces, 1)
       
   223         self.parser.setContentHandler(self.pulldom)
       
   224 
       
   225     def __getitem__(self, pos):
       
   226         rc = self.getEvent()
       
   227         if rc:
       
   228             return rc
       
   229         raise IndexError
       
   230 
       
   231     def next(self):
       
   232         rc = self.getEvent()
       
   233         if rc:
       
   234             return rc
       
   235         raise StopIteration
       
   236 
       
   237     def __iter__(self):
       
   238         return self
       
   239 
       
   240     def expandNode(self, node):
       
   241         event = self.getEvent()
       
   242         parents = [node]
       
   243         while event:
       
   244             token, cur_node = event
       
   245             if cur_node is node:
       
   246                 return
       
   247             if token != END_ELEMENT:
       
   248                 parents[-1].appendChild(cur_node)
       
   249             if token == START_ELEMENT:
       
   250                 parents.append(cur_node)
       
   251             elif token == END_ELEMENT:
       
   252                 del parents[-1]
       
   253             event = self.getEvent()
       
   254 
       
   255     def getEvent(self):
       
   256         # use IncrementalParser interface, so we get the desired
       
   257         # pull effect
       
   258         if not self.pulldom.firstEvent[1]:
       
   259             self.pulldom.lastEvent = self.pulldom.firstEvent
       
   260         while not self.pulldom.firstEvent[1]:
       
   261             buf = self.stream.read(self.bufsize)
       
   262             if not buf:
       
   263                 self.parser.close()
       
   264                 return None
       
   265             self.parser.feed(buf)
       
   266         rc = self.pulldom.firstEvent[1][0]
       
   267         self.pulldom.firstEvent[1] = self.pulldom.firstEvent[1][1]
       
   268         return rc
       
   269 
       
   270     def _slurp(self):
       
   271         """ Fallback replacement for getEvent() using the
       
   272             standard SAX2 interface, which means we slurp the
       
   273             SAX events into memory (no performance gain, but
       
   274             we are compatible to all SAX parsers).
       
   275         """
       
   276         self.parser.parse(self.stream)
       
   277         self.getEvent = self._emit
       
   278         return self._emit()
       
   279 
       
   280     def _emit(self):
       
   281         """ Fallback replacement for getEvent() that emits
       
   282             the events that _slurp() read previously.
       
   283         """
       
   284         rc = self.pulldom.firstEvent[1][0]
       
   285         self.pulldom.firstEvent[1] = self.pulldom.firstEvent[1][1]
       
   286         return rc
       
   287 
       
   288     def clear(self):
       
   289         """clear(): Explicitly release parsing objects"""
       
   290         self.pulldom.clear()
       
   291         del self.pulldom
       
   292         self.parser = None
       
   293         self.stream = None
       
   294 
       
   295 class SAX2DOM(PullDOM):
       
   296 
       
   297     def startElementNS(self, name, tagName , attrs):
       
   298         PullDOM.startElementNS(self, name, tagName, attrs)
       
   299         curNode = self.elementStack[-1]
       
   300         parentNode = self.elementStack[-2]
       
   301         parentNode.appendChild(curNode)
       
   302 
       
   303     def startElement(self, name, attrs):
       
   304         PullDOM.startElement(self, name, attrs)
       
   305         curNode = self.elementStack[-1]
       
   306         parentNode = self.elementStack[-2]
       
   307         parentNode.appendChild(curNode)
       
   308 
       
   309     def processingInstruction(self, target, data):
       
   310         PullDOM.processingInstruction(self, target, data)
       
   311         node = self.lastEvent[0][1]
       
   312         parentNode = self.elementStack[-1]
       
   313         parentNode.appendChild(node)
       
   314 
       
   315     def ignorableWhitespace(self, chars):
       
   316         PullDOM.ignorableWhitespace(self, chars)
       
   317         node = self.lastEvent[0][1]
       
   318         parentNode = self.elementStack[-1]
       
   319         parentNode.appendChild(node)
       
   320 
       
   321     def characters(self, chars):
       
   322         PullDOM.characters(self, chars)
       
   323         node = self.lastEvent[0][1]
       
   324         parentNode = self.elementStack[-1]
       
   325         parentNode.appendChild(node)
       
   326 
       
   327 
       
# Read size parse() falls back to when no bufsize is given:
# 16364, i.e. 20 short of 2 ** 14.
default_bufsize = (2 ** 14) - 20
       
   329 
       
   330 def parse(stream_or_string, parser=None, bufsize=None):
       
   331     if bufsize is None:
       
   332         bufsize = default_bufsize
       
   333     if type(stream_or_string) in _StringTypes:
       
   334         stream = open(stream_or_string)
       
   335     else:
       
   336         stream = stream_or_string
       
   337     if not parser:
       
   338         parser = xml.sax.make_parser()
       
   339     return DOMEventStream(stream, parser, bufsize)
       
   340 
       
   341 def parseString(string, parser=None):
       
   342     try:
       
   343         from cStringIO import StringIO
       
   344     except ImportError:
       
   345         from StringIO import StringIO
       
   346 
       
   347     bufsize = len(string)
       
   348     buf = StringIO(string)
       
   349     if not parser:
       
   350         parser = xml.sax.make_parser()
       
   351     return DOMEventStream(buf, parser, bufsize)