python-2.5.2/win32/Lib/xml/etree/ElementTree.py
changeset 0 ae805ac0140d
equal deleted inserted replaced
-1:000000000000 0:ae805ac0140d
       
     1 #
       
     2 # ElementTree
       
     3 # $Id: ElementTree.py 2326 2005-03-17 07:45:21Z fredrik $
       
     4 #
       
     5 # light-weight XML support for Python 1.5.2 and later.
       
     6 #
       
     7 # history:
       
     8 # 2001-10-20 fl   created (from various sources)
       
     9 # 2001-11-01 fl   return root from parse method
       
    10 # 2002-02-16 fl   sort attributes in lexical order
       
    11 # 2002-04-06 fl   TreeBuilder refactoring, added PythonDoc markup
       
    12 # 2002-05-01 fl   finished TreeBuilder refactoring
       
    13 # 2002-07-14 fl   added basic namespace support to ElementTree.write
       
    14 # 2002-07-25 fl   added QName attribute support
       
    15 # 2002-10-20 fl   fixed encoding in write
       
    16 # 2002-11-24 fl   changed default encoding to ascii; fixed attribute encoding
       
    17 # 2002-11-27 fl   accept file objects or file names for parse/write
       
    18 # 2002-12-04 fl   moved XMLTreeBuilder back to this module
       
    19 # 2003-01-11 fl   fixed entity encoding glitch for us-ascii
       
    20 # 2003-02-13 fl   added XML literal factory
       
    21 # 2003-02-21 fl   added ProcessingInstruction/PI factory
       
    22 # 2003-05-11 fl   added tostring/fromstring helpers
       
    23 # 2003-05-26 fl   added ElementPath support
       
    24 # 2003-07-05 fl   added makeelement factory method
       
    25 # 2003-07-28 fl   added more well-known namespace prefixes
       
    26 # 2003-08-15 fl   fixed typo in ElementTree.findtext (Thomas Dartsch)
       
    27 # 2003-09-04 fl   fall back on emulator if ElementPath is not installed
       
    28 # 2003-10-31 fl   markup updates
       
    29 # 2003-11-15 fl   fixed nested namespace bug
       
    30 # 2004-03-28 fl   added XMLID helper
       
    31 # 2004-06-02 fl   added default support to findtext
       
    32 # 2004-06-08 fl   fixed encoding of non-ascii element/attribute names
       
    33 # 2004-08-23 fl   take advantage of post-2.1 expat features
       
    34 # 2005-02-01 fl   added iterparse implementation
       
    35 # 2005-03-02 fl   fixed iterparse support for pre-2.2 versions
       
    36 #
       
    37 # Copyright (c) 1999-2005 by Fredrik Lundh.  All rights reserved.
       
    38 #
       
    39 # fredrik@pythonware.com
       
    40 # http://www.pythonware.com
       
    41 #
       
    42 # --------------------------------------------------------------------
       
    43 # The ElementTree toolkit is
       
    44 #
       
    45 # Copyright (c) 1999-2005 by Fredrik Lundh
       
    46 #
       
    47 # By obtaining, using, and/or copying this software and/or its
       
    48 # associated documentation, you agree that you have read, understood,
       
    49 # and will comply with the following terms and conditions:
       
    50 #
       
    51 # Permission to use, copy, modify, and distribute this software and
       
    52 # its associated documentation for any purpose and without fee is
       
    53 # hereby granted, provided that the above copyright notice appears in
       
    54 # all copies, and that both that copyright notice and this permission
       
    55 # notice appear in supporting documentation, and that the name of
       
    56 # Secret Labs AB or the author not be used in advertising or publicity
       
    57 # pertaining to distribution of the software without specific, written
       
    58 # prior permission.
       
    59 #
       
    60 # SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
       
    61 # TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
       
    62 # ABILITY AND FITNESS.  IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
       
    63 # BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
       
    64 # DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
       
    65 # WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
       
    66 # ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
       
    67 # OF THIS SOFTWARE.
       
    68 # --------------------------------------------------------------------
       
    69 
       
    70 # Licensed to PSF under a Contributor Agreement.
       
    71 # See http://www.python.org/2.4/license for licensing details.
       
    72 
       
    73 __all__ = [
       
    74     # public symbols
       
    75     "Comment",
       
    76     "dump",
       
    77     "Element", "ElementTree",
       
    78     "fromstring",
       
    79     "iselement", "iterparse",
       
    80     "parse",
       
    81     "PI", "ProcessingInstruction",
       
    82     "QName",
       
    83     "SubElement",
       
    84     "tostring",
       
    85     "TreeBuilder",
       
    86     "VERSION", "XML",
       
    87     "XMLParser", "XMLTreeBuilder",
       
    88     ]
       
    89 
       
    90 ##
       
    91 # The <b>Element</b> type is a flexible container object, designed to
       
    92 # store hierarchical data structures in memory. The type can be
       
    93 # described as a cross between a list and a dictionary.
       
    94 # <p>
       
    95 # Each element has a number of properties associated with it:
       
    96 # <ul>
       
    97 # <li>a <i>tag</i>. This is a string identifying what kind of data
       
    98 # this element represents (the element type, in other words).</li>
       
    99 # <li>a number of <i>attributes</i>, stored in a Python dictionary.</li>
       
   100 # <li>a <i>text</i> string.</li>
       
   101 # <li>an optional <i>tail</i> string.</li>
       
   102 # <li>a number of <i>child elements</i>, stored in a Python sequence</li>
       
   103 # </ul>
       
   104 #
       
   105 # To create an element instance, use the {@link #Element} or {@link
       
   106 # #SubElement} factory functions.
       
   107 # <p>
       
   108 # The {@link #ElementTree} class can be used to wrap an element
       
   109 # structure, and convert it from and to XML.
       
   110 ##
       
   111 
       
   112 import string, sys, re
       
   113 
       
   114 class _SimpleElementPath:
       
   115     # emulate pre-1.2 find/findtext/findall behaviour
       
   116     def find(self, element, tag):
       
   117         for elem in element:
       
   118             if elem.tag == tag:
       
   119                 return elem
       
   120         return None
       
   121     def findtext(self, element, tag, default=None):
       
   122         for elem in element:
       
   123             if elem.tag == tag:
       
   124                 return elem.text or ""
       
   125         return default
       
   126     def findall(self, element, tag):
       
   127         if tag[:3] == ".//":
       
   128             return element.getiterator(tag[3:])
       
   129         result = []
       
   130         for elem in element:
       
   131             if elem.tag == tag:
       
   132                 result.append(elem)
       
   133         return result
       
   134 
       
   135 try:
       
   136     import ElementPath
       
   137 except ImportError:
       
   138     # FIXME: issue warning in this case?
       
   139     ElementPath = _SimpleElementPath()
       
   140 
       
   141 # TODO: add support for custom namespace resolvers/default namespaces
       
   142 # TODO: add improved support for incremental parsing
       
   143 
       
   144 VERSION = "1.2.6"
       
   145 
       
   146 ##
       
   147 # Internal element class.  This class defines the Element interface,
       
   148 # and provides a reference implementation of this interface.
       
   149 # <p>
       
   150 # You should not create instances of this class directly.  Use the
       
   151 # appropriate factory functions instead, such as {@link #Element}
       
   152 # and {@link #SubElement}.
       
   153 #
       
   154 # @see Element
       
   155 # @see SubElement
       
   156 # @see Comment
       
   157 # @see ProcessingInstruction
       
   158 
       
   159 class _ElementInterface:
       
   160     # <tag attrib>text<child/>...</tag>tail
       
   161 
       
   162     ##
       
   163     # (Attribute) Element tag.
       
   164 
       
   165     tag = None
       
   166 
       
   167     ##
       
   168     # (Attribute) Element attribute dictionary.  Where possible, use
       
   169     # {@link #_ElementInterface.get},
       
   170     # {@link #_ElementInterface.set},
       
   171     # {@link #_ElementInterface.keys}, and
       
   172     # {@link #_ElementInterface.items} to access
       
   173     # element attributes.
       
   174 
       
   175     attrib = None
       
   176 
       
   177     ##
       
   178     # (Attribute) Text before first subelement.  This is either a
       
   179     # string or the value None, if there was no text.
       
   180 
       
   181     text = None
       
   182 
       
   183     ##
       
   184     # (Attribute) Text after this element's end tag, but before the
       
   185     # next sibling element's start tag.  This is either a string or
       
   186     # the value None, if there was no text.
       
   187 
       
   188     tail = None # text after end tag, if any
       
   189 
       
   190     def __init__(self, tag, attrib):
       
   191         self.tag = tag
       
   192         self.attrib = attrib
       
   193         self._children = []
       
   194 
       
   195     def __repr__(self):
       
   196         return "<Element %s at %x>" % (self.tag, id(self))
       
   197 
       
   198     ##
       
   199     # Creates a new element object of the same type as this element.
       
   200     #
       
   201     # @param tag Element tag.
       
   202     # @param attrib Element attributes, given as a dictionary.
       
   203     # @return A new element instance.
       
   204 
       
   205     def makeelement(self, tag, attrib):
       
   206         return Element(tag, attrib)
       
   207 
       
   208     ##
       
   209     # Returns the number of subelements.
       
   210     #
       
   211     # @return The number of subelements.
       
   212 
       
   213     def __len__(self):
       
   214         return len(self._children)
       
   215 
       
   216     ##
       
   217     # Returns the given subelement.
       
   218     #
       
   219     # @param index What subelement to return.
       
   220     # @return The given subelement.
       
   221     # @exception IndexError If the given element does not exist.
       
   222 
       
   223     def __getitem__(self, index):
       
   224         return self._children[index]
       
   225 
       
   226     ##
       
   227     # Replaces the given subelement.
       
   228     #
       
   229     # @param index What subelement to replace.
       
   230     # @param element The new element value.
       
   231     # @exception IndexError If the given element does not exist.
       
   232     # @exception AssertionError If element is not a valid object.
       
   233 
       
   234     def __setitem__(self, index, element):
       
   235         assert iselement(element)
       
   236         self._children[index] = element
       
   237 
       
   238     ##
       
   239     # Deletes the given subelement.
       
   240     #
       
   241     # @param index What subelement to delete.
       
   242     # @exception IndexError If the given element does not exist.
       
   243 
       
   244     def __delitem__(self, index):
       
   245         del self._children[index]
       
   246 
       
   247     ##
       
   248     # Returns a list containing subelements in the given range.
       
   249     #
       
   250     # @param start The first subelement to return.
       
   251     # @param stop The first subelement that shouldn't be returned.
       
   252     # @return A sequence object containing subelements.
       
   253 
       
   254     def __getslice__(self, start, stop):
       
   255         return self._children[start:stop]
       
   256 
       
   257     ##
       
   258     # Replaces a number of subelements with elements from a sequence.
       
   259     #
       
   260     # @param start The first subelement to replace.
       
   261     # @param stop The first subelement that shouldn't be replaced.
       
   262     # @param elements A sequence object with zero or more elements.
       
   263     # @exception AssertionError If a sequence member is not a valid object.
       
   264 
       
   265     def __setslice__(self, start, stop, elements):
       
   266         for element in elements:
       
   267             assert iselement(element)
       
   268         self._children[start:stop] = list(elements)
       
   269 
       
   270     ##
       
   271     # Deletes a number of subelements.
       
   272     #
       
   273     # @param start The first subelement to delete.
       
   274     # @param stop The first subelement to leave in there.
       
   275 
       
   276     def __delslice__(self, start, stop):
       
   277         del self._children[start:stop]
       
   278 
       
   279     ##
       
   280     # Adds a subelement to the end of this element.
       
   281     #
       
   282     # @param element The element to add.
       
   283     # @exception AssertionError If a sequence member is not a valid object.
       
   284 
       
   285     def append(self, element):
       
   286         assert iselement(element)
       
   287         self._children.append(element)
       
   288 
       
   289     ##
       
   290     # Inserts a subelement at the given position in this element.
       
   291     #
       
   292     # @param index Where to insert the new subelement.
       
   293     # @exception AssertionError If the element is not a valid object.
       
   294 
       
   295     def insert(self, index, element):
       
   296         assert iselement(element)
       
   297         self._children.insert(index, element)
       
   298 
       
   299     ##
       
   300     # Removes a matching subelement.  Unlike the <b>find</b> methods,
       
   301     # this method compares elements based on identity, not on tag
       
   302     # value or contents.
       
   303     #
       
   304     # @param element What element to remove.
       
   305     # @exception ValueError If a matching element could not be found.
       
   306     # @exception AssertionError If the element is not a valid object.
       
   307 
       
   308     def remove(self, element):
       
   309         assert iselement(element)
       
   310         self._children.remove(element)
       
   311 
       
   312     ##
       
   313     # Returns all subelements.  The elements are returned in document
       
   314     # order.
       
   315     #
       
   316     # @return A list of subelements.
       
   317     # @defreturn list of Element instances
       
   318 
       
   319     def getchildren(self):
       
   320         return self._children
       
   321 
       
   322     ##
       
   323     # Finds the first matching subelement, by tag name or path.
       
   324     #
       
   325     # @param path What element to look for.
       
   326     # @return The first matching element, or None if no element was found.
       
   327     # @defreturn Element or None
       
   328 
       
   329     def find(self, path):
       
   330         return ElementPath.find(self, path)
       
   331 
       
   332     ##
       
   333     # Finds text for the first matching subelement, by tag name or path.
       
   334     #
       
   335     # @param path What element to look for.
       
   336     # @param default What to return if the element was not found.
       
   337     # @return The text content of the first matching element, or the
       
   338     #     default value no element was found.  Note that if the element
       
   339     #     has is found, but has no text content, this method returns an
       
   340     #     empty string.
       
   341     # @defreturn string
       
   342 
       
   343     def findtext(self, path, default=None):
       
   344         return ElementPath.findtext(self, path, default)
       
   345 
       
   346     ##
       
   347     # Finds all matching subelements, by tag name or path.
       
   348     #
       
   349     # @param path What element to look for.
       
   350     # @return A list or iterator containing all matching elements,
       
   351     #    in document order.
       
   352     # @defreturn list of Element instances
       
   353 
       
   354     def findall(self, path):
       
   355         return ElementPath.findall(self, path)
       
   356 
       
   357     ##
       
   358     # Resets an element.  This function removes all subelements, clears
       
   359     # all attributes, and sets the text and tail attributes to None.
       
   360 
       
   361     def clear(self):
       
   362         self.attrib.clear()
       
   363         self._children = []
       
   364         self.text = self.tail = None
       
   365 
       
   366     ##
       
   367     # Gets an element attribute.
       
   368     #
       
   369     # @param key What attribute to look for.
       
   370     # @param default What to return if the attribute was not found.
       
   371     # @return The attribute value, or the default value, if the
       
   372     #     attribute was not found.
       
   373     # @defreturn string or None
       
   374 
       
   375     def get(self, key, default=None):
       
   376         return self.attrib.get(key, default)
       
   377 
       
   378     ##
       
   379     # Sets an element attribute.
       
   380     #
       
   381     # @param key What attribute to set.
       
   382     # @param value The attribute value.
       
   383 
       
   384     def set(self, key, value):
       
   385         self.attrib[key] = value
       
   386 
       
   387     ##
       
   388     # Gets a list of attribute names.  The names are returned in an
       
   389     # arbitrary order (just like for an ordinary Python dictionary).
       
   390     #
       
   391     # @return A list of element attribute names.
       
   392     # @defreturn list of strings
       
   393 
       
   394     def keys(self):
       
   395         return self.attrib.keys()
       
   396 
       
   397     ##
       
   398     # Gets element attributes, as a sequence.  The attributes are
       
   399     # returned in an arbitrary order.
       
   400     #
       
   401     # @return A list of (name, value) tuples for all attributes.
       
   402     # @defreturn list of (string, string) tuples
       
   403 
       
   404     def items(self):
       
   405         return self.attrib.items()
       
   406 
       
   407     ##
       
   408     # Creates a tree iterator.  The iterator loops over this element
       
   409     # and all subelements, in document order, and returns all elements
       
   410     # with a matching tag.
       
   411     # <p>
       
   412     # If the tree structure is modified during iteration, the result
       
   413     # is undefined.
       
   414     #
       
   415     # @param tag What tags to look for (default is to return all elements).
       
   416     # @return A list or iterator containing all the matching elements.
       
   417     # @defreturn list or iterator
       
   418 
       
   419     def getiterator(self, tag=None):
       
   420         nodes = []
       
   421         if tag == "*":
       
   422             tag = None
       
   423         if tag is None or self.tag == tag:
       
   424             nodes.append(self)
       
   425         for node in self._children:
       
   426             nodes.extend(node.getiterator(tag))
       
   427         return nodes
       
   428 
       
   429 # compatibility
       
   430 _Element = _ElementInterface
       
   431 
       
   432 ##
       
   433 # Element factory.  This function returns an object implementing the
       
   434 # standard Element interface.  The exact class or type of that object
       
   435 # is implementation dependent, but it will always be compatible with
       
   436 # the {@link #_ElementInterface} class in this module.
       
   437 # <p>
       
   438 # The element name, attribute names, and attribute values can be
       
   439 # either 8-bit ASCII strings or Unicode strings.
       
   440 #
       
   441 # @param tag The element name.
       
   442 # @param attrib An optional dictionary, containing element attributes.
       
   443 # @param **extra Additional attributes, given as keyword arguments.
       
   444 # @return An element instance.
       
   445 # @defreturn Element
       
   446 
       
   447 def Element(tag, attrib={}, **extra):
       
   448     attrib = attrib.copy()
       
   449     attrib.update(extra)
       
   450     return _ElementInterface(tag, attrib)
       
   451 
       
   452 ##
       
   453 # Subelement factory.  This function creates an element instance, and
       
   454 # appends it to an existing element.
       
   455 # <p>
       
   456 # The element name, attribute names, and attribute values can be
       
   457 # either 8-bit ASCII strings or Unicode strings.
       
   458 #
       
   459 # @param parent The parent element.
       
   460 # @param tag The subelement name.
       
   461 # @param attrib An optional dictionary, containing element attributes.
       
   462 # @param **extra Additional attributes, given as keyword arguments.
       
   463 # @return An element instance.
       
   464 # @defreturn Element
       
   465 
       
   466 def SubElement(parent, tag, attrib={}, **extra):
       
   467     attrib = attrib.copy()
       
   468     attrib.update(extra)
       
   469     element = parent.makeelement(tag, attrib)
       
   470     parent.append(element)
       
   471     return element
       
   472 
       
   473 ##
       
   474 # Comment element factory.  This factory function creates a special
       
   475 # element that will be serialized as an XML comment.
       
   476 # <p>
       
   477 # The comment string can be either an 8-bit ASCII string or a Unicode
       
   478 # string.
       
   479 #
       
   480 # @param text A string containing the comment string.
       
   481 # @return An element instance, representing a comment.
       
   482 # @defreturn Element
       
   483 
       
   484 def Comment(text=None):
       
   485     element = Element(Comment)
       
   486     element.text = text
       
   487     return element
       
   488 
       
   489 ##
       
   490 # PI element factory.  This factory function creates a special element
       
   491 # that will be serialized as an XML processing instruction.
       
   492 #
       
   493 # @param target A string containing the PI target.
       
   494 # @param text A string containing the PI contents, if any.
       
   495 # @return An element instance, representing a PI.
       
   496 # @defreturn Element
       
   497 
       
   498 def ProcessingInstruction(target, text=None):
       
   499     element = Element(ProcessingInstruction)
       
   500     element.text = target
       
   501     if text:
       
   502         element.text = element.text + " " + text
       
   503     return element
       
   504 
       
   505 PI = ProcessingInstruction
       
   506 
       
   507 ##
       
   508 # QName wrapper.  This can be used to wrap a QName attribute value, in
       
   509 # order to get proper namespace handling on output.
       
   510 #
       
   511 # @param text A string containing the QName value, in the form {uri}local,
       
   512 #     or, if the tag argument is given, the URI part of a QName.
       
   513 # @param tag Optional tag.  If given, the first argument is interpreted as
       
   514 #     a URI, and this argument is interpreted as a local name.
       
   515 # @return An opaque object, representing the QName.
       
   516 
       
   517 class QName:
       
   518     def __init__(self, text_or_uri, tag=None):
       
   519         if tag:
       
   520             text_or_uri = "{%s}%s" % (text_or_uri, tag)
       
   521         self.text = text_or_uri
       
   522     def __str__(self):
       
   523         return self.text
       
   524     def __hash__(self):
       
   525         return hash(self.text)
       
   526     def __cmp__(self, other):
       
   527         if isinstance(other, QName):
       
   528             return cmp(self.text, other.text)
       
   529         return cmp(self.text, other)
       
   530 
       
   531 ##
       
   532 # ElementTree wrapper class.  This class represents an entire element
       
   533 # hierarchy, and adds some extra support for serialization to and from
       
   534 # standard XML.
       
   535 #
       
   536 # @param element Optional root element.
       
   537 # @keyparam file Optional file handle or name.  If given, the
       
   538 #     tree is initialized with the contents of this XML file.
       
   539 
       
   540 class ElementTree:
       
   541 
       
   542     def __init__(self, element=None, file=None):
       
   543         assert element is None or iselement(element)
       
   544         self._root = element # first node
       
   545         if file:
       
   546             self.parse(file)
       
   547 
       
   548     ##
       
   549     # Gets the root element for this tree.
       
   550     #
       
   551     # @return An element instance.
       
   552     # @defreturn Element
       
   553 
       
   554     def getroot(self):
       
   555         return self._root
       
   556 
       
   557     ##
       
   558     # Replaces the root element for this tree.  This discards the
       
   559     # current contents of the tree, and replaces it with the given
       
   560     # element.  Use with care.
       
   561     #
       
   562     # @param element An element instance.
       
   563 
       
   564     def _setroot(self, element):
       
   565         assert iselement(element)
       
   566         self._root = element
       
   567 
       
   568     ##
       
   569     # Loads an external XML document into this element tree.
       
   570     #
       
   571     # @param source A file name or file object.
       
   572     # @param parser An optional parser instance.  If not given, the
       
   573     #     standard {@link XMLTreeBuilder} parser is used.
       
   574     # @return The document root element.
       
   575     # @defreturn Element
       
   576 
       
   577     def parse(self, source, parser=None):
       
   578         if not hasattr(source, "read"):
       
   579             source = open(source, "rb")
       
   580         if not parser:
       
   581             parser = XMLTreeBuilder()
       
   582         while 1:
       
   583             data = source.read(32768)
       
   584             if not data:
       
   585                 break
       
   586             parser.feed(data)
       
   587         self._root = parser.close()
       
   588         return self._root
       
   589 
       
   590     ##
       
   591     # Creates a tree iterator for the root element.  The iterator loops
       
   592     # over all elements in this tree, in document order.
       
   593     #
       
   594     # @param tag What tags to look for (default is to return all elements)
       
   595     # @return An iterator.
       
   596     # @defreturn iterator
       
   597 
       
   598     def getiterator(self, tag=None):
       
   599         assert self._root is not None
       
   600         return self._root.getiterator(tag)
       
   601 
       
   602     ##
       
   603     # Finds the first toplevel element with given tag.
       
   604     # Same as getroot().find(path).
       
   605     #
       
   606     # @param path What element to look for.
       
   607     # @return The first matching element, or None if no element was found.
       
   608     # @defreturn Element or None
       
   609 
       
   610     def find(self, path):
       
   611         assert self._root is not None
       
   612         if path[:1] == "/":
       
   613             path = "." + path
       
   614         return self._root.find(path)
       
   615 
       
   616     ##
       
   617     # Finds the element text for the first toplevel element with given
       
   618     # tag.  Same as getroot().findtext(path).
       
   619     #
       
   620     # @param path What toplevel element to look for.
       
   621     # @param default What to return if the element was not found.
       
   622     # @return The text content of the first matching element, or the
       
   623     #     default value if no element was found.  Note that if the element
       
   624     #     is found, but has no text content, this method returns an
       
   625     #     empty string.
       
   626     # @defreturn string
       
   627 
       
   628     def findtext(self, path, default=None):
       
   629         assert self._root is not None
       
   630         if path[:1] == "/":
       
   631             path = "." + path
       
   632         return self._root.findtext(path, default)
       
   633 
       
   634     ##
       
   635     # Finds all toplevel elements with the given tag.
       
   636     # Same as getroot().findall(path).
       
   637     #
       
   638     # @param path What element to look for.
       
   639     # @return A list or iterator containing all matching elements,
       
   640     #    in document order.
       
   641     # @defreturn list of Element instances
       
   642 
       
   643     def findall(self, path):
       
   644         assert self._root is not None
       
   645         if path[:1] == "/":
       
   646             path = "." + path
       
   647         return self._root.findall(path)
       
   648 
       
   649     ##
       
   650     # Writes the element tree to a file, as XML.
       
   651     #
       
   652     # @param file A file name, or a file object opened for writing.
       
   653     # @param encoding Optional output encoding (default is US-ASCII).
       
   654 
       
   655     def write(self, file, encoding="us-ascii"):
       
   656         assert self._root is not None
       
   657         if not hasattr(file, "write"):
       
   658             file = open(file, "wb")
       
   659         if not encoding:
       
   660             encoding = "us-ascii"
       
   661         elif encoding != "utf-8" and encoding != "us-ascii":
       
   662             file.write("<?xml version='1.0' encoding='%s'?>\n" % encoding)
       
   663         self._write(file, self._root, encoding, {})
       
   664 
       
   665     def _write(self, file, node, encoding, namespaces):
       
   666         # write XML to file
       
   667         tag = node.tag
       
   668         if tag is Comment:
       
   669             file.write("<!-- %s -->" % _escape_cdata(node.text, encoding))
       
   670         elif tag is ProcessingInstruction:
       
   671             file.write("<?%s?>" % _escape_cdata(node.text, encoding))
       
   672         else:
       
   673             items = node.items()
       
   674             xmlns_items = [] # new namespaces in this scope
       
   675             try:
       
   676                 if isinstance(tag, QName) or tag[:1] == "{":
       
   677                     tag, xmlns = fixtag(tag, namespaces)
       
   678                     if xmlns: xmlns_items.append(xmlns)
       
   679             except TypeError:
       
   680                 _raise_serialization_error(tag)
       
   681             file.write("<" + _encode(tag, encoding))
       
   682             if items or xmlns_items:
       
   683                 items.sort() # lexical order
       
   684                 for k, v in items:
       
   685                     try:
       
   686                         if isinstance(k, QName) or k[:1] == "{":
       
   687                             k, xmlns = fixtag(k, namespaces)
       
   688                             if xmlns: xmlns_items.append(xmlns)
       
   689                     except TypeError:
       
   690                         _raise_serialization_error(k)
       
   691                     try:
       
   692                         if isinstance(v, QName):
       
   693                             v, xmlns = fixtag(v, namespaces)
       
   694                             if xmlns: xmlns_items.append(xmlns)
       
   695                     except TypeError:
       
   696                         _raise_serialization_error(v)
       
   697                     file.write(" %s=\"%s\"" % (_encode(k, encoding),
       
   698                                                _escape_attrib(v, encoding)))
       
   699                 for k, v in xmlns_items:
       
   700                     file.write(" %s=\"%s\"" % (_encode(k, encoding),
       
   701                                                _escape_attrib(v, encoding)))
       
   702             if node.text or len(node):
       
   703                 file.write(">")
       
   704                 if node.text:
       
   705                     file.write(_escape_cdata(node.text, encoding))
       
   706                 for n in node:
       
   707                     self._write(file, n, encoding, namespaces)
       
   708                 file.write("</" + _encode(tag, encoding) + ">")
       
   709             else:
       
   710                 file.write(" />")
       
   711             for k, v in xmlns_items:
       
   712                 del namespaces[v]
       
   713         if node.tail:
       
   714             file.write(_escape_cdata(node.tail, encoding))
       
   715 
       
   716 # --------------------------------------------------------------------
       
   717 # helpers
       
   718 
       
   719 ##
       
   720 # Checks if an object appears to be a valid element object.
       
   721 #
       
   722 # @param element An element instance.
       
   723 # @return A true value if this is an element object.
       
   724 # @defreturn flag
       
   725 
       
   726 def iselement(element):
       
   727     # FIXME: not sure about this; might be a better idea to look
       
   728     # for tag/attrib/text attributes
       
   729     return isinstance(element, _ElementInterface) or hasattr(element, "tag")
       
   730 
       
   731 ##
       
   732 # Writes an element tree or element structure to sys.stdout.  This
       
   733 # function should be used for debugging only.
       
   734 # <p>
       
   735 # The exact output format is implementation dependent.  In this
       
   736 # version, it's written as an ordinary XML file.
       
   737 #
       
   738 # @param elem An element tree or an individual element.
       
   739 
       
   740 def dump(elem):
       
   741     # debugging
       
   742     if not isinstance(elem, ElementTree):
       
   743         elem = ElementTree(elem)
       
   744     elem.write(sys.stdout)
       
   745     tail = elem.getroot().tail
       
   746     if not tail or tail[-1] != "\n":
       
   747         sys.stdout.write("\n")
       
   748 
       
   749 def _encode(s, encoding):
       
   750     try:
       
   751         return s.encode(encoding)
       
   752     except AttributeError:
       
   753         return s # 1.5.2: assume the string uses the right encoding
       
   754 
       
   755 if sys.version[:3] == "1.5":
       
   756     _escape = re.compile(r"[&<>\"\x80-\xff]+") # 1.5.2
       
   757 else:
       
   758     _escape = re.compile(eval(r'u"[&<>\"\u0080-\uffff]+"'))
       
   759 
       
   760 _escape_map = {
       
   761     "&": "&amp;",
       
   762     "<": "&lt;",
       
   763     ">": "&gt;",
       
   764     '"': "&quot;",
       
   765 }
       
   766 
       
   767 _namespace_map = {
       
   768     # "well-known" namespace prefixes
       
   769     "http://www.w3.org/XML/1998/namespace": "xml",
       
   770     "http://www.w3.org/1999/xhtml": "html",
       
   771     "http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf",
       
   772     "http://schemas.xmlsoap.org/wsdl/": "wsdl",
       
   773 }
       
   774 
       
   775 def _raise_serialization_error(text):
       
   776     raise TypeError(
       
   777         "cannot serialize %r (type %s)" % (text, type(text).__name__)
       
   778         )
       
   779 
       
   780 def _encode_entity(text, pattern=_escape):
       
   781     # map reserved and non-ascii characters to numerical entities
       
   782     def escape_entities(m, map=_escape_map):
       
   783         out = []
       
   784         append = out.append
       
   785         for char in m.group():
       
   786             text = map.get(char)
       
   787             if text is None:
       
   788                 text = "&#%d;" % ord(char)
       
   789             append(text)
       
   790         return string.join(out, "")
       
   791     try:
       
   792         return _encode(pattern.sub(escape_entities, text), "ascii")
       
   793     except TypeError:
       
   794         _raise_serialization_error(text)
       
   795 
       
   796 #
       
   797 # the following functions assume an ascii-compatible encoding
       
   798 # (or "utf-16")
       
   799 
       
   800 def _escape_cdata(text, encoding=None, replace=string.replace):
       
   801     # escape character data
       
   802     try:
       
   803         if encoding:
       
   804             try:
       
   805                 text = _encode(text, encoding)
       
   806             except UnicodeError:
       
   807                 return _encode_entity(text)
       
   808         text = replace(text, "&", "&amp;")
       
   809         text = replace(text, "<", "&lt;")
       
   810         text = replace(text, ">", "&gt;")
       
   811         return text
       
   812     except (TypeError, AttributeError):
       
   813         _raise_serialization_error(text)
       
   814 
       
   815 def _escape_attrib(text, encoding=None, replace=string.replace):
       
   816     # escape attribute value
       
   817     try:
       
   818         if encoding:
       
   819             try:
       
   820                 text = _encode(text, encoding)
       
   821             except UnicodeError:
       
   822                 return _encode_entity(text)
       
   823         text = replace(text, "&", "&amp;")
       
   824         text = replace(text, "'", "&apos;") # FIXME: overkill
       
   825         text = replace(text, "\"", "&quot;")
       
   826         text = replace(text, "<", "&lt;")
       
   827         text = replace(text, ">", "&gt;")
       
   828         return text
       
   829     except (TypeError, AttributeError):
       
   830         _raise_serialization_error(text)
       
   831 
       
   832 def fixtag(tag, namespaces):
       
   833     # given a decorated tag (of the form {uri}tag), return prefixed
       
   834     # tag and namespace declaration, if any
       
   835     if isinstance(tag, QName):
       
   836         tag = tag.text
       
   837     namespace_uri, tag = string.split(tag[1:], "}", 1)
       
   838     prefix = namespaces.get(namespace_uri)
       
   839     if prefix is None:
       
   840         prefix = _namespace_map.get(namespace_uri)
       
   841         if prefix is None:
       
   842             prefix = "ns%d" % len(namespaces)
       
   843         namespaces[namespace_uri] = prefix
       
   844         if prefix == "xml":
       
   845             xmlns = None
       
   846         else:
       
   847             xmlns = ("xmlns:%s" % prefix, namespace_uri)
       
   848     else:
       
   849         xmlns = None
       
   850     return "%s:%s" % (prefix, tag), xmlns
       
   851 
       
   852 ##
       
   853 # Parses an XML document into an element tree.
       
   854 #
       
   855 # @param source A filename or file object containing XML data.
       
   856 # @param parser An optional parser instance.  If not given, the
       
   857 #     standard {@link XMLTreeBuilder} parser is used.
       
   858 # @return An ElementTree instance
       
   859 
       
   860 def parse(source, parser=None):
       
   861     tree = ElementTree()
       
   862     tree.parse(source, parser)
       
   863     return tree
       
   864 
       
   865 ##
       
   866 # Parses an XML document into an element tree incrementally, and reports
       
   867 # what's going on to the user.
       
   868 #
       
   869 # @param source A filename or file object containing XML data.
       
   870 # @param events A list of events to report back.  If omitted, only "end"
       
   871 #     events are reported.
       
   872 # @return A (event, elem) iterator.
       
   873 
       
   874 class iterparse:
       
   875 
       
   876     def __init__(self, source, events=None):
       
   877         if not hasattr(source, "read"):
       
   878             source = open(source, "rb")
       
   879         self._file = source
       
   880         self._events = []
       
   881         self._index = 0
       
   882         self.root = self._root = None
       
   883         self._parser = XMLTreeBuilder()
       
   884         # wire up the parser for event reporting
       
   885         parser = self._parser._parser
       
   886         append = self._events.append
       
   887         if events is None:
       
   888             events = ["end"]
       
   889         for event in events:
       
   890             if event == "start":
       
   891                 try:
       
   892                     parser.ordered_attributes = 1
       
   893                     parser.specified_attributes = 1
       
   894                     def handler(tag, attrib_in, event=event, append=append,
       
   895                                 start=self._parser._start_list):
       
   896                         append((event, start(tag, attrib_in)))
       
   897                     parser.StartElementHandler = handler
       
   898                 except AttributeError:
       
   899                     def handler(tag, attrib_in, event=event, append=append,
       
   900                                 start=self._parser._start):
       
   901                         append((event, start(tag, attrib_in)))
       
   902                     parser.StartElementHandler = handler
       
   903             elif event == "end":
       
   904                 def handler(tag, event=event, append=append,
       
   905                             end=self._parser._end):
       
   906                     append((event, end(tag)))
       
   907                 parser.EndElementHandler = handler
       
   908             elif event == "start-ns":
       
   909                 def handler(prefix, uri, event=event, append=append):
       
   910                     try:
       
   911                         uri = _encode(uri, "ascii")
       
   912                     except UnicodeError:
       
   913                         pass
       
   914                     append((event, (prefix or "", uri)))
       
   915                 parser.StartNamespaceDeclHandler = handler
       
   916             elif event == "end-ns":
       
   917                 def handler(prefix, event=event, append=append):
       
   918                     append((event, None))
       
   919                 parser.EndNamespaceDeclHandler = handler
       
   920 
       
   921     def next(self):
       
   922         while 1:
       
   923             try:
       
   924                 item = self._events[self._index]
       
   925             except IndexError:
       
   926                 if self._parser is None:
       
   927                     self.root = self._root
       
   928                     try:
       
   929                         raise StopIteration
       
   930                     except NameError:
       
   931                         raise IndexError
       
   932                 # load event buffer
       
   933                 del self._events[:]
       
   934                 self._index = 0
       
   935                 data = self._file.read(16384)
       
   936                 if data:
       
   937                     self._parser.feed(data)
       
   938                 else:
       
   939                     self._root = self._parser.close()
       
   940                     self._parser = None
       
   941             else:
       
   942                 self._index = self._index + 1
       
   943                 return item
       
   944 
       
   945     try:
       
   946         iter
       
   947         def __iter__(self):
       
   948             return self
       
   949     except NameError:
       
   950         def __getitem__(self, index):
       
   951             return self.next()
       
   952 
       
   953 ##
       
   954 # Parses an XML document from a string constant.  This function can
       
   955 # be used to embed "XML literals" in Python code.
       
   956 #
       
   957 # @param text A string containing XML data.
       
   958 # @return An Element instance.
       
   959 # @defreturn Element
       
   960 
       
   961 def XML(text):
       
   962     parser = XMLTreeBuilder()
       
   963     parser.feed(text)
       
   964     return parser.close()
       
   965 
       
   966 ##
       
   967 # Parses an XML document from a string constant, and also returns
       
   968 # a dictionary which maps from element id:s to elements.
       
   969 #
       
   970 # @param text A string containing XML data.
       
   971 # @return A tuple containing an Element instance and a dictionary.
       
   972 # @defreturn (Element, dictionary)
       
   973 
       
   974 def XMLID(text):
       
   975     parser = XMLTreeBuilder()
       
   976     parser.feed(text)
       
   977     tree = parser.close()
       
   978     ids = {}
       
   979     for elem in tree.getiterator():
       
   980         id = elem.get("id")
       
   981         if id:
       
   982             ids[id] = elem
       
   983     return tree, ids
       
   984 
       
   985 ##
       
   986 # Parses an XML document from a string constant.  Same as {@link #XML}.
       
   987 #
       
   988 # @def fromstring(text)
       
   989 # @param text A string containing XML data.
       
   990 # @return An Element instance.
       
   991 # @defreturn Element
       
   992 
       
# fromstring is bound to the same function object as XML
fromstring = XML
       
   994 
       
   995 ##
       
   996 # Generates a string representation of an XML element, including all
       
   997 # subelements.
       
   998 #
       
   999 # @param element An Element instance.
       
  1000 # @return An encoded string containing the XML data.
       
  1001 # @defreturn string
       
  1002 
       
  1003 def tostring(element, encoding=None):
       
  1004     class dummy:
       
  1005         pass
       
  1006     data = []
       
  1007     file = dummy()
       
  1008     file.write = data.append
       
  1009     ElementTree(element).write(file, encoding)
       
  1010     return string.join(data, "")
       
  1011 
       
  1012 ##
       
  1013 # Generic element structure builder.  This builder converts a sequence
       
  1014 # of {@link #TreeBuilder.start}, {@link #TreeBuilder.data}, and {@link
       
  1015 # #TreeBuilder.end} method calls to a well-formed element structure.
       
  1016 # <p>
       
  1017 # You can use this class to build an element structure using a custom XML
       
  1018 # parser, or a parser for some other XML-like format.
       
  1019 #
       
  1020 # @param element_factory Optional element factory.  This factory
       
  1021 #    is called to create new Element instances, as necessary.
       
  1022 
       
  1023 class TreeBuilder:
       
  1024 
       
  1025     def __init__(self, element_factory=None):
       
  1026         self._data = [] # data collector
       
  1027         self._elem = [] # element stack
       
  1028         self._last = None # last element
       
  1029         self._tail = None # true if we're after an end tag
       
  1030         if element_factory is None:
       
  1031             element_factory = _ElementInterface
       
  1032         self._factory = element_factory
       
  1033 
       
  1034     ##
       
  1035     # Flushes the parser buffers, and returns the toplevel documen
       
  1036     # element.
       
  1037     #
       
  1038     # @return An Element instance.
       
  1039     # @defreturn Element
       
  1040 
       
  1041     def close(self):
       
  1042         assert len(self._elem) == 0, "missing end tags"
       
  1043         assert self._last != None, "missing toplevel element"
       
  1044         return self._last
       
  1045 
       
  1046     def _flush(self):
       
  1047         if self._data:
       
  1048             if self._last is not None:
       
  1049                 text = string.join(self._data, "")
       
  1050                 if self._tail:
       
  1051                     assert self._last.tail is None, "internal error (tail)"
       
  1052                     self._last.tail = text
       
  1053                 else:
       
  1054                     assert self._last.text is None, "internal error (text)"
       
  1055                     self._last.text = text
       
  1056             self._data = []
       
  1057 
       
  1058     ##
       
  1059     # Adds text to the current element.
       
  1060     #
       
  1061     # @param data A string.  This should be either an 8-bit string
       
  1062     #    containing ASCII text, or a Unicode string.
       
  1063 
       
  1064     def data(self, data):
       
  1065         self._data.append(data)
       
  1066 
       
  1067     ##
       
  1068     # Opens a new element.
       
  1069     #
       
  1070     # @param tag The element name.
       
  1071     # @param attrib A dictionary containing element attributes.
       
  1072     # @return The opened element.
       
  1073     # @defreturn Element
       
  1074 
       
  1075     def start(self, tag, attrs):
       
  1076         self._flush()
       
  1077         self._last = elem = self._factory(tag, attrs)
       
  1078         if self._elem:
       
  1079             self._elem[-1].append(elem)
       
  1080         self._elem.append(elem)
       
  1081         self._tail = 0
       
  1082         return elem
       
  1083 
       
  1084     ##
       
  1085     # Closes the current element.
       
  1086     #
       
  1087     # @param tag The element name.
       
  1088     # @return The closed element.
       
  1089     # @defreturn Element
       
  1090 
       
  1091     def end(self, tag):
       
  1092         self._flush()
       
  1093         self._last = self._elem.pop()
       
  1094         assert self._last.tag == tag,\
       
  1095                "end tag mismatch (expected %s, got %s)" % (
       
  1096                    self._last.tag, tag)
       
  1097         self._tail = 1
       
  1098         return self._last
       
  1099 
       
  1100 ##
       
  1101 # Element structure builder for XML source data, based on the
       
  1102 # <b>expat</b> parser.
       
  1103 #
       
  1104 # @keyparam target Target object.  If omitted, the builder uses an
       
  1105 #     instance of the standard {@link #TreeBuilder} class.
       
  1106 # @keyparam html Predefine HTML entities.  This flag is not supported
       
  1107 #     by the current implementation.
       
  1108 # @see #ElementTree
       
  1109 # @see #TreeBuilder
       
  1110 
       
  1111 class XMLTreeBuilder:
       
  1112 
       
  1113     def __init__(self, html=0, target=None):
       
  1114         try:
       
  1115             from xml.parsers import expat
       
  1116         except ImportError:
       
  1117             raise ImportError(
       
  1118                 "No module named expat; use SimpleXMLTreeBuilder instead"
       
  1119                 )
       
  1120         self._parser = parser = expat.ParserCreate(None, "}")
       
  1121         if target is None:
       
  1122             target = TreeBuilder()
       
  1123         self._target = target
       
  1124         self._names = {} # name memo cache
       
  1125         # callbacks
       
  1126         parser.DefaultHandlerExpand = self._default
       
  1127         parser.StartElementHandler = self._start
       
  1128         parser.EndElementHandler = self._end
       
  1129         parser.CharacterDataHandler = self._data
       
  1130         # let expat do the buffering, if supported
       
  1131         try:
       
  1132             self._parser.buffer_text = 1
       
  1133         except AttributeError:
       
  1134             pass
       
  1135         # use new-style attribute handling, if supported
       
  1136         try:
       
  1137             self._parser.ordered_attributes = 1
       
  1138             self._parser.specified_attributes = 1
       
  1139             parser.StartElementHandler = self._start_list
       
  1140         except AttributeError:
       
  1141             pass
       
  1142         encoding = None
       
  1143         if not parser.returns_unicode:
       
  1144             encoding = "utf-8"
       
  1145         # target.xml(encoding, None)
       
  1146         self._doctype = None
       
  1147         self.entity = {}
       
  1148 
       
  1149     def _fixtext(self, text):
       
  1150         # convert text string to ascii, if possible
       
  1151         try:
       
  1152             return _encode(text, "ascii")
       
  1153         except UnicodeError:
       
  1154             return text
       
  1155 
       
  1156     def _fixname(self, key):
       
  1157         # expand qname, and convert name string to ascii, if possible
       
  1158         try:
       
  1159             name = self._names[key]
       
  1160         except KeyError:
       
  1161             name = key
       
  1162             if "}" in name:
       
  1163                 name = "{" + name
       
  1164             self._names[key] = name = self._fixtext(name)
       
  1165         return name
       
  1166 
       
  1167     def _start(self, tag, attrib_in):
       
  1168         fixname = self._fixname
       
  1169         tag = fixname(tag)
       
  1170         attrib = {}
       
  1171         for key, value in attrib_in.items():
       
  1172             attrib[fixname(key)] = self._fixtext(value)
       
  1173         return self._target.start(tag, attrib)
       
  1174 
       
  1175     def _start_list(self, tag, attrib_in):
       
  1176         fixname = self._fixname
       
  1177         tag = fixname(tag)
       
  1178         attrib = {}
       
  1179         if attrib_in:
       
  1180             for i in range(0, len(attrib_in), 2):
       
  1181                 attrib[fixname(attrib_in[i])] = self._fixtext(attrib_in[i+1])
       
  1182         return self._target.start(tag, attrib)
       
  1183 
       
  1184     def _data(self, text):
       
  1185         return self._target.data(self._fixtext(text))
       
  1186 
       
  1187     def _end(self, tag):
       
  1188         return self._target.end(self._fixname(tag))
       
  1189 
       
  1190     def _default(self, text):
       
  1191         prefix = text[:1]
       
  1192         if prefix == "&":
       
  1193             # deal with undefined entities
       
  1194             try:
       
  1195                 self._target.data(self.entity[text[1:-1]])
       
  1196             except KeyError:
       
  1197                 from xml.parsers import expat
       
  1198                 raise expat.error(
       
  1199                     "undefined entity %s: line %d, column %d" %
       
  1200                     (text, self._parser.ErrorLineNumber,
       
  1201                     self._parser.ErrorColumnNumber)
       
  1202                     )
       
  1203         elif prefix == "<" and text[:9] == "<!DOCTYPE":
       
  1204             self._doctype = [] # inside a doctype declaration
       
  1205         elif self._doctype is not None:
       
  1206             # parse doctype contents
       
  1207             if prefix == ">":
       
  1208                 self._doctype = None
       
  1209                 return
       
  1210             text = string.strip(text)
       
  1211             if not text:
       
  1212                 return
       
  1213             self._doctype.append(text)
       
  1214             n = len(self._doctype)
       
  1215             if n > 2:
       
  1216                 type = self._doctype[1]
       
  1217                 if type == "PUBLIC" and n == 4:
       
  1218                     name, type, pubid, system = self._doctype
       
  1219                 elif type == "SYSTEM" and n == 3:
       
  1220                     name, type, system = self._doctype
       
  1221                     pubid = None
       
  1222                 else:
       
  1223                     return
       
  1224                 if pubid:
       
  1225                     pubid = pubid[1:-1]
       
  1226                 self.doctype(name, pubid, system[1:-1])
       
  1227                 self._doctype = None
       
  1228 
       
  1229     ##
       
  1230     # Handles a doctype declaration.
       
  1231     #
       
  1232     # @param name Doctype name.
       
  1233     # @param pubid Public identifier.
       
  1234     # @param system System identifier.
       
  1235 
       
  1236     def doctype(self, name, pubid, system):
       
  1237         pass
       
  1238 
       
  1239     ##
       
  1240     # Feeds data to the parser.
       
  1241     #
       
  1242     # @param data Encoded data.
       
  1243 
       
  1244     def feed(self, data):
       
  1245         self._parser.Parse(data, 0)
       
  1246 
       
  1247     ##
       
  1248     # Finishes feeding data to the parser.
       
  1249     #
       
  1250     # @return An element structure.
       
  1251     # @defreturn Element
       
  1252 
       
  1253     def close(self):
       
  1254         self._parser.Parse("", 1) # end of data
       
  1255         tree = self._target.close()
       
  1256         del self._target, self._parser # get rid of circular references
       
  1257         return tree
       
  1258 
       
# compatibility: XMLParser is another name for the XMLTreeBuilder class
XMLParser = XMLTreeBuilder