| Home | Trees | Indices | Help | 
 | 
|---|
|  | 
   1  # 
   2  # ElementTree 
   3  # $Id: ElementTree.py 2326 2005-03-17 07:45:21Z fredrik $ 
   4  # 
   5  # light-weight XML support for Python 1.5.2 and later. 
   6  # 
   7  # history: 
   8  # 2001-10-20 fl   created (from various sources) 
   9  # 2001-11-01 fl   return root from parse method 
  10  # 2002-02-16 fl   sort attributes in lexical order 
  11  # 2002-04-06 fl   TreeBuilder refactoring, added PythonDoc markup 
  12  # 2002-05-01 fl   finished TreeBuilder refactoring 
  13  # 2002-07-14 fl   added basic namespace support to ElementTree.write 
  14  # 2002-07-25 fl   added QName attribute support 
  15  # 2002-10-20 fl   fixed encoding in write 
  16  # 2002-11-24 fl   changed default encoding to ascii; fixed attribute encoding 
  17  # 2002-11-27 fl   accept file objects or file names for parse/write 
  18  # 2002-12-04 fl   moved XMLTreeBuilder back to this module 
  19  # 2003-01-11 fl   fixed entity encoding glitch for us-ascii 
  20  # 2003-02-13 fl   added XML literal factory 
  21  # 2003-02-21 fl   added ProcessingInstruction/PI factory 
  22  # 2003-05-11 fl   added tostring/fromstring helpers 
  23  # 2003-05-26 fl   added ElementPath support 
  24  # 2003-07-05 fl   added makeelement factory method 
  25  # 2003-07-28 fl   added more well-known namespace prefixes 
  26  # 2003-08-15 fl   fixed typo in ElementTree.findtext (Thomas Dartsch) 
  27  # 2003-09-04 fl   fall back on emulator if ElementPath is not installed 
  28  # 2003-10-31 fl   markup updates 
  29  # 2003-11-15 fl   fixed nested namespace bug 
  30  # 2004-03-28 fl   added XMLID helper 
  31  # 2004-06-02 fl   added default support to findtext 
  32  # 2004-06-08 fl   fixed encoding of non-ascii element/attribute names 
  33  # 2004-08-23 fl   take advantage of post-2.1 expat features 
  34  # 2005-02-01 fl   added iterparse implementation 
  35  # 2005-03-02 fl   fixed iterparse support for pre-2.2 versions 
  36  # 
  37  # Copyright (c) 1999-2005 by Fredrik Lundh.  All rights reserved. 
  38  # 
  39  # fredrik@pythonware.com 
  40  # http://www.pythonware.com 
  41  # 
  42  # -------------------------------------------------------------------- 
  43  # The ElementTree toolkit is 
  44  # 
  45  # Copyright (c) 1999-2005 by Fredrik Lundh 
  46  # 
  47  # By obtaining, using, and/or copying this software and/or its 
  48  # associated documentation, you agree that you have read, understood, 
  49  # and will comply with the following terms and conditions: 
  50  # 
  51  # Permission to use, copy, modify, and distribute this software and 
  52  # its associated documentation for any purpose and without fee is 
  53  # hereby granted, provided that the above copyright notice appears in 
  54  # all copies, and that both that copyright notice and this permission 
  55  # notice appear in supporting documentation, and that the name of 
  56  # Secret Labs AB or the author not be used in advertising or publicity 
  57  # pertaining to distribution of the software without specific, written 
  58  # prior permission. 
  59  # 
  60  # SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD 
  61  # TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT- 
  62  # ABILITY AND FITNESS.  IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR 
  63  # BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY 
  64  # DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, 
  65  # WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS 
  66  # ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE 
  67  # OF THIS SOFTWARE. 
  68  # -------------------------------------------------------------------- 
  69   
  70  # Licensed to PSF under a Contributor Agreement. 
  71  # See http://www.python.org/2.4/license for licensing details. 
  72   
  73  __all__ = [ 
  74      # public symbols 
  75      "Comment", 
  76      "dump", 
  77      "Element", "ElementTree", 
  78      "fromstring", 
  79      "iselement", "iterparse", 
  80      "parse", 
  81      "PI", "ProcessingInstruction", 
  82      "QName", 
  83      "SubElement", 
  84      "tostring", 
  85      "TreeBuilder", 
  86      "VERSION", "XML", 
  87      "XMLParser", "XMLTreeBuilder", 
  88      ] 
  89   
  90  ## 
  91  # The <b>Element</b> type is a flexible container object, designed to 
  92  # store hierarchical data structures in memory. The type can be 
  93  # described as a cross between a list and a dictionary. 
  94  # <p> 
  95  # Each element has a number of properties associated with it: 
  96  # <ul> 
  97  # <li>a <i>tag</i>. This is a string identifying what kind of data 
  98  # this element represents (the element type, in other words).</li> 
  99  # <li>a number of <i>attributes</i>, stored in a Python dictionary.</li> 
 100  # <li>a <i>text</i> string.</li> 
 101  # <li>an optional <i>tail</i> string.</li> 
 102  # <li>a number of <i>child elements</i>, stored in a Python sequence</li> 
 103  # </ul> 
 104  # 
 105  # To create an element instance, use the {@link #Element} or {@link 
 106  # #SubElement} factory functions. 
 107  # <p> 
 108  # The {@link #ElementTree} class can be used to wrap an element 
 109  # structure, and convert it from and to XML. 
 110  ## 
 111   
 112  import string, sys, re 
 113   
 134   
 135  try: 
 136      import ElementPath 
 137  except ImportError: 
 138      # FIXME: issue warning in this case? 
 139      ElementPath = _SimpleElementPath() 
 140   
 141  # TODO: add support for custom namespace resolvers/default namespaces 
 142  # TODO: add improved support for incremental parsing 
 143   
 144  VERSION = "1.2.6" 
 145   
 146  ## 
 147  # Internal element class.  This class defines the Element interface, 
 148  # and provides a reference implementation of this interface. 
 149  # <p> 
 150  # You should not create instances of this class directly.  Use the 
 151  # appropriate factory functions instead, such as {@link #Element} 
 152  # and {@link #SubElement}. 
 153  # 
 154  # @see Element 
 155  # @see SubElement 
 156  # @see Comment 
 157  # @see ProcessingInstruction 
 158   
 160      # <tag attrib>text<child/>...</tag>tail 
 161   
 162      ## 
 163      # (Attribute) Element tag. 
 164   
 165      tag = None 
 166   
 167      ## 
 168      # (Attribute) Element attribute dictionary.  Where possible, use 
 169      # {@link #_ElementInterface.get}, 
 170      # {@link #_ElementInterface.set}, 
 171      # {@link #_ElementInterface.keys}, and 
 172      # {@link #_ElementInterface.items} to access 
 173      # element attributes. 
 174   
 175      attrib = None 
 176   
 177      ## 
 178      # (Attribute) Text before first subelement.  This is either a 
 179      # string or the value None, if there was no text. 
 180   
 181      text = None 
 182   
 183      ## 
 184      # (Attribute) Text after this element's end tag, but before the 
 185      # next sibling element's start tag.  This is either a string or 
 186      # the value None, if there was no text. 
 187   
 188      tail = None # text after end tag, if any 
 189   
 194   
 196          return "<Element %s at %x>" % (self.tag, id(self)) 
 197   
 198      ## 
 199      # Creates a new element object of the same type as this element. 
 200      # 
 201      # @param tag Element tag. 
 202      # @param attrib Element attributes, given as a dictionary. 
 203      # @return A new element instance. 
 204   
 207   
 208      ## 
 209      # Returns the number of subelements. 
 210      # 
 211      # @return The number of subelements. 
 212   
 215   
 216      ## 
 217      # Returns the given subelement. 
 218      # 
 219      # @param index What subelement to return. 
 220      # @return The given subelement. 
 221      # @exception IndexError If the given element does not exist. 
 222   
 225   
 226      ## 
 227      # Replaces the given subelement. 
 228      # 
 229      # @param index What subelement to replace. 
 230      # @param element The new element value. 
 231      # @exception IndexError If the given element does not exist. 
 232      # @exception AssertionError If element is not a valid object. 
 233   
 237   
 238      ## 
 239      # Deletes the given subelement. 
 240      # 
 241      # @param index What subelement to delete. 
 242      # @exception IndexError If the given element does not exist. 
 243   
 246   
 247      ## 
 248      # Returns a list containing subelements in the given range. 
 249      # 
 250      # @param start The first subelement to return. 
 251      # @param stop The first subelement that shouldn't be returned. 
 252      # @return A sequence object containing subelements. 
 253   
 256   
 257      ## 
 258      # Replaces a number of subelements with elements from a sequence. 
 259      # 
 260      # @param start The first subelement to replace. 
 261      # @param stop The first subelement that shouldn't be replaced. 
 262      # @param elements A sequence object with zero or more elements. 
 263      # @exception AssertionError If a sequence member is not a valid object. 
 264   
 266          for element in elements: 
 267              assert iselement(element) 
 268          self._children[start:stop] = list(elements) 
 269   
 270      ## 
 271      # Deletes a number of subelements. 
 272      # 
 273      # @param start The first subelement to delete. 
 274      # @param stop The first subelement to leave in there. 
 275   
 278   
 279      ## 
 280      # Adds a subelement to the end of this element. 
 281      # 
 282      # @param element The element to add. 
 283      # @exception AssertionError If the element is not a valid object. 
 284   
 288   
 289      ## 
 290      # Inserts a subelement at the given position in this element. 
 291      # 
 292      # @param index Where to insert the new subelement. 
 293      # @exception AssertionError If the element is not a valid object. 
 294   
 298   
 299      ## 
 300      # Removes a matching subelement.  Unlike the <b>find</b> methods, 
 301      # this method compares elements based on identity, not on tag 
 302      # value or contents. 
 303      # 
 304      # @param element What element to remove. 
 305      # @exception ValueError If a matching element could not be found. 
 306      # @exception AssertionError If the element is not a valid object. 
 307   
 311   
 312      ## 
 313      # Returns all subelements.  The elements are returned in document 
 314      # order. 
 315      # 
 316      # @return A list of subelements. 
 317      # @defreturn list of Element instances 
 318   
 321   
 322      ## 
 323      # Finds the first matching subelement, by tag name or path. 
 324      # 
 325      # @param path What element to look for. 
 326      # @return The first matching element, or None if no element was found. 
 327      # @defreturn Element or None 
 328   
 330          return ElementPath.find(self, path) 
 331   
 332      ## 
 333      # Finds text for the first matching subelement, by tag name or path. 
 334      # 
 335      # @param path What element to look for. 
 336      # @param default What to return if the element was not found. 
 337      # @return The text content of the first matching element, or the 
 338      #     default value if no element was found.  Note that if the element 
 339      #     is found, but has no text content, this method returns an 
 340      #     empty string. 
 341      # @defreturn string 
 342   
 344          return ElementPath.findtext(self, path, default) 
 345   
 346      ## 
 347      # Finds all matching subelements, by tag name or path. 
 348      # 
 349      # @param path What element to look for. 
 350      # @return A list or iterator containing all matching elements, 
 351      #    in document order. 
 352      # @defreturn list of Element instances 
 353   
 355          return ElementPath.findall(self, path) 
 356   
 357      ## 
 358      # Resets an element.  This function removes all subelements, clears 
 359      # all attributes, and sets the text and tail attributes to None. 
 360   
 365   
 366      ## 
 367      # Gets an element attribute. 
 368      # 
 369      # @param key What attribute to look for. 
 370      # @param default What to return if the attribute was not found. 
 371      # @return The attribute value, or the default value, if the 
 372      #     attribute was not found. 
 373      # @defreturn string or None 
 374   
 376          return self.attrib.get(key, default) 
 377   
 378      ## 
 379      # Sets an element attribute. 
 380      # 
 381      # @param key What attribute to set. 
 382      # @param value The attribute value. 
 383   
 385          self.attrib[key] = value 
 386   
 387      ## 
 388      # Gets a list of attribute names.  The names are returned in an 
 389      # arbitrary order (just like for an ordinary Python dictionary). 
 390      # 
 391      # @return A list of element attribute names. 
 392      # @defreturn list of strings 
 393   
 395          return self.attrib.keys() 
 396   
 397      ## 
 398      # Gets element attributes, as a sequence.  The attributes are 
 399      # returned in an arbitrary order. 
 400      # 
 401      # @return A list of (name, value) tuples for all attributes. 
 402      # @defreturn list of (string, string) tuples 
 403   
 405          return self.attrib.items() 
 406   
 407      ## 
 408      # Creates a tree iterator.  The iterator loops over this element 
 409      # and all subelements, in document order, and returns all elements 
 410      # with a matching tag. 
 411      # <p> 
 412      # If the tree structure is modified during iteration, the result 
 413      # is undefined. 
 414      # 
 415      # @param tag What tags to look for (default is to return all elements). 
 416      # @return A list or iterator containing all the matching elements. 
 417      # @defreturn list or iterator 
 418   
 428   
 429  # compatibility 
 430  _Element = _ElementInterface 
 431   
 432  ## 
 433  # Element factory.  This function returns an object implementing the 
 434  # standard Element interface.  The exact class or type of that object 
 435  # is implementation dependent, but it will always be compatible with 
 436  # the {@link #_ElementInterface} class in this module. 
 437  # <p> 
 438  # The element name, attribute names, and attribute values can be 
 439  # either 8-bit ASCII strings or Unicode strings. 
 440  # 
 441  # @param tag The element name. 
 442  # @param attrib An optional dictionary, containing element attributes. 
 443  # @param **extra Additional attributes, given as keyword arguments. 
 444  # @return An element instance. 
 445  # @defreturn Element 
 446   
 451   
 452  ## 
 453  # Subelement factory.  This function creates an element instance, and 
 454  # appends it to an existing element. 
 455  # <p> 
 456  # The element name, attribute names, and attribute values can be 
 457  # either 8-bit ASCII strings or Unicode strings. 
 458  # 
 459  # @param parent The parent element. 
 460  # @param tag The subelement name. 
 461  # @param attrib An optional dictionary, containing element attributes. 
 462  # @param **extra Additional attributes, given as keyword arguments. 
 463  # @return An element instance. 
 464  # @defreturn Element 
 465   
 467      attrib = attrib.copy() 
 468      attrib.update(extra) 
 469      element = parent.makeelement(tag, attrib) 
 470      parent.append(element) 
 471      return element 
 472   
 473  ## 
 474  # Comment element factory.  This factory function creates a special 
 475  # element that will be serialized as an XML comment. 
 476  # <p> 
 477  # The comment string can be either an 8-bit ASCII string or a Unicode 
 478  # string. 
 479  # 
 480  # @param text A string containing the comment string. 
 481  # @return An element instance, representing a comment. 
 482  # @defreturn Element 
 483   
 488   
 489  ## 
 490  # PI element factory.  This factory function creates a special element 
 491  # that will be serialized as an XML processing instruction. 
 492  # 
 493  # @param target A string containing the PI target. 
 494  # @param text A string containing the PI contents, if any. 
 495  # @return An element instance, representing a PI. 
 496  # @defreturn Element 
 497   
 499      element = Element(ProcessingInstruction) 
 500      element.text = target 
 501      if text: 
 502          element.text = element.text + " " + text 
 503      return element 
 504   
 505  PI = ProcessingInstruction 
 506   
 507  ## 
 508  # QName wrapper.  This can be used to wrap a QName attribute value, in 
 509  # order to get proper namespace handling on output. 
 510  # 
 511  # @param text A string containing the QName value, in the form {uri}local, 
 512  #     or, if the tag argument is given, the URI part of a QName. 
 513  # @param tag Optional tag.  If given, the first argument is interpreted as 
 514  #     a URI, and this argument is interpreted as a local name. 
 515  # @return An opaque object, representing the QName. 
 516   
 530   
 531  ## 
 532  # ElementTree wrapper class.  This class represents an entire element 
 533  # hierarchy, and adds some extra support for serialization to and from 
 534  # standard XML. 
 535  # 
 536  # @param element Optional root element. 
 537  # @keyparam file Optional file handle or name.  If given, the 
 538  #     tree is initialized with the contents of this XML file. 
 539   
 541   
 543          assert element is None or iselement(element) 
 544          self._root = element # first node 
 545          if file: 
 546              self.parse(file) 
 547   
 548      ## 
 549      # Gets the root element for this tree. 
 550      # 
 551      # @return An element instance. 
 552      # @defreturn Element 
 553   
 556   
 557      ## 
 558      # Replaces the root element for this tree.  This discards the 
 559      # current contents of the tree, and replaces it with the given 
 560      # element.  Use with care. 
 561      # 
 562      # @param element An element instance. 
 563   
 567   
 568      ## 
 569      # Loads an external XML document into this element tree. 
 570      # 
 571      # @param source A file name or file object. 
 572      # @param parser An optional parser instance.  If not given, the 
 573      #     standard {@link XMLTreeBuilder} parser is used. 
 574      # @return The document root element. 
 575      # @defreturn Element 
 576   
 578          if not hasattr(source, "read"): 
 579              source = open(source, "rb") 
 580          if not parser: 
 581              parser = XMLTreeBuilder() 
 582          while 1: 
 583              data = source.read(32768) 
 584              if not data: 
 585                  break 
 586              parser.feed(data) 
 587          self._root = parser.close() 
 588          return self._root 
 589   
 590      ## 
 591      # Creates a tree iterator for the root element.  The iterator loops 
 592      # over all elements in this tree, in document order. 
 593      # 
 594      # @param tag What tags to look for (default is to return all elements) 
 595      # @return An iterator. 
 596      # @defreturn iterator 
 597   
 601   
 602      ## 
 603      # Finds the first toplevel element with given tag. 
 604      # Same as getroot().find(path). 
 605      # 
 606      # @param path What element to look for. 
 607      # @return The first matching element, or None if no element was found. 
 608      # @defreturn Element or None 
 609   
 611          assert self._root is not None 
 612          if path[:1] == "/": 
 613              path = "." + path 
 614          return self._root.find(path) 
 615   
 616      ## 
 617      # Finds the element text for the first toplevel element with given 
 618      # tag.  Same as getroot().findtext(path). 
 619      # 
 620      # @param path What toplevel element to look for. 
 621      # @param default What to return if the element was not found. 
 622      # @return The text content of the first matching element, or the 
 623      #     default value if no element was found.  Note that if the element 
 624      #     is found, but has no text content, this method returns an 
 625      #     empty string. 
 626      # @defreturn string 
 627   
 629          assert self._root is not None 
 630          if path[:1] == "/": 
 631              path = "." + path 
 632          return self._root.findtext(path, default) 
 633   
 634      ## 
 635      # Finds all toplevel elements with the given tag. 
 636      # Same as getroot().findall(path). 
 637      # 
 638      # @param path What element to look for. 
 639      # @return A list or iterator containing all matching elements, 
 640      #    in document order. 
 641      # @defreturn list of Element instances 
 642   
 644          assert self._root is not None 
 645          if path[:1] == "/": 
 646              path = "." + path 
 647          return self._root.findall(path) 
 648   
 649      ## 
 650      # Writes the element tree to a file, as XML. 
 651      # 
 652      # @param file A file name, or a file object opened for writing. 
 653      # @param encoding Optional output encoding (default is US-ASCII). 
 654   
 656          assert self._root is not None 
 657          if not hasattr(file, "write"): 
 658              file = open(file, "wb") 
 659          if not encoding: 
 660              encoding = "us-ascii" 
 661          elif encoding != "utf-8" and encoding != "us-ascii": 
 662              file.write("<?xml version='1.0' encoding='%s'?>\n" % encoding) 
 663          self._write(file, self._root, encoding, {}) 
 664   
 666          # write XML to file 
 667          tag = node.tag 
 668          if tag is Comment: 
 669              file.write("<!-- %s -->" % _escape_cdata(node.text, encoding)) 
 670          elif tag is ProcessingInstruction: 
 671              file.write("<?%s?>" % _escape_cdata(node.text, encoding)) 
 672          else: 
 673              items = node.items() 
 674              xmlns_items = [] # new namespaces in this scope 
 675              try: 
 676                  if isinstance(tag, QName) or tag[:1] == "{": 
 677                      tag, xmlns = fixtag(tag, namespaces) 
 678                      if xmlns: xmlns_items.append(xmlns) 
 679              except TypeError: 
 680                  _raise_serialization_error(tag) 
 681              file.write("<" + _encode(tag, encoding)) 
 682              if items or xmlns_items: 
 683                  items.sort() # lexical order 
 684                  for k, v in items: 
 685                      try: 
 686                          if isinstance(k, QName) or k[:1] == "{": 
 687                              k, xmlns = fixtag(k, namespaces) 
 688                              if xmlns: xmlns_items.append(xmlns) 
 689                      except TypeError: 
 690                          _raise_serialization_error(k) 
 691                      try: 
 692                          if isinstance(v, QName): 
 693                              v, xmlns = fixtag(v, namespaces) 
 694                              if xmlns: xmlns_items.append(xmlns) 
 695                      except TypeError: 
 696                          _raise_serialization_error(v) 
 697                      file.write(" %s=\"%s\"" % (_encode(k, encoding), 
 698                                                 _escape_attrib(v, encoding))) 
 699                  for k, v in xmlns_items: 
 700                      file.write(" %s=\"%s\"" % (_encode(k, encoding), 
 701                                                 _escape_attrib(v, encoding))) 
 702              if node.text or len(node): 
 703                  file.write(">") 
 704                  if node.text: 
 705                      file.write(_escape_cdata(node.text, encoding)) 
 706                  for n in node: 
 707                      self._write(file, n, encoding, namespaces) 
 708                  file.write("</" + _encode(tag, encoding) + ">") 
 709              else: 
 710                  file.write(" />") 
 711              for k, v in xmlns_items: 
 712                  del namespaces[v] 
 713          if node.tail: 
 714              file.write(_escape_cdata(node.tail, encoding)) 
 715   
 716  # -------------------------------------------------------------------- 
 717  # helpers 
 718   
 719  ## 
 720  # Checks if an object appears to be a valid element object. 
 721  # 
 722  # @param element An element instance. 
 723  # @return A true value if this is an element object. 
 724  # @defreturn flag 
 725   
 727      # FIXME: not sure about this; might be a better idea to look 
 728      # for tag/attrib/text attributes 
 729      return isinstance(element, _ElementInterface) or hasattr(element, "tag") 
 730   
 731  ## 
 732  # Writes an element tree or element structure to sys.stdout.  This 
 733  # function should be used for debugging only. 
 734  # <p> 
 735  # The exact output format is implementation dependent.  In this 
 736  # version, it's written as an ordinary XML file. 
 737  # 
 738  # @param elem An element tree or an individual element. 
 739   
 741      # debugging 
 742      if not isinstance(elem, ElementTree): 
 743          elem = ElementTree(elem) 
 744      elem.write(sys.stdout) 
 745      tail = elem.getroot().tail 
 746      if not tail or tail[-1] != "\n": 
 747          sys.stdout.write("\n") 
 748   
 750      try: 
 751          return s.encode(encoding) 
 752      except AttributeError: 
 753          return s # 1.5.2: assume the string uses the right encoding 
 754   
 755  if sys.version[:3] == "1.5": 
 756      _escape = re.compile(r"[&<>\"\x80-\xff]+") # 1.5.2 
 757  else: 
 758      _escape = re.compile(eval(r'u"[&<>\"\u0080-\uffff]+"')) 
 759   
 760  _escape_map = { 
 761      "&": "&amp;", 
 762      "<": "&lt;", 
 763      ">": "&gt;", 
 764      '"': "&quot;", 
 765  } 
 766   
 767  _namespace_map = { 
 768      # "well-known" namespace prefixes 
 769      "http://www.w3.org/XML/1998/namespace": "xml", 
 770      "http://www.w3.org/1999/xhtml": "html", 
 771      "http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf", 
 772      "http://schemas.xmlsoap.org/wsdl/": "wsdl", 
 773  } 
 774   
 779   
 781      # map reserved and non-ascii characters to numerical entities 
 782      def escape_entities(m, map=_escape_map): 
 783          out = [] 
 784          append = out.append 
 785          for char in m.group(): 
 786              text = map.get(char) 
 787              if text is None: 
 788                  text = "&#%d;" % ord(char) 
 789              append(text) 
 790          return string.join(out, "") 
 791      try: 
 792          return _encode(pattern.sub(escape_entities, text), "ascii") 
 793      except TypeError: 
 794          _raise_serialization_error(text) 
 795   
 796  # 
 797  # the following functions assume an ascii-compatible encoding 
 798  # (or "utf-16") 
 799   
 801      # escape character data 
 802      try: 
 803          if encoding: 
 804              try: 
 805                  text = _encode(text, encoding) 
 806              except UnicodeError: 
 807                  return _encode_entity(text) 
 808          text = replace(text, "&", "&amp;") 
 809          text = replace(text, "<", "&lt;") 
 810          text = replace(text, ">", "&gt;") 
 811          return text 
 812      except (TypeError, AttributeError): 
 813          _raise_serialization_error(text) 
 814   
 816      # escape attribute value 
 817      try: 
 818          if encoding: 
 819              try: 
 820                  text = _encode(text, encoding) 
 821              except UnicodeError: 
 822                  return _encode_entity(text) 
 823          text = replace(text, "&", "&amp;") 
 824          text = replace(text, "'", "&apos;") # FIXME: overkill 
 825          text = replace(text, "\"", "&quot;") 
 826          text = replace(text, "<", "&lt;") 
 827          text = replace(text, ">", "&gt;") 
 828          return text 
 829      except (TypeError, AttributeError): 
 830          _raise_serialization_error(text) 
 831   
 833      # given a decorated tag (of the form {uri}tag), return prefixed 
 834      # tag and namespace declaration, if any 
 835      if isinstance(tag, QName): 
 836          tag = tag.text 
 837      namespace_uri, tag = string.split(tag[1:], "}", 1) 
 838      prefix = namespaces.get(namespace_uri) 
 839      if prefix is None: 
 840          prefix = _namespace_map.get(namespace_uri) 
 841          if prefix is None: 
 842              prefix = "ns%d" % len(namespaces) 
 843          namespaces[namespace_uri] = prefix 
 844          if prefix == "xml": 
 845              xmlns = None 
 846          else: 
 847              xmlns = ("xmlns:%s" % prefix, namespace_uri) 
 848      else: 
 849          xmlns = None 
 850      return "%s:%s" % (prefix, tag), xmlns 
 851   
 852  ## 
 853  # Parses an XML document into an element tree. 
 854  # 
 855  # @param source A filename or file object containing XML data. 
 856  # @param parser An optional parser instance.  If not given, the 
 857  #     standard {@link XMLTreeBuilder} parser is used. 
 858  # @return An ElementTree instance 
 859   
 864   
 865  ## 
 866  # Parses an XML document into an element tree incrementally, and reports 
 867  # what's going on to the user. 
 868  # 
 869  # @param source A filename or file object containing XML data. 
 870  # @param events A list of events to report back.  If omitted, only "end" 
 871  #     events are reported. 
 872  # @return A (event, elem) iterator. 
 873   
 875   
 877          if not hasattr(source, "read"): 
 878              source = open(source, "rb") 
 879          self._file = source 
 880          self._events = [] 
 881          self._index = 0 
 882          self.root = self._root = None 
 883          self._parser = XMLTreeBuilder() 
 884          # wire up the parser for event reporting 
 885          parser = self._parser._parser 
 886          append = self._events.append 
 887          if events is None: 
 888              events = ["end"] 
 889          for event in events: 
 890              if event == "start": 
 891                  try: 
 892                      parser.ordered_attributes = 1 
 893                      parser.specified_attributes = 1 
 894                      def handler(tag, attrib_in, event=event, append=append, 
 895                                  start=self._parser._start_list): 
 896                          append((event, start(tag, attrib_in))) 
 897                      parser.StartElementHandler = handler 
 898                  except AttributeError: 
 899                      def handler(tag, attrib_in, event=event, append=append, 
 900                                  start=self._parser._start): 
 901                          append((event, start(tag, attrib_in))) 
 902                      parser.StartElementHandler = handler 
 903              elif event == "end": 
 904                  def handler(tag, event=event, append=append, 
 905                              end=self._parser._end): 
 906                      append((event, end(tag))) 
 907                  parser.EndElementHandler = handler 
 908              elif event == "start-ns": 
 909                  def handler(prefix, uri, event=event, append=append): 
 910                      try: 
 911                          uri = _encode(uri, "ascii") 
 912                      except UnicodeError: 
 913                          pass 
 914                      append((event, (prefix or "", uri))) 
 915                  parser.StartNamespaceDeclHandler = handler 
 916              elif event == "end-ns": 
 917                  def handler(prefix, event=event, append=append): 
 918                      append((event, None)) 
 919                  parser.EndNamespaceDeclHandler = handler 
 920   
 922          while 1: 
 923              try: 
 924                  item = self._events[self._index] 
 925              except IndexError: 
 926                  if self._parser is None: 
 927                      self.root = self._root 
 928                      try: 
 929                          raise StopIteration 
 930                      except NameError: 
 931                          raise IndexError 
 932                  # load event buffer 
 933                  del self._events[:] 
 934                  self._index = 0 
 935                  data = self._file.read(16384) 
 936                  if data: 
 937                      self._parser.feed(data) 
 938                  else: 
 939                      self._root = self._parser.close() 
 940                      self._parser = None 
 941              else: 
 942                  self._index = self._index + 1 
 943                  return item 
 944   
 945      try: 
 946          iter 
 949      except NameError: 
 952   
 953  ## 
 954  # Parses an XML document from a string constant.  This function can 
 955  # be used to embed "XML literals" in Python code. 
 956  # 
 957  # @param text A string containing XML data.
 958  # @return An Element instance. 
 959  # @defreturn Element 
 960   
 965   
 966  ## 
 967  # Parses an XML document from a string constant, and also returns 
 968  # a dictionary which maps from element ids to elements.
 969  # 
 970  # @param text A string containing XML data.
 971  # @return A tuple containing an Element instance and a dictionary. 
 972  # @defreturn (Element, dictionary) 
 973   
 975      parser = XMLTreeBuilder() 
 976      parser.feed(text) 
 977      tree = parser.close() 
 978      ids = {} 
 979      for elem in tree.getiterator(): 
 980          id = elem.get("id") 
 981          if id: 
 982              ids[id] = elem 
 983      return tree, ids 
 984   
 985  ## 
 986  # Parses an XML document from a string constant.  Same as {@link #XML}. 
 987  # 
 988  # @def fromstring(text) 
 989  # @param text A string containing XML data.
 990  # @return An Element instance. 
 991  # @defreturn Element 
 992   
 993  fromstring = XML 
 994   
 995  ## 
 996  # Generates a string representation of an XML element, including all 
 997  # subelements. 
 998  # 
 999  # @param element An Element instance. 
1000  # @return An encoded string containing the XML data. 
1001  # @defreturn string 
1002   
1006      data = [] 
1007      file = dummy() 
1008      file.write = data.append 
1009      ElementTree(element).write(file, encoding) 
1010      return string.join(data, "") 
1011   
1012  ## 
1013  # Generic element structure builder.  This builder converts a sequence 
1014  # of {@link #TreeBuilder.start}, {@link #TreeBuilder.data}, and {@link 
1015  # #TreeBuilder.end} method calls to a well-formed element structure. 
1016  # <p> 
1017  # You can use this class to build an element structure using a custom XML 
1018  # parser, or a parser for some other XML-like format. 
1019  # 
1020  # @param element_factory Optional element factory.  This factory 
1021  #    is called to create new Element instances, as necessary. 
1022   
1024   
1026          self._data = [] # data collector 
1027          self._elem = [] # element stack 
1028          self._last = None # last element 
1029          self._tail = None # true if we're after an end tag 
1030          if element_factory is None: 
1031              element_factory = _ElementInterface 
1032          self._factory = element_factory 
1033   
1034      ## 
1035      # Flushes the parser buffers, and returns the toplevel document
1036      # element. 
1037      # 
1038      # @return An Element instance. 
1039      # @defreturn Element 
1040   
1042          assert len(self._elem) == 0, "missing end tags" 
1043          assert self._last != None, "missing toplevel element" 
1044          return self._last 
1045   
1047          if self._data: 
1048              if self._last is not None: 
1049                  text = string.join(self._data, "") 
1050                  if self._tail: 
1051                      assert self._last.tail is None, "internal error (tail)" 
1052                      self._last.tail = text 
1053                  else: 
1054                      assert self._last.text is None, "internal error (text)" 
1055                      self._last.text = text 
1056              self._data = [] 
1057   
1058      ## 
1059      # Adds text to the current element. 
1060      # 
1061      # @param data A string.  This should be either an 8-bit string 
1062      #    containing ASCII text, or a Unicode string. 
1063   
1066   
1067      ## 
1068      # Opens a new element. 
1069      # 
1070      # @param tag The element name. 
1071      # @param attrs A dictionary containing element attributes.
1072      # @return The opened element. 
1073      # @defreturn Element 
1074   
1076          self._flush() 
1077          self._last = elem = self._factory(tag, attrs) 
1078          if self._elem: 
1079              self._elem[-1].append(elem) 
1080          self._elem.append(elem) 
1081          self._tail = 0 
1082          return elem 
1083   
1084      ## 
1085      # Closes the current element. 
1086      # 
1087      # @param tag The element name. 
1088      # @return The closed element. 
1089      # @defreturn Element 
1090   
1099   
1100  ## 
1101  # Element structure builder for XML source data, based on the 
1102  # <b>expat</b> parser. 
1103  # 
1104  # @keyparam target Target object.  If omitted, the builder uses an 
1105  #     instance of the standard {@link #TreeBuilder} class. 
1106  # @keyparam html Predefine HTML entities.  This flag is not supported 
1107  #     by the current implementation. 
1108  # @see #ElementTree 
1109  # @see #TreeBuilder 
1110   
1112   
1114          try: 
1115              from xml.parsers import expat 
1116          except ImportError: 
1117              raise ImportError( 
1118                  "No module named expat; use SimpleXMLTreeBuilder instead" 
1119                  ) 
1120          self._parser = parser = expat.ParserCreate(None, "}") 
1121          if target is None: 
1122              target = TreeBuilder() 
1123          self._target = target 
1124          self._names = {} # name memo cache 
1125          # callbacks 
1126          parser.DefaultHandlerExpand = self._default 
1127          parser.StartElementHandler = self._start 
1128          parser.EndElementHandler = self._end 
1129          parser.CharacterDataHandler = self._data 
1130          # let expat do the buffering, if supported 
1131          try: 
1132              self._parser.buffer_text = 1 
1133          except AttributeError: 
1134              pass 
1135          # use new-style attribute handling, if supported 
1136          try: 
1137              self._parser.ordered_attributes = 1 
1138              self._parser.specified_attributes = 1 
1139              parser.StartElementHandler = self._start_list 
1140          except AttributeError: 
1141              pass 
1142          encoding = None 
1143          if not parser.returns_unicode: 
1144              encoding = "utf-8" 
1145          # target.xml(encoding, None) 
1146          self._doctype = None 
1147          self.entity = {} 
1148   
1150          # convert text string to ascii, if possible 
1151          try: 
1152              return _encode(text, "ascii") 
1153          except UnicodeError: 
1154              return text 
1155   
1157          # expand qname, and convert name string to ascii, if possible 
1158          try: 
1159              name = self._names[key] 
1160          except KeyError: 
1161              name = key 
1162              if "}" in name: 
1163                  name = "{" + name 
1164              self._names[key] = name = self._fixtext(name) 
1165          return name 
1166   
1168          fixname = self._fixname 
1169          tag = fixname(tag) 
1170          attrib = {} 
1171          for key, value in attrib_in.items(): 
1172              attrib[fixname(key)] = self._fixtext(value) 
1173          return self._target.start(tag, attrib) 
1174   
1176          fixname = self._fixname 
1177          tag = fixname(tag) 
1178          attrib = {} 
1179          if attrib_in: 
1180              for i in range(0, len(attrib_in), 2): 
1181                  attrib[fixname(attrib_in[i])] = self._fixtext(attrib_in[i+1]) 
1182          return self._target.start(tag, attrib) 
1183   
1185          return self._target.data(self._fixtext(text)) 
1186   
1188          return self._target.end(self._fixname(tag)) 
1189   
1191          prefix = text[:1] 
1192          if prefix == "&": 
1193              # deal with undefined entities 
1194              try: 
1195                  self._target.data(self.entity[text[1:-1]]) 
1196              except KeyError: 
1197                  from xml.parsers import expat 
1198                  raise expat.error( 
1199                      "undefined entity %s: line %d, column %d" % 
1200                      (text, self._parser.ErrorLineNumber, 
1201                      self._parser.ErrorColumnNumber) 
1202                      ) 
1203          elif prefix == "<" and text[:9] == "<!DOCTYPE": 
1204              self._doctype = [] # inside a doctype declaration 
1205          elif self._doctype is not None: 
1206              # parse doctype contents 
1207              if prefix == ">": 
1208                  self._doctype = None 
1209                  return 
1210              text = string.strip(text) 
1211              if not text: 
1212                  return 
1213              self._doctype.append(text) 
1214              n = len(self._doctype) 
1215              if n > 2: 
1216                  type = self._doctype[1] 
1217                  if type == "PUBLIC" and n == 4: 
1218                      name, type, pubid, system = self._doctype 
1219                  elif type == "SYSTEM" and n == 3: 
1220                      name, type, system = self._doctype 
1221                      pubid = None 
1222                  else: 
1223                      return 
1224                  if pubid: 
1225                      pubid = pubid[1:-1] 
1226                  self.doctype(name, pubid, system[1:-1]) 
1227                  self._doctype = None 
1228   
1229      ## 
1230      # Handles a doctype declaration. 
1231      # 
1232      # @param name Doctype name. 
1233      # @param pubid Public identifier. 
1234      # @param system System identifier. 
1235   
1238   
1239      ## 
1240      # Feeds data to the parser. 
1241      # 
1242      # @param data Encoded data. 
1243   
1246   
1247      ## 
1248      # Finishes feeding data to the parser. 
1249      # 
1250      # @return An element structure. 
1251      # @defreturn Element 
1252   
1258   
1259  # compatibility 
1260  XMLParser = XMLTreeBuilder 
1261   
| Home | Trees | Indices | Help | 
 | 
|---|
| Generated by Epydoc 3.0beta1 on Mon Oct 29 21:00:08 2007 | http://epydoc.sourceforge.net |