| Home | Trees | Indices | Help |
|
|---|
|
|
1 #
2 # ElementTree
3 # $Id: ElementTree.py 2326 2005-03-17 07:45:21Z fredrik $
4 #
5 # light-weight XML support for Python 1.5.2 and later.
6 #
7 # history:
8 # 2001-10-20 fl created (from various sources)
9 # 2001-11-01 fl return root from parse method
10 # 2002-02-16 fl sort attributes in lexical order
11 # 2002-04-06 fl TreeBuilder refactoring, added PythonDoc markup
12 # 2002-05-01 fl finished TreeBuilder refactoring
13 # 2002-07-14 fl added basic namespace support to ElementTree.write
14 # 2002-07-25 fl added QName attribute support
15 # 2002-10-20 fl fixed encoding in write
16 # 2002-11-24 fl changed default encoding to ascii; fixed attribute encoding
17 # 2002-11-27 fl accept file objects or file names for parse/write
18 # 2002-12-04 fl moved XMLTreeBuilder back to this module
19 # 2003-01-11 fl fixed entity encoding glitch for us-ascii
20 # 2003-02-13 fl added XML literal factory
21 # 2003-02-21 fl added ProcessingInstruction/PI factory
22 # 2003-05-11 fl added tostring/fromstring helpers
23 # 2003-05-26 fl added ElementPath support
24 # 2003-07-05 fl added makeelement factory method
25 # 2003-07-28 fl added more well-known namespace prefixes
26 # 2003-08-15 fl fixed typo in ElementTree.findtext (Thomas Dartsch)
27 # 2003-09-04 fl fall back on emulator if ElementPath is not installed
28 # 2003-10-31 fl markup updates
29 # 2003-11-15 fl fixed nested namespace bug
30 # 2004-03-28 fl added XMLID helper
31 # 2004-06-02 fl added default support to findtext
32 # 2004-06-08 fl fixed encoding of non-ascii element/attribute names
33 # 2004-08-23 fl take advantage of post-2.1 expat features
34 # 2005-02-01 fl added iterparse implementation
35 # 2005-03-02 fl fixed iterparse support for pre-2.2 versions
36 #
37 # Copyright (c) 1999-2005 by Fredrik Lundh. All rights reserved.
38 #
39 # fredrik@pythonware.com
40 # http://www.pythonware.com
41 #
42 # --------------------------------------------------------------------
43 # The ElementTree toolkit is
44 #
45 # Copyright (c) 1999-2005 by Fredrik Lundh
46 #
47 # By obtaining, using, and/or copying this software and/or its
48 # associated documentation, you agree that you have read, understood,
49 # and will comply with the following terms and conditions:
50 #
51 # Permission to use, copy, modify, and distribute this software and
52 # its associated documentation for any purpose and without fee is
53 # hereby granted, provided that the above copyright notice appears in
54 # all copies, and that both that copyright notice and this permission
55 # notice appear in supporting documentation, and that the name of
56 # Secret Labs AB or the author not be used in advertising or publicity
57 # pertaining to distribution of the software without specific, written
58 # prior permission.
59 #
60 # SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
61 # TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
62 # ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
63 # BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
64 # DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
65 # WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
66 # ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
67 # OF THIS SOFTWARE.
68 # --------------------------------------------------------------------
69
70 # Licensed to PSF under a Contributor Agreement.
71 # See http://www.python.org/2.4/license for licensing details.
72
73 __all__ = [
74 # public symbols
75 "Comment",
76 "dump",
77 "Element", "ElementTree",
78 "fromstring",
79 "iselement", "iterparse",
80 "parse",
81 "PI", "ProcessingInstruction",
82 "QName",
83 "SubElement",
84 "tostring",
85 "TreeBuilder",
86 "VERSION", "XML",
87 "XMLParser", "XMLTreeBuilder",
88 ]
89
90 ##
91 # The <b>Element</b> type is a flexible container object, designed to
92 # store hierarchical data structures in memory. The type can be
93 # described as a cross between a list and a dictionary.
94 # <p>
95 # Each element has a number of properties associated with it:
96 # <ul>
97 # <li>a <i>tag</i>. This is a string identifying what kind of data
98 # this element represents (the element type, in other words).</li>
99 # <li>a number of <i>attributes</i>, stored in a Python dictionary.</li>
100 # <li>a <i>text</i> string.</li>
101 # <li>an optional <i>tail</i> string.</li>
102 # <li>a number of <i>child elements</i>, stored in a Python sequence</li>
103 # </ul>
104 #
105 # To create an element instance, use the {@link #Element} or {@link
106 # #SubElement} factory functions.
107 # <p>
108 # The {@link #ElementTree} class can be used to wrap an element
109 # structure, and convert it from and to XML.
110 ##
111
112 import string, sys, re
113
134
# Use the full ElementPath implementation when it is installed; otherwise
# fall back on the emulator class, which must already be defined by the
# time this statement runs.
try:
    import ElementPath
except ImportError:
    # FIXME: issue warning in this case?
    ElementPath = _SimpleElementPath()
140
141 # TODO: add support for custom namespace resolvers/default namespaces
142 # TODO: add improved support for incremental parsing
143
144 VERSION = "1.2.6"
145
146 ##
147 # Internal element class. This class defines the Element interface,
148 # and provides a reference implementation of this interface.
149 # <p>
150 # You should not create instances of this class directly. Use the
151 # appropriate factory functions instead, such as {@link #Element}
152 # and {@link #SubElement}.
153 #
154 # @see Element
155 # @see SubElement
156 # @see Comment
157 # @see ProcessingInstruction
158
160 # <tag attrib>text<child/>...</tag>tail
161
162 ##
163 # (Attribute) Element tag.
164
165 tag = None
166
167 ##
168 # (Attribute) Element attribute dictionary. Where possible, use
169 # {@link #_ElementInterface.get},
170 # {@link #_ElementInterface.set},
171 # {@link #_ElementInterface.keys}, and
172 # {@link #_ElementInterface.items} to access
173 # element attributes.
174
175 attrib = None
176
177 ##
178 # (Attribute) Text before first subelement. This is either a
179 # string or the value None, if there was no text.
180
181 text = None
182
183 ##
184 # (Attribute) Text after this element's end tag, but before the
185 # next sibling element's start tag. This is either a string or
186 # the value None, if there was no text.
187
188 tail = None # text after end tag, if any
189
194
196 return "<Element %s at %x>" % (self.tag, id(self))
197
198 ##
199 # Creates a new element object of the same type as this element.
200 #
201 # @param tag Element tag.
202 # @param attrib Element attributes, given as a dictionary.
203 # @return A new element instance.
204
207
208 ##
209 # Returns the number of subelements.
210 #
211 # @return The number of subelements.
212
215
216 ##
217 # Returns the given subelement.
218 #
219 # @param index What subelement to return.
220 # @return The given subelement.
221 # @exception IndexError If the given element does not exist.
222
225
226 ##
227 # Replaces the given subelement.
228 #
229 # @param index What subelement to replace.
230 # @param element The new element value.
231 # @exception IndexError If the given element does not exist.
232 # @exception AssertionError If element is not a valid object.
233
237
238 ##
239 # Deletes the given subelement.
240 #
241 # @param index What subelement to delete.
242 # @exception IndexError If the given element does not exist.
243
246
247 ##
248 # Returns a list containing subelements in the given range.
249 #
250 # @param start The first subelement to return.
251 # @param stop The first subelement that shouldn't be returned.
252 # @return A sequence object containing subelements.
253
256
257 ##
258 # Replaces a number of subelements with elements from a sequence.
259 #
260 # @param start The first subelement to replace.
261 # @param stop The first subelement that shouldn't be replaced.
262 # @param elements A sequence object with zero or more elements.
263 # @exception AssertionError If a sequence member is not a valid object.
264
266 for element in elements:
267 assert iselement(element)
268 self._children[start:stop] = list(elements)
269
270 ##
271 # Deletes a number of subelements.
272 #
273 # @param start The first subelement to delete.
274 # @param stop The first subelement to leave in there.
275
278
279 ##
280 # Adds a subelement to the end of this element.
281 #
282 # @param element The element to add.
283 # @exception AssertionError If the element is not a valid object.
284
288
289 ##
290 # Inserts a subelement at the given position in this element.
291 #
292 # @param index Where to insert the new subelement.
293 # @exception AssertionError If the element is not a valid object.
294
298
299 ##
300 # Removes a matching subelement. Unlike the <b>find</b> methods,
301 # this method compares elements based on identity, not on tag
302 # value or contents.
303 #
304 # @param element What element to remove.
305 # @exception ValueError If a matching element could not be found.
306 # @exception AssertionError If the element is not a valid object.
307
311
312 ##
313 # Returns all subelements. The elements are returned in document
314 # order.
315 #
316 # @return A list of subelements.
317 # @defreturn list of Element instances
318
321
322 ##
323 # Finds the first matching subelement, by tag name or path.
324 #
325 # @param path What element to look for.
326 # @return The first matching element, or None if no element was found.
327 # @defreturn Element or None
328
330 return ElementPath.find(self, path)
331
332 ##
333 # Finds text for the first matching subelement, by tag name or path.
334 #
335 # @param path What element to look for.
336 # @param default What to return if the element was not found.
337 # @return The text content of the first matching element, or the
338 # default value if no element was found. Note that if the element
339 # is found, but has no text content, this method returns an
340 # empty string.
341 # @defreturn string
342
344 return ElementPath.findtext(self, path, default)
345
346 ##
347 # Finds all matching subelements, by tag name or path.
348 #
349 # @param path What element to look for.
350 # @return A list or iterator containing all matching elements,
351 # in document order.
352 # @defreturn list of Element instances
353
355 return ElementPath.findall(self, path)
356
357 ##
358 # Resets an element. This function removes all subelements, clears
359 # all attributes, and sets the text and tail attributes to None.
360
365
366 ##
367 # Gets an element attribute.
368 #
369 # @param key What attribute to look for.
370 # @param default What to return if the attribute was not found.
371 # @return The attribute value, or the default value, if the
372 # attribute was not found.
373 # @defreturn string or None
374
376 return self.attrib.get(key, default)
377
378 ##
379 # Sets an element attribute.
380 #
381 # @param key What attribute to set.
382 # @param value The attribute value.
383
385 self.attrib[key] = value
386
387 ##
388 # Gets a list of attribute names. The names are returned in an
389 # arbitrary order (just like for an ordinary Python dictionary).
390 #
391 # @return A list of element attribute names.
392 # @defreturn list of strings
393
395 return self.attrib.keys()
396
397 ##
398 # Gets element attributes, as a sequence. The attributes are
399 # returned in an arbitrary order.
400 #
401 # @return A list of (name, value) tuples for all attributes.
402 # @defreturn list of (string, string) tuples
403
405 return self.attrib.items()
406
407 ##
408 # Creates a tree iterator. The iterator loops over this element
409 # and all subelements, in document order, and returns all elements
410 # with a matching tag.
411 # <p>
412 # If the tree structure is modified during iteration, the result
413 # is undefined.
414 #
415 # @param tag What tags to look for (default is to return all elements).
416 # @return A list or iterator containing all the matching elements.
417 # @defreturn list or iterator
418
428
429 # compatibility
430 _Element = _ElementInterface
431
432 ##
433 # Element factory. This function returns an object implementing the
434 # standard Element interface. The exact class or type of that object
435 # is implementation dependent, but it will always be compatible with
436 # the {@link #_ElementInterface} class in this module.
437 # <p>
438 # The element name, attribute names, and attribute values can be
439 # either 8-bit ASCII strings or Unicode strings.
440 #
441 # @param tag The element name.
442 # @param attrib An optional dictionary, containing element attributes.
443 # @param **extra Additional attributes, given as keyword arguments.
444 # @return An element instance.
445 # @defreturn Element
446
451
452 ##
453 # Subelement factory. This function creates an element instance, and
454 # appends it to an existing element.
455 # <p>
456 # The element name, attribute names, and attribute values can be
457 # either 8-bit ASCII strings or Unicode strings.
458 #
459 # @param parent The parent element.
460 # @param tag The subelement name.
461 # @param attrib An optional dictionary, containing element attributes.
462 # @param **extra Additional attributes, given as keyword arguments.
463 # @return An element instance.
464 # @defreturn Element
465
467 attrib = attrib.copy()
468 attrib.update(extra)
469 element = parent.makeelement(tag, attrib)
470 parent.append(element)
471 return element
472
473 ##
474 # Comment element factory. This factory function creates a special
475 # element that will be serialized as an XML comment.
476 # <p>
477 # The comment string can be either an 8-bit ASCII string or a Unicode
478 # string.
479 #
480 # @param text A string containing the comment string.
481 # @return An element instance, representing a comment.
482 # @defreturn Element
483
488
489 ##
490 # PI element factory. This factory function creates a special element
491 # that will be serialized as an XML processing instruction.
492 #
493 # @param target A string containing the PI target.
494 # @param text A string containing the PI contents, if any.
495 # @return An element instance, representing a PI.
496 # @defreturn Element
497
499 element = Element(ProcessingInstruction)
500 element.text = target
501 if text:
502 element.text = element.text + " " + text
503 return element
504
505 PI = ProcessingInstruction
506
507 ##
508 # QName wrapper. This can be used to wrap a QName attribute value, in
509 # order to get proper namespace handling on output.
510 #
511 # @param text A string containing the QName value, in the form {uri}local,
512 # or, if the tag argument is given, the URI part of a QName.
513 # @param tag Optional tag. If given, the first argument is interpreted as
514 # a URI, and this argument is interpreted as a local name.
515 # @return An opaque object, representing the QName.
516
530
531 ##
532 # ElementTree wrapper class. This class represents an entire element
533 # hierarchy, and adds some extra support for serialization to and from
534 # standard XML.
535 #
536 # @param element Optional root element.
537 # @keyparam file Optional file handle or name. If given, the
538 # tree is initialized with the contents of this XML file.
539
541
543 assert element is None or iselement(element)
544 self._root = element # first node
545 if file:
546 self.parse(file)
547
548 ##
549 # Gets the root element for this tree.
550 #
551 # @return An element instance.
552 # @defreturn Element
553
556
557 ##
558 # Replaces the root element for this tree. This discards the
559 # current contents of the tree, and replaces it with the given
560 # element. Use with care.
561 #
562 # @param element An element instance.
563
567
568 ##
569 # Loads an external XML document into this element tree.
570 #
571 # @param source A file name or file object.
572 # @param parser An optional parser instance. If not given, the
573 # standard {@link XMLTreeBuilder} parser is used.
574 # @return The document root element.
575 # @defreturn Element
576
578 if not hasattr(source, "read"):
579 source = open(source, "rb")
580 if not parser:
581 parser = XMLTreeBuilder()
582 while 1:
583 data = source.read(32768)
584 if not data:
585 break
586 parser.feed(data)
587 self._root = parser.close()
588 return self._root
589
590 ##
591 # Creates a tree iterator for the root element. The iterator loops
592 # over all elements in this tree, in document order.
593 #
594 # @param tag What tags to look for (default is to return all elements)
595 # @return An iterator.
596 # @defreturn iterator
597
601
602 ##
603 # Finds the first toplevel element with given tag.
604 # Same as getroot().find(path).
605 #
606 # @param path What element to look for.
607 # @return The first matching element, or None if no element was found.
608 # @defreturn Element or None
609
611 assert self._root is not None
612 if path[:1] == "/":
613 path = "." + path
614 return self._root.find(path)
615
616 ##
617 # Finds the element text for the first toplevel element with given
618 # tag. Same as getroot().findtext(path).
619 #
620 # @param path What toplevel element to look for.
621 # @param default What to return if the element was not found.
622 # @return The text content of the first matching element, or the
623 # default value if no element was found. Note that if the element
624 # is found, but has no text content, this method returns an
625 # empty string.
626 # @defreturn string
627
629 assert self._root is not None
630 if path[:1] == "/":
631 path = "." + path
632 return self._root.findtext(path, default)
633
634 ##
635 # Finds all toplevel elements with the given tag.
636 # Same as getroot().findall(path).
637 #
638 # @param path What element to look for.
639 # @return A list or iterator containing all matching elements,
640 # in document order.
641 # @defreturn list of Element instances
642
644 assert self._root is not None
645 if path[:1] == "/":
646 path = "." + path
647 return self._root.findall(path)
648
649 ##
650 # Writes the element tree to a file, as XML.
651 #
652 # @param file A file name, or a file object opened for writing.
653 # @param encoding Optional output encoding (default is US-ASCII).
654
656 assert self._root is not None
657 if not hasattr(file, "write"):
658 file = open(file, "wb")
659 if not encoding:
660 encoding = "us-ascii"
661 elif encoding != "utf-8" and encoding != "us-ascii":
662 file.write("<?xml version='1.0' encoding='%s'?>\n" % encoding)
663 self._write(file, self._root, encoding, {})
664
666 # write XML to file
667 tag = node.tag
668 if tag is Comment:
669 file.write("<!-- %s -->" % _escape_cdata(node.text, encoding))
670 elif tag is ProcessingInstruction:
671 file.write("<?%s?>" % _escape_cdata(node.text, encoding))
672 else:
673 items = node.items()
674 xmlns_items = [] # new namespaces in this scope
675 try:
676 if isinstance(tag, QName) or tag[:1] == "{":
677 tag, xmlns = fixtag(tag, namespaces)
678 if xmlns: xmlns_items.append(xmlns)
679 except TypeError:
680 _raise_serialization_error(tag)
681 file.write("<" + _encode(tag, encoding))
682 if items or xmlns_items:
683 items.sort() # lexical order
684 for k, v in items:
685 try:
686 if isinstance(k, QName) or k[:1] == "{":
687 k, xmlns = fixtag(k, namespaces)
688 if xmlns: xmlns_items.append(xmlns)
689 except TypeError:
690 _raise_serialization_error(k)
691 try:
692 if isinstance(v, QName):
693 v, xmlns = fixtag(v, namespaces)
694 if xmlns: xmlns_items.append(xmlns)
695 except TypeError:
696 _raise_serialization_error(v)
697 file.write(" %s=\"%s\"" % (_encode(k, encoding),
698 _escape_attrib(v, encoding)))
699 for k, v in xmlns_items:
700 file.write(" %s=\"%s\"" % (_encode(k, encoding),
701 _escape_attrib(v, encoding)))
702 if node.text or len(node):
703 file.write(">")
704 if node.text:
705 file.write(_escape_cdata(node.text, encoding))
706 for n in node:
707 self._write(file, n, encoding, namespaces)
708 file.write("</" + _encode(tag, encoding) + ">")
709 else:
710 file.write(" />")
711 for k, v in xmlns_items:
712 del namespaces[v]
713 if node.tail:
714 file.write(_escape_cdata(node.tail, encoding))
715
716 # --------------------------------------------------------------------
717 # helpers
718
719 ##
720 # Checks if an object appears to be a valid element object.
721 #
722 # @param element An element instance.
723 # @return A true value if this is an element object.
724 # @defreturn flag
725
727 # FIXME: not sure about this; might be a better idea to look
728 # for tag/attrib/text attributes
729 return isinstance(element, _ElementInterface) or hasattr(element, "tag")
730
731 ##
732 # Writes an element tree or element structure to sys.stdout. This
733 # function should be used for debugging only.
734 # <p>
735 # The exact output format is implementation dependent. In this
736 # version, it's written as an ordinary XML file.
737 #
738 # @param elem An element tree or an individual element.
739
741 # debugging
742 if not isinstance(elem, ElementTree):
743 elem = ElementTree(elem)
744 elem.write(sys.stdout)
745 tail = elem.getroot().tail
746 if not tail or tail[-1] != "\n":
747 sys.stdout.write("\n")
748
750 try:
751 return s.encode(encoding)
752 except AttributeError:
753 return s # 1.5.2: assume the string uses the right encoding
754
# Compiled pattern matching runs of characters that cannot be written
# literally: the markup characters & < > " and anything outside 7-bit ASCII.
if sys.version[:3] == "1.5":
    # 1.5.2: 8-bit strings only, so the non-ASCII range stops at \xff
    _escape = re.compile(r"[&<>\"\x80-\xff]+")
else:
    # The Unicode literal is built with eval() on a constant string --
    # presumably so that the u"..." syntax is never seen by the 1.5.2
    # compiler.  The argument is a fixed literal, never external data.
    _escape = re.compile(eval(r'u"[&<>\"\u0080-\uffff]+"'))
759
# Entity references for the characters that are reserved in XML markup.
# _encode_entity uses this table as its default lookup map; any character
# that is not listed here is written as a numeric character reference.
_escape_map = {
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    '"': "&quot;",
}
766
# Maps namespace URIs to their "well-known" prefixes.  fixtag consults this
# table when a URI has no prefix assigned yet, before it falls back to a
# generated "ns%d" prefix.
_namespace_map = {
    "http://www.w3.org/XML/1998/namespace": "xml",
    "http://www.w3.org/1999/xhtml": "html",
    "http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf",
    "http://schemas.xmlsoap.org/wsdl/": "wsdl",
}
774
779
781 # map reserved and non-ascii characters to numerical entities
782 def escape_entities(m, map=_escape_map):
783 out = []
784 append = out.append
785 for char in m.group():
786 text = map.get(char)
787 if text is None:
788 text = "&#%d;" % ord(char)
789 append(text)
790 return string.join(out, "")
791 try:
792 return _encode(pattern.sub(escape_entities, text), "ascii")
793 except TypeError:
794 _raise_serialization_error(text)
795
796 #
797 # the following functions assume an ascii-compatible encoding
798 # (or "utf-16")
799
801 # escape character data
802 try:
803 if encoding:
804 try:
805 text = _encode(text, encoding)
806 except UnicodeError:
807 return _encode_entity(text)
808 text = replace(text, "&", "&amp;")
809 text = replace(text, "<", "&lt;")
810 text = replace(text, ">", "&gt;")
811 return text
812 except (TypeError, AttributeError):
813 _raise_serialization_error(text)
814
816 # escape attribute value
817 try:
818 if encoding:
819 try:
820 text = _encode(text, encoding)
821 except UnicodeError:
822 return _encode_entity(text)
823 text = replace(text, "&", "&amp;")
824 text = replace(text, "'", "&apos;") # FIXME: overkill
825 text = replace(text, "\"", "&quot;")
826 text = replace(text, "<", "&lt;")
827 text = replace(text, ">", "&gt;")
828 return text
829 except (TypeError, AttributeError):
830 _raise_serialization_error(text)
831
833 # given a decorated tag (of the form {uri}tag), return prefixed
834 # tag and namespace declaration, if any
835 if isinstance(tag, QName):
836 tag = tag.text
837 namespace_uri, tag = string.split(tag[1:], "}", 1)
838 prefix = namespaces.get(namespace_uri)
839 if prefix is None:
840 prefix = _namespace_map.get(namespace_uri)
841 if prefix is None:
842 prefix = "ns%d" % len(namespaces)
843 namespaces[namespace_uri] = prefix
844 if prefix == "xml":
845 xmlns = None
846 else:
847 xmlns = ("xmlns:%s" % prefix, namespace_uri)
848 else:
849 xmlns = None
850 return "%s:%s" % (prefix, tag), xmlns
851
852 ##
853 # Parses an XML document into an element tree.
854 #
855 # @param source A filename or file object containing XML data.
856 # @param parser An optional parser instance. If not given, the
857 # standard {@link XMLTreeBuilder} parser is used.
858 # @return An ElementTree instance
859
864
865 ##
866 # Parses an XML document into an element tree incrementally, and reports
867 # what's going on to the user.
868 #
869 # @param source A filename or file object containing XML data.
870 # @param events A list of events to report back. If omitted, only "end"
871 # events are reported.
872 # @return A (event, elem) iterator.
873
875
877 if not hasattr(source, "read"):
878 source = open(source, "rb")
879 self._file = source
880 self._events = []
881 self._index = 0
882 self.root = self._root = None
883 self._parser = XMLTreeBuilder()
884 # wire up the parser for event reporting
885 parser = self._parser._parser
886 append = self._events.append
887 if events is None:
888 events = ["end"]
889 for event in events:
890 if event == "start":
891 try:
892 parser.ordered_attributes = 1
893 parser.specified_attributes = 1
894 def handler(tag, attrib_in, event=event, append=append,
895 start=self._parser._start_list):
896 append((event, start(tag, attrib_in)))
897 parser.StartElementHandler = handler
898 except AttributeError:
899 def handler(tag, attrib_in, event=event, append=append,
900 start=self._parser._start):
901 append((event, start(tag, attrib_in)))
902 parser.StartElementHandler = handler
903 elif event == "end":
904 def handler(tag, event=event, append=append,
905 end=self._parser._end):
906 append((event, end(tag)))
907 parser.EndElementHandler = handler
908 elif event == "start-ns":
909 def handler(prefix, uri, event=event, append=append):
910 try:
911 uri = _encode(uri, "ascii")
912 except UnicodeError:
913 pass
914 append((event, (prefix or "", uri)))
915 parser.StartNamespaceDeclHandler = handler
916 elif event == "end-ns":
917 def handler(prefix, event=event, append=append):
918 append((event, None))
919 parser.EndNamespaceDeclHandler = handler
920
922 while 1:
923 try:
924 item = self._events[self._index]
925 except IndexError:
926 if self._parser is None:
927 self.root = self._root
928 try:
929 raise StopIteration
930 except NameError:
931 raise IndexError
932 # load event buffer
933 del self._events[:]
934 self._index = 0
935 data = self._file.read(16384)
936 if data:
937 self._parser.feed(data)
938 else:
939 self._root = self._parser.close()
940 self._parser = None
941 else:
942 self._index = self._index + 1
943 return item
944
945 try:
946 iter
949 except NameError:
952
953 ##
954 # Parses an XML document from a string constant. This function can
955 # be used to embed "XML literals" in Python code.
956 #
957 # @param source A string containing XML data.
958 # @return An Element instance.
959 # @defreturn Element
960
965
966 ##
967 # Parses an XML document from a string constant, and also returns
968 # a dictionary which maps from element id:s to elements.
969 #
970 # @param text A string containing XML data.
971 # @return A tuple containing an Element instance and a dictionary.
972 # @defreturn (Element, dictionary)
973
975 parser = XMLTreeBuilder()
976 parser.feed(text)
977 tree = parser.close()
978 ids = {}
979 for elem in tree.getiterator():
980 id = elem.get("id")
981 if id:
982 ids[id] = elem
983 return tree, ids
984
985 ##
986 # Parses an XML document from a string constant. Same as {@link #XML}.
987 #
988 # @def fromstring(text)
989 # @param text A string containing XML data.
990 # @return An Element instance.
991 # @defreturn Element
992
993 fromstring = XML
994
995 ##
996 # Generates a string representation of an XML element, including all
997 # subelements.
998 #
999 # @param element An Element instance.
1000 # @return An encoded string containing the XML data.
1001 # @defreturn string
1002
1006 data = []
1007 file = dummy()
1008 file.write = data.append
1009 ElementTree(element).write(file, encoding)
1010 return string.join(data, "")
1011
1012 ##
1013 # Generic element structure builder. This builder converts a sequence
1014 # of {@link #TreeBuilder.start}, {@link #TreeBuilder.data}, and {@link
1015 # #TreeBuilder.end} method calls to a well-formed element structure.
1016 # <p>
1017 # You can use this class to build an element structure using a custom XML
1018 # parser, or a parser for some other XML-like format.
1019 #
1020 # @param element_factory Optional element factory. This factory
1021 # is called to create new Element instances, as necessary.
1022
1024
1026 self._data = [] # data collector
1027 self._elem = [] # element stack
1028 self._last = None # last element
1029 self._tail = None # true if we're after an end tag
1030 if element_factory is None:
1031 element_factory = _ElementInterface
1032 self._factory = element_factory
1033
1034 ##
1035 # Flushes the parser buffers, and returns the toplevel document
1036 # element.
1037 #
1038 # @return An Element instance.
1039 # @defreturn Element
1040
1042 assert len(self._elem) == 0, "missing end tags"
1043 assert self._last != None, "missing toplevel element"
1044 return self._last
1045
1047 if self._data:
1048 if self._last is not None:
1049 text = string.join(self._data, "")
1050 if self._tail:
1051 assert self._last.tail is None, "internal error (tail)"
1052 self._last.tail = text
1053 else:
1054 assert self._last.text is None, "internal error (text)"
1055 self._last.text = text
1056 self._data = []
1057
1058 ##
1059 # Adds text to the current element.
1060 #
1061 # @param data A string. This should be either an 8-bit string
1062 # containing ASCII text, or a Unicode string.
1063
1066
1067 ##
1068 # Opens a new element.
1069 #
1070 # @param tag The element name.
1071 # @param attrs A dictionary containing element attributes.
1072 # @return The opened element.
1073 # @defreturn Element
1074
1076 self._flush()
1077 self._last = elem = self._factory(tag, attrs)
1078 if self._elem:
1079 self._elem[-1].append(elem)
1080 self._elem.append(elem)
1081 self._tail = 0
1082 return elem
1083
1084 ##
1085 # Closes the current element.
1086 #
1087 # @param tag The element name.
1088 # @return The closed element.
1089 # @defreturn Element
1090
1099
1100 ##
1101 # Element structure builder for XML source data, based on the
1102 # <b>expat</b> parser.
1103 #
1104 # @keyparam target Target object. If omitted, the builder uses an
1105 # instance of the standard {@link #TreeBuilder} class.
1106 # @keyparam html Predefine HTML entities. This flag is not supported
1107 # by the current implementation.
1108 # @see #ElementTree
1109 # @see #TreeBuilder
1110
1112
1114 try:
1115 from xml.parsers import expat
1116 except ImportError:
1117 raise ImportError(
1118 "No module named expat; use SimpleXMLTreeBuilder instead"
1119 )
1120 self._parser = parser = expat.ParserCreate(None, "}")
1121 if target is None:
1122 target = TreeBuilder()
1123 self._target = target
1124 self._names = {} # name memo cache
1125 # callbacks
1126 parser.DefaultHandlerExpand = self._default
1127 parser.StartElementHandler = self._start
1128 parser.EndElementHandler = self._end
1129 parser.CharacterDataHandler = self._data
1130 # let expat do the buffering, if supported
1131 try:
1132 self._parser.buffer_text = 1
1133 except AttributeError:
1134 pass
1135 # use new-style attribute handling, if supported
1136 try:
1137 self._parser.ordered_attributes = 1
1138 self._parser.specified_attributes = 1
1139 parser.StartElementHandler = self._start_list
1140 except AttributeError:
1141 pass
1142 encoding = None
1143 if not parser.returns_unicode:
1144 encoding = "utf-8"
1145 # target.xml(encoding, None)
1146 self._doctype = None
1147 self.entity = {}
1148
1150 # convert text string to ascii, if possible
1151 try:
1152 return _encode(text, "ascii")
1153 except UnicodeError:
1154 return text
1155
1157 # expand qname, and convert name string to ascii, if possible
1158 try:
1159 name = self._names[key]
1160 except KeyError:
1161 name = key
1162 if "}" in name:
1163 name = "{" + name
1164 self._names[key] = name = self._fixtext(name)
1165 return name
1166
1168 fixname = self._fixname
1169 tag = fixname(tag)
1170 attrib = {}
1171 for key, value in attrib_in.items():
1172 attrib[fixname(key)] = self._fixtext(value)
1173 return self._target.start(tag, attrib)
1174
1176 fixname = self._fixname
1177 tag = fixname(tag)
1178 attrib = {}
1179 if attrib_in:
1180 for i in range(0, len(attrib_in), 2):
1181 attrib[fixname(attrib_in[i])] = self._fixtext(attrib_in[i+1])
1182 return self._target.start(tag, attrib)
1183
1185 return self._target.data(self._fixtext(text))
1186
1188 return self._target.end(self._fixname(tag))
1189
1191 prefix = text[:1]
1192 if prefix == "&":
1193 # deal with undefined entities
1194 try:
1195 self._target.data(self.entity[text[1:-1]])
1196 except KeyError:
1197 from xml.parsers import expat
1198 raise expat.error(
1199 "undefined entity %s: line %d, column %d" %
1200 (text, self._parser.ErrorLineNumber,
1201 self._parser.ErrorColumnNumber)
1202 )
1203 elif prefix == "<" and text[:9] == "<!DOCTYPE":
1204 self._doctype = [] # inside a doctype declaration
1205 elif self._doctype is not None:
1206 # parse doctype contents
1207 if prefix == ">":
1208 self._doctype = None
1209 return
1210 text = string.strip(text)
1211 if not text:
1212 return
1213 self._doctype.append(text)
1214 n = len(self._doctype)
1215 if n > 2:
1216 type = self._doctype[1]
1217 if type == "PUBLIC" and n == 4:
1218 name, type, pubid, system = self._doctype
1219 elif type == "SYSTEM" and n == 3:
1220 name, type, system = self._doctype
1221 pubid = None
1222 else:
1223 return
1224 if pubid:
1225 pubid = pubid[1:-1]
1226 self.doctype(name, pubid, system[1:-1])
1227 self._doctype = None
1228
1229 ##
1230 # Handles a doctype declaration.
1231 #
1232 # @param name Doctype name.
1233 # @param pubid Public identifier.
1234 # @param system System identifier.
1235
1238
1239 ##
1240 # Feeds data to the parser.
1241 #
1242 # @param data Encoded data.
1243
1246
1247 ##
1248 # Finishes feeding data to the parser.
1249 #
1250 # @return An element structure.
1251 # @defreturn Element
1252
1258
1259 # compatibility
1260 XMLParser = XMLTreeBuilder
1261
| Home | Trees | Indices | Help |
|
|---|
| Generated by Epydoc 3.0beta1 on Mon Oct 29 21:00:08 2007 | http://epydoc.sourceforge.net |