|
1 """ |
|
2 This module contains the core classes of version 2.0 of SAX for Python. |
|
3 This file provides only default classes with absolutely minimal |
|
4 functionality, from which drivers and applications can be subclassed. |
|
5 |
|
6 Many of these classes are empty and are included only as documentation |
|
7 of the interfaces. |
|
8 |
|
9 $Id: handler.py 35816 2004-05-06 03:47:48Z fdrake $ |
|
10 """ |
|
11 |
|
# Version string published by this SAX for Python module.
version = "2.0beta"
|
13 |
|
14 #============================================================================ |
|
15 # |
|
16 # HANDLER INTERFACES |
|
17 # |
|
18 #============================================================================ |
|
19 |
|
20 # ===== ERRORHANDLER ===== |
|
21 |
|
class ErrorHandler:
    """Basic interface for SAX error handlers.

    If you create an object that implements this interface and then
    register the object with your XMLReader, the parser will call the
    methods in your object to report all warnings and errors.  There
    are three levels of errors available: warnings, (possibly)
    recoverable errors, and unrecoverable errors.  All methods take a
    SAXParseException as the only parameter.

    The default implementations raise the exception passed to error()
    and fatalError(), and print the one passed to warning()."""

    def error(self, exception):
        "Handle a recoverable error by raising the given exception."
        raise exception

    def fatalError(self, exception):
        "Handle a non-recoverable error by raising the given exception."
        raise exception

    def warning(self, exception):
        "Handle a warning by printing the given exception."
        # The call form of print is accepted by both Python 2 and
        # Python 3; with a single argument it prints the same text as
        # the old statement form, which is a syntax error on Python 3.
        print(exception)
|
43 |
|
44 |
|
45 # ===== CONTENTHANDLER ===== |
|
46 |
|
class ContentHandler:
    """Callback interface for the logical content of a document.

    Applications mostly deal with this interface: it is the principal
    callback interface in SAX, and its events arrive in the same order
    as the corresponding information appears in the document."""

    def __init__(self):
        # No locator is known until one is handed over through
        # setDocumentLocator().
        self._locator = None

    def setDocumentLocator(self, locator):
        """Store the locator offered by the parser for tracing the
        origin of document events.

        Supplying a locator is strongly encouraged for SAX parsers,
        although not absolutely required.  A parser that does supply
        one must pass it to the application by calling this method
        before it calls any other method of this interface.

        With the locator the application can determine the end
        position of any document-related event, whether or not the
        parser is reporting an error.  The typical use is reporting
        the application's own errors (for instance character content
        that violates the application's business rules).  What the
        locator returns is probably not sufficient for use with a
        search engine.

        The locator returns correct information only while one of the
        events of this interface is being invoked; the application
        should not try to use it at any other time."""
        self._locator = locator

    def startDocument(self):
        """Be notified that a document begins.

        The SAX parser calls this method only once, ahead of every
        other method of this interface or of DTDHandler, with the
        exception of setDocumentLocator."""

    def endDocument(self):
        """Be notified that a document has ended.

        The SAX parser calls this method only once, as the last
        method invoked during the parse.  The parser shall not call
        it before it has either given up parsing (because of an
        unrecoverable error) or reached the end of input."""

    def startPrefixMapping(self, prefix, uri):
        """Open the scope of a prefix-URI Namespace mapping.

        Normal Namespace processing does not need the information
        carried by this event: when the
        http://xml.org/sax/features/namespaces feature is true (the
        default), the SAX XML reader replaces prefixes for element
        and attribute names automatically.

        Some applications, however, use prefixes in character data or
        in attribute values, where they cannot safely be expanded
        automatically.  The start/endPrefixMapping events give such
        applications what they need to expand the prefixes in those
        contexts themselves, if necessary.

        start/endPrefixMapping events are not guaranteed to be
        properly nested relative to each other.  Every
        startPrefixMapping event occurs before the corresponding
        startElement event and every endPrefixMapping event occurs
        after the corresponding endElement event, but beyond that
        their order is not guaranteed."""

    def endPrefixMapping(self, prefix):
        """Close the scope of a prefix-URI mapping.

        startPrefixMapping describes the details.  This event always
        occurs after the corresponding endElement event; apart from
        that, the order of endPrefixMapping events is not
        guaranteed."""

    def startElement(self, name, attrs):
        """Report the start of an element in non-namespace mode.

        name is the raw XML 1.0 name of the element type, as a
        string.  attrs is an instance of the Attributes class holding
        the attributes of the element."""

    def endElement(self, name):
        """Report the end of an element in non-namespace mode.

        name is the name of the element type, exactly as in the
        startElement event."""

    def startElementNS(self, name, qname, attrs):
        """Report the start of an element in namespace mode.

        name is the name of the element type as a (uri, localname)
        tuple, qname is the raw XML 1.0 name used in the source
        document, and attrs is an instance of the Attributes class
        holding the attributes of the element.

        For elements which have no namespace, the uri part of the
        name tuple is None."""

    def endElementNS(self, name, qname):
        """Report the end of an element in namespace mode.

        name is the name of the element type, exactly as in the
        startElementNS event."""

    def characters(self, content):
        """Be notified of character data.

        The Parser calls this method for each chunk of character
        data.  A SAX parser may deliver all contiguous character data
        in one chunk or split it into several; either way, all the
        characters of a single event must come from the same external
        entity so that the Locator provides useful information."""

    def ignorableWhitespace(self, whitespace):
        """Be notified of ignorable whitespace in element content.

        Validating Parsers must report each chunk of ignorable
        whitespace through this method (see the W3C XML 1.0
        recommendation, section 2.10).  Non-validating parsers may
        use it as well if they are capable of parsing and using
        content models.

        A SAX parser may deliver all contiguous whitespace in one
        chunk or split it into several; either way, all the
        characters of a single event must come from the same external
        entity, so that the Locator provides useful information."""

    def processingInstruction(self, target, data):
        """Be notified of a processing instruction.

        The Parser calls this method once per processing instruction
        found.  Processing instructions may occur before or after the
        main document element.

        An XML declaration (XML 1.0, section 2.8) or a text
        declaration (XML 1.0, section 4.3.1) should never be reported
        by a SAX parser through this method."""

    def skippedEntity(self, name):
        """Be notified of a skipped entity.

        The Parser calls this method once per skipped entity.
        Non-validating processors may skip entities whose
        declarations they have not seen (for example because the
        entity was declared in an external DTD subset).  Any
        processor may skip external entities, depending on the values
        of the http://xml.org/sax/features/external-general-entities
        and the
        http://xml.org/sax/features/external-parameter-entities
        properties."""
|
204 |
|
205 |
|
206 # ===== DTDHandler ===== |
|
207 |
|
class DTDHandler:
    """Receiver for DTD events.

    Only the DTD events required for basic parsing are covered by
    this interface (unparsed entities and attributes)."""

    def notationDecl(self, name, publicId, systemId):
        """Receive a notation declaration event."""

    def unparsedEntityDecl(self, name, publicId, systemId, ndata):
        """Receive an unparsed entity declaration event."""
|
219 |
|
220 |
|
221 # ===== ENTITYRESOLVER ===== |
|
222 |
|
class EntityResolver:
    """Basic interface for resolving entities.

    If you create an object implementing this interface and then
    register it with your Parser, the parser will call the method in
    your object to resolve all external entities.  Note that
    DefaultHandler implements this interface with the default
    behaviour."""

    def resolveEntity(self, publicId, systemId):
        """Resolve the system identifier of an entity.

        The result is either the system identifier to read from, as a
        string, or an InputSource to read from.  This default
        implementation hands systemId back unchanged."""
        return systemId
|
235 |
|
236 |
|
237 #============================================================================ |
|
238 # |
|
239 # CORE FEATURES |
|
240 # |
|
241 #============================================================================ |
|
242 |
|
# Namespace processing.
#   true:   Perform Namespace processing (default).
#   false:  Optionally do not perform Namespace processing
#           (implies namespace-prefixes).
#   access: (parsing) read-only; (not parsing) read/write
feature_namespaces = "http://xml.org/sax/features/namespaces"

# Reporting of prefixed names and Namespace declaration attributes.
#   true:   Report the original prefixed names and attributes used for
#           Namespace declarations.
#   false:  Do not report attributes used for Namespace declarations, and
#           optionally do not report original prefixed names (default).
#   access: (parsing) read-only; (not parsing) read/write
feature_namespace_prefixes = "http://xml.org/sax/features/namespace-prefixes"

# Interning of names.
#   true:   All element names, prefixes, attribute names, Namespace URIs,
#           and local names are interned using the built-in intern
#           function.
#   false:  Names are not necessarily interned, although they may be
#           (default).
#   access: (parsing) read-only; (not parsing) read/write
feature_string_interning = "http://xml.org/sax/features/string-interning"

# Validation.
#   true:   Report all validation errors (implies
#           external-general-entities and external-parameter-entities).
#   false:  Do not report validation errors.
#   access: (parsing) read-only; (not parsing) read/write
feature_validation = "http://xml.org/sax/features/validation"

# External general entities.
#   true:   Include all external general (text) entities.
#   false:  Do not include external general entities.
#   access: (parsing) read-only; (not parsing) read/write
feature_external_ges = "http://xml.org/sax/features/external-general-entities"

# External parameter entities.
#   true:   Include all external parameter entities, including the
#           external DTD subset.
#   false:  Do not include any external parameter entities, even the
#           external DTD subset.
#   access: (parsing) read-only; (not parsing) read/write
feature_external_pes = "http://xml.org/sax/features/external-parameter-entities"

# Every core feature name defined above, in definition order.
all_features = [
    feature_namespaces,
    feature_namespace_prefixes,
    feature_string_interning,
    feature_validation,
    feature_external_ges,
    feature_external_pes,
]
|
286 |
|
287 |
|
288 #============================================================================ |
|
289 # |
|
290 # CORE PROPERTIES |
|
291 # |
|
292 #============================================================================ |
|
293 |
|
# Lexical handler.
#   data type:   xml.sax.sax2lib.LexicalHandler
#   description: An optional extension handler for lexical events like
#                comments.
#   access:      read/write
property_lexical_handler = "http://xml.org/sax/properties/lexical-handler"

# Declaration handler.
#   data type:   xml.sax.sax2lib.DeclHandler
#   description: An optional extension handler for DTD-related events
#                other than notations and unparsed entities.
#   access:      read/write
property_declaration_handler = "http://xml.org/sax/properties/declaration-handler"

# DOM node.
#   data type:   org.w3c.dom.Node
#   description: When parsing, the current DOM node being visited if this
#                is a DOM iterator; when not parsing, the root DOM node
#                for iteration.
#   access:      (parsing) read-only; (not parsing) read/write
property_dom_node = "http://xml.org/sax/properties/dom-node"

# XML string.
#   data type:   String
#   description: The literal string of characters that was the source for
#                the current event.
#   access:      read-only
property_xml_string = "http://xml.org/sax/properties/xml-string"

# Encoding.
#   data type:     String
#   description:   The name of the encoding to assume for input data.
#   access:        write: set the encoding, e.g. established by a
#                         higher-level protocol.  May change during
#                         parsing (e.g. after processing a META tag).
#                  read:  return the current encoding (possibly
#                         established through auto-detection).
#   initial value: UTF-8
property_encoding = "http://www.python.org/sax/properties/encoding"

# Interning dictionary.
#   data type:   Dictionary
#   description: The dictionary used to intern common strings in the
#                document.
#   access:      write: Request that the parser uses a specific
#                       dictionary, to allow interning across different
#                       documents.
#                read:  return the current interning dictionary, or None
property_interning_dict = "http://www.python.org/sax/properties/interning-dict"

# Every core property name defined above.  The order of this list is
# kept exactly as it has always been (dom-node precedes
# declaration-handler here).
all_properties = [
    property_lexical_handler,
    property_dom_node,
    property_declaration_handler,
    property_xml_string,
    property_encoding,
    property_interning_dict,
]