|
1 """Implementation of the DOM Level 3 'LS-Load' feature.""" |
|
2 |
|
3 import copy |
|
4 import xml.dom |
|
5 |
|
6 from xml.dom.NodeFilter import NodeFilter |
|
7 |
|
8 |
|
# Names exported by ``from <this module> import *``; the other classes in
# this file remain importable by explicit name.
__all__ = ["DOMBuilder", "DOMEntityResolver", "DOMInputSource"]
|
10 |
|
11 |
|
class Options:
    """Settings holder with one class attribute per DOMBuilder feature.

    A DOMBuilder owns an instance of this class and uses it to hand its
    settings over to the ExpatBuilder class.
    """

    # Note that the DOMBuilder class in LoadSave constrains which of these
    # values can be set using the DOM Level 3 LoadSave feature.

    # --- namespace handling
    namespaces = 1
    namespace_declarations = True

    # --- validation / schema related switches
    validation = False
    validate = False
    validate_if_schema = False
    datatype_normalization = False

    # --- external resources
    external_general_entities = True
    external_parameter_entities = True
    external_dtd_subset = True

    # --- which constructs show up in the resulting tree
    entities = True
    create_entity_ref_nodes = True
    cdata_sections = True
    comments = True
    whitespace_in_element_content = True

    # --- miscellaneous
    charset_overrides_xml_encoding = True
    infoset = False
    supported_mediatypes_only = False

    # --- collaborators; None until a value is assigned on an instance
    errorHandler = None
    filter = None
|
42 |
|
43 |
|
class DOMBuilder:
    """Configurable front end that parses an input source into a DOM tree.

    Feature flags are stored on a private ``Options`` instance and are
    changed through ``setFeature()``; ``parse()`` and ``parseURI()`` hand
    a copy of those options to ``xml.dom.expatbuilder``.
    """

    entityResolver = None
    errorHandler = None
    filter = None

    ACTION_REPLACE = 1
    ACTION_APPEND_AS_CHILDREN = 2
    ACTION_INSERT_AFTER = 3
    ACTION_INSERT_BEFORE = 4

    _legal_actions = (ACTION_REPLACE, ACTION_APPEND_AS_CHILDREN,
                      ACTION_INSERT_AFTER, ACTION_INSERT_BEFORE)

    def __init__(self):
        self._options = Options()

    def _get_entityResolver(self):
        return self.entityResolver

    def _set_entityResolver(self, entityResolver):
        self.entityResolver = entityResolver

    def _get_errorHandler(self):
        return self.errorHandler

    def _set_errorHandler(self, errorHandler):
        self.errorHandler = errorHandler

    def _get_filter(self):
        return self.filter

    def _set_filter(self, filter):
        self.filter = filter

    def setFeature(self, name, state):
        """Turn the feature *name* on or off according to truthiness of *state*.

        Raises xml.dom.NotFoundErr when the feature name is unknown and
        xml.dom.NotSupportedErr when the name is known but the requested
        state has no entry in ``_settings``.
        """
        if not self.supportsFeature(name):
            raise xml.dom.NotFoundErr("unknown feature: " + repr(name))
        key = (_name_xform(name), int(bool(state)))
        try:
            settings = self._settings[key]
        except KeyError:
            raise xml.dom.NotSupportedErr(
                "unsupported feature: %r" % (name,))
        # One feature may imply several option changes (see "infoset").
        for option, value in settings:
            setattr(self._options, option, value)

    def supportsFeature(self, name):
        """Return true if *name* maps onto an attribute of the options."""
        return hasattr(self._options, _name_xform(name))

    def canSetFeature(self, name, state):
        """Return true if ``setFeature(name, state)`` has a known setting."""
        key = (_name_xform(name), int(bool(state)))
        return key in self._settings

    # This dictionary maps from (feature,value) to a list of
    # (option,value) pairs that should be set on the Options object.
    # If a (feature,value) setting is not in this dictionary, it is
    # not supported by the DOMBuilder.
    #
    _settings = {
        ("namespace_declarations", 0): [
            ("namespace_declarations", 0)],
        ("namespace_declarations", 1): [
            ("namespace_declarations", 1)],
        ("validation", 0): [
            ("validation", 0)],
        ("external_general_entities", 0): [
            ("external_general_entities", 0)],
        ("external_general_entities", 1): [
            ("external_general_entities", 1)],
        ("external_parameter_entities", 0): [
            ("external_parameter_entities", 0)],
        ("external_parameter_entities", 1): [
            ("external_parameter_entities", 1)],
        ("validate_if_schema", 0): [
            ("validate_if_schema", 0)],
        ("create_entity_ref_nodes", 0): [
            ("create_entity_ref_nodes", 0)],
        ("create_entity_ref_nodes", 1): [
            ("create_entity_ref_nodes", 1)],
        ("entities", 0): [
            ("create_entity_ref_nodes", 0),
            ("entities", 0)],
        ("entities", 1): [
            ("entities", 1)],
        ("whitespace_in_element_content", 0): [
            ("whitespace_in_element_content", 0)],
        ("whitespace_in_element_content", 1): [
            ("whitespace_in_element_content", 1)],
        ("cdata_sections", 0): [
            ("cdata_sections", 0)],
        ("cdata_sections", 1): [
            ("cdata_sections", 1)],
        ("comments", 0): [
            ("comments", 0)],
        ("comments", 1): [
            ("comments", 1)],
        ("charset_overrides_xml_encoding", 0): [
            ("charset_overrides_xml_encoding", 0)],
        ("charset_overrides_xml_encoding", 1): [
            ("charset_overrides_xml_encoding", 1)],
        ("infoset", 0): [],
        ("infoset", 1): [
            ("namespace_declarations", 0),
            ("validate_if_schema", 0),
            ("create_entity_ref_nodes", 0),
            ("entities", 0),
            ("cdata_sections", 0),
            ("datatype_normalization", 1),
            ("whitespace_in_element_content", 1),
            ("comments", 1),
            ("charset_overrides_xml_encoding", 1)],
        ("supported_mediatypes_only", 0): [
            ("supported_mediatypes_only", 0)],
        ("namespaces", 0): [
            ("namespaces", 0)],
        ("namespaces", 1): [
            ("namespaces", 1)],
    }

    def getFeature(self, name):
        """Return the current value of the feature *name*.

        Raises xml.dom.NotFoundErr when the feature is not known.
        """
        xname = _name_xform(name)
        try:
            return getattr(self._options, xname)
        except AttributeError:
            # Only reached when the options object lacks the attribute;
            # "infoset" is then derived from the options it is made of.
            if name == "infoset":
                options = self._options
                return (options.datatype_normalization
                        and options.whitespace_in_element_content
                        and options.comments
                        and options.charset_overrides_xml_encoding
                        and not (options.namespace_declarations
                                 or options.validate_if_schema
                                 or options.create_entity_ref_nodes
                                 or options.entities
                                 or options.cdata_sections))
            raise xml.dom.NotFoundErr("feature %s not known" % repr(name))

    def parseURI(self, uri):
        """Resolve *uri* to an input source and parse it."""
        resolver = self.entityResolver or DOMEntityResolver()
        source = resolver.resolveEntity(None, uri)
        return self.parse(source)

    def parse(self, input):
        """Parse *input* and return whatever the expat builder produces.

        *input* must provide ``byteStream`` and ``systemId`` attributes.
        When no byte stream is set, the system identifier (if any) is
        opened as a URL to obtain one.
        """
        # Work on a copy so per-parse collaborators do not leak back into
        # the builder's long-lived option set.
        options = copy.copy(self._options)
        options.filter = self.filter
        options.errorHandler = self.errorHandler
        fp = input.byteStream
        # The system identifier lives on the input source, not on the
        # options object.
        if fp is None and input.systemId:
            try:
                from urllib.request import urlopen
            except ImportError:  # Python 2
                from urllib2 import urlopen
            fp = urlopen(input.systemId)
        return self._parse_bytestream(fp, options)

    def parseWithContext(self, input, cnode, action):
        """Not implemented; only validates *action* before giving up."""
        if action not in self._legal_actions:
            raise ValueError("not a legal action")
        raise NotImplementedError("Haven't written this yet...")

    def _parse_bytestream(self, stream, options):
        # Imported lazily so the expat machinery is only loaded on use.
        import xml.dom.expatbuilder
        builder = xml.dom.expatbuilder.makeBuilder(options)
        return builder.parseFile(stream)
|
206 |
|
207 |
|
208 def _name_xform(name): |
|
209 return name.lower().replace('-', '_') |
|
210 |
|
211 |
|
class DOMEntityResolver(object):
    """Turn a system identifier into a populated ``DOMInputSource``.

    The opener used to fetch the resource is created on first use and
    cached on the instance.
    """

    __slots__ = '_opener',

    def resolveEntity(self, publicId, systemId):
        """Open *systemId* and return an input source describing it.

        The returned source carries the identifiers, the opened byte
        stream, the encoding reported by the transport (or None) and a
        base URI derived from *systemId*.
        """
        assert systemId is not None
        source = DOMInputSource()
        source.publicId = publicId
        source.systemId = systemId
        source.byteStream = self._get_opener().open(systemId)

        # determine the encoding if the transport provided it
        source.encoding = self._guess_media_encoding(source)

        # determine the base URI if we can
        import posixpath
        try:
            from urllib.parse import urlparse, urlunparse
        except ImportError:  # Python 2
            from urlparse import urlparse, urlunparse
        parts = urlparse(systemId)
        scheme, netloc, path, params, query, fragment = parts
        # XXX should we check the scheme here as well?
        if path and not path.endswith("/"):
            # Drop the final path segment so the base names the directory.
            path = posixpath.dirname(path) + "/"
            parts = scheme, netloc, path, params, query, fragment
            source.baseURI = urlunparse(parts)

        return source

    def _get_opener(self):
        """Return the cached opener, creating it on first access."""
        try:
            return self._opener
        except AttributeError:
            # The slot has not been assigned yet.
            self._opener = self._create_opener()
            return self._opener

    def _create_opener(self):
        """Build the URL opener; a hook for subclasses to override."""
        try:
            from urllib.request import build_opener
        except ImportError:  # Python 2
            from urllib2 import build_opener
        return build_opener()

    def _guess_media_encoding(self, source):
        """Return the lower-cased charset from Content-Type, or None."""
        info = source.byteStream.info()
        if "Content-Type" not in info:
            return None
        getplist = getattr(info, "getplist", None)
        if getplist is None:
            # Message objects without getplist() (email.message.Message)
            # report the lower-cased charset parameter directly.
            return info.get_content_charset()
        for param in getplist():
            if param.startswith("charset="):
                return param.split("=", 1)[1].lower()
        return None
|
254 |
|
255 |
|
class DOMInputSource(object):
    """Plain record describing where the data for a parse comes from.

    Every field starts out as None.  Each one is also reachable through
    a ``_get_<field>`` / ``_set_<field>`` accessor pair.
    """

    __slots__ = ('byteStream', 'characterStream', 'stringData',
                 'encoding', 'publicId', 'systemId', 'baseURI')

    def __init__(self):
        # The data carriers ...
        self.byteStream = self.characterStream = self.stringData = None
        # ... and the metadata describing them.
        self.encoding = self.publicId = self.systemId = self.baseURI = None

    # -- byteStream
    def _get_byteStream(self):
        return self.byteStream

    def _set_byteStream(self, byteStream):
        self.byteStream = byteStream

    # -- characterStream
    def _get_characterStream(self):
        return self.characterStream

    def _set_characterStream(self, characterStream):
        self.characterStream = characterStream

    # -- stringData
    def _get_stringData(self):
        return self.stringData

    def _set_stringData(self, data):
        self.stringData = data

    # -- encoding
    def _get_encoding(self):
        return self.encoding

    def _set_encoding(self, encoding):
        self.encoding = encoding

    # -- publicId
    def _get_publicId(self):
        return self.publicId

    def _set_publicId(self, publicId):
        self.publicId = publicId

    # -- systemId
    def _get_systemId(self):
        return self.systemId

    def _set_systemId(self, systemId):
        self.systemId = systemId

    # -- baseURI
    def _get_baseURI(self):
        return self.baseURI

    def _set_baseURI(self, uri):
        self.baseURI = uri
|
303 |
|
304 |
|
class DOMBuilderFilter:
    """Base class for filters that tailor how a DOM instance is built.

    Both hooks accept everything by default, so a subclass only has to
    override the one it cares about.
    """

    # There's really no need for this class; concrete implementations
    # should just implement the endElement() and startElement()
    # methods as appropriate.  Using this makes it easy to only
    # implement one of them.

    # Verdicts a hook may return.
    FILTER_ACCEPT, FILTER_REJECT, FILTER_SKIP, FILTER_INTERRUPT = 1, 2, 3, 4

    whatToShow = NodeFilter.SHOW_ALL

    def _get_whatToShow(self):
        return self.whatToShow

    def acceptNode(self, element):
        """Default verdict for *element*: accept it."""
        verdict = self.FILTER_ACCEPT
        return verdict

    def startContainer(self, element):
        """Default verdict for the container *element*: accept it."""
        verdict = self.FILTER_ACCEPT
        return verdict
|
330 |
|
# NodeFilter was imported only to supply DOMBuilderFilter.whatToShow; drop
# the name so it is not left behind in this module's namespace.
del NodeFilter
|
332 |
|
333 |
|
class DocumentLS:
    """Mixin to create documents that conform to the load/save spec."""

    # ``async`` is a reserved word from Python 3.7 on, so the flag is
    # spelled ``async_`` here; the historical attribute name is attached
    # again right after the class body.
    async_ = False

    def _get_async(self):
        """Return the asynchronous-loading flag, which is always False."""
        return False

    def _set_async(self, flag):
        """Reject any attempt to switch asynchronous loading on.

        Raises xml.dom.NotSupportedErr when *flag* is true; a false value
        is accepted and changes nothing.
        """
        if flag:
            raise xml.dom.NotSupportedErr(
                "asynchronous document loading is not supported")

    def abort(self):
        # What does it mean to "clear" a document?  Does the
        # documentElement disappear?
        raise NotImplementedError(
            "haven't figured out what this means yet")

    def load(self, uri):
        raise NotImplementedError("haven't written this yet")

    def loadXML(self, source):
        raise NotImplementedError("haven't written this yet")

    def saveXML(self, snode):
        """Serialize *snode*, or this document when *snode* is None.

        Raises xml.dom.WrongDocumentErr when *snode* is owned by another
        document.
        """
        if snode is None:
            snode = self
        elif snode.ownerDocument is not self:
            raise xml.dom.WrongDocumentErr()
        return snode.toxml()


# Keep the original attribute name reachable for existing callers
# (``getattr(doc, "async")``, or ``doc.async`` where that still parses).
setattr(DocumentLS, "async", DocumentLS.async_)
|
364 |
|
365 |
|
class DOMImplementationLS:
    """Factory mixin for the load/save objects defined in this module."""

    MODE_SYNCHRONOUS = 1
    MODE_ASYNCHRONOUS = 2

    def createDOMBuilder(self, mode, schemaType):
        """Return a new DOMBuilder for the synchronous mode.

        Raises xml.dom.NotSupportedErr when *schemaType* is not None or
        when the asynchronous mode is requested, and ValueError for any
        other *mode*.
        """
        if schemaType is not None:
            reason = "schemaType not yet supported"
            raise xml.dom.NotSupportedErr(reason)
        if mode == self.MODE_SYNCHRONOUS:
            return DOMBuilder()
        elif mode == self.MODE_ASYNCHRONOUS:
            reason = "asynchronous builders are not supported"
            raise xml.dom.NotSupportedErr(reason)
        else:
            raise ValueError("unknown value for mode")

    def createDOMWriter(self):
        """Always raises NotImplementedError; no writer exists here."""
        reason = "the writer interface hasn't been written yet!"
        raise NotImplementedError(reason)

    def createDOMInputSource(self):
        """Return a fresh, empty DOMInputSource."""
        source = DOMInputSource()
        return source