|
1 # |
|
2 # Copyright (c) 2009 Nokia Corporation and/or its subsidiary(-ies). |
|
3 # All rights reserved. |
|
4 # This component and the accompanying materials are made available |
|
5 # under the terms of "Eclipse Public License v1.0" |
|
6 # which accompanies this distribution, and is available |
|
7 # at the URL "http://www.eclipse.org/legal/epl-v10.html". |
|
8 # |
|
9 # Initial Contributors: |
|
10 # Nokia Corporation - initial contribution. |
|
11 # |
|
12 # Contributors: |
|
13 # |
|
14 # Description: |
|
15 # |
|
16 |
|
17 from xml.parsers import expat |
|
18 |
|
19 # Import ElementTree (should always be available) |
|
20 try: |
|
21 from elementtree import ElementTree |
|
22 except ImportError: |
|
23 from xml.etree import ElementTree |
|
24 |
|
25 import exceptions |
|
26 |
|
27 |
|
class ElementTreeBackendWrapperBase(object):
    """
    Abstract base class of the ElementTree back-end wrappers.

    Every method declared here raises NotImplementedError and is meant
    to be overridden by the concrete back-end wrapper classes.
    """

    def get_module(self):
        """
        Return the module that the back-end wrapper wraps.
        """
        raise NotImplementedError()

    def get_lineno(self, element):
        """
        Return the source line number of the given element.
        """
        raise NotImplementedError()
|
34 |
|
class ElementTreeBackendWrapper(ElementTreeBackendWrapperBase):
    """
    Back-end wrapper for the pure Python ElementTree implementation.

    Parsing goes through a custom tree builder that stores the source
    line number of every element in the element's 'sourceline' attribute.
    """

    # Exception types that signal malformed XML. ElementTree 1.2 lets the
    # raw expat error propagate, whereas ElementTree 1.3 (Python 2.7 and
    # later) converts it into ElementTree.ParseError. When ParseError is
    # not available the tuple simply contains ExpatError twice.
    _PARSE_ERRORS = (expat.ExpatError,
                     getattr(ElementTree, 'ParseError', expat.ExpatError))

    class CustomTreeBuilder(ElementTree.TreeBuilder):
        """
        Custom TreeBuilder for ElementTree that records line numbers
        of the elements.
        """
        def start(self, tag, attrs):
            elem = ElementTree.TreeBuilder.start(self, tag, attrs)
            # _xmltreebuilder is attached by fromstring() before parsing
            # starts; its expat parser knows the line being processed
            elem.sourceline = self._xmltreebuilder._parser.CurrentLineNumber
            return elem

    def get_module(self):
        """
        Return the ElementTree module.
        """
        return ElementTree

    def fromstring(self, text):
        """
        Parse the given XML text and return the root element.

        @param text: The XML data to parse.
        @return: The root element; every element in the tree has its
            'sourceline' attribute set.
        @raise exceptions.XmlParseError: If the XML data is malformed.
        """
        treebuilder = self.CustomTreeBuilder()
        parser = ElementTree.XMLTreeBuilder(target=treebuilder)
        # The builder needs a way back to the parser to query line numbers
        treebuilder._xmltreebuilder = parser
        try:
            parser.feed(text)
            return parser.close()
        except self._PARSE_ERRORS as e:
            # ParseError carries the location as a (line, column) tuple in
            # 'position' (its 'lineno' is not filled in), while ExpatError
            # has the line number in 'lineno'
            position = getattr(e, 'position', None)
            if position:
                lineno = position[0]
            else:
                lineno = e.lineno
            raise exceptions.XmlParseError(
                "XML parse error on line %d: %s" % (lineno, e),
                lineno, str(e))

    def tostring(self, etree, encoding=None):
        """
        Serialize the given element using ElementTree.tostring().
        """
        return ElementTree.tostring(etree, encoding)

    def get_lineno(self, element):
        """
        Return the line number recorded for the element during parsing.
        """
        return element.sourceline
|
69 |
|
70 |
|
class CElementTreeBackendWrapper(ElementTreeBackendWrapperBase):
    """
    Back-end wrapper for cElementTree, the C implementation of ElementTree.

    This back-end does not provide element line numbers.
    """

    def __init__(self):
        """
        Import cElementTree and store the module in self.cElementTree.

        @raise ImportError: If no cElementTree implementation is available.
        """
        try:
            # The stand-alone cElementTree distribution installs a
            # top-level module, so it is imported by its plain name
            import cElementTree
        except ImportError:
            from xml.etree import cElementTree

        self.cElementTree = cElementTree

    def get_module(self):
        """
        Return the cElementTree module imported in the constructor.
        """
        return self.cElementTree

    def fromstring(self, text):
        """
        Parse the given XML text and return the root element.

        @param text: The XML data to parse.
        @return: The root element.
        @raise exceptions.XmlParseError: If the XML data is malformed.
            The line number of the error is None if it cannot be found
            in the parser's error message.
        """
        try:
            return self.cElementTree.fromstring(text)
        except SyntaxError as e:
            # cElementTree raises a SyntaxError, but does not set
            # its lineno attribute, so look for the line number
            # in the exception text
            import re
            message = str(e)
            match = re.search(r'line (\d+)\, column \d+$', message)
            if match:
                lineno = int(match.group(1))
            else:
                lineno = None

            # %s instead of %d, because lineno may be None here
            raise exceptions.XmlParseError(
                "XML parse error on line %s: %s" % (lineno, e),
                lineno, message)

    def tostring(self, etree, encoding=None):
        """
        Serialize the given element using cElementTree.tostring().
        """
        return self.cElementTree.tostring(etree, encoding)

    def get_lineno(self, element):
        """
        Return None, since cElementTree does not support line numbers.
        """
        return None
|
105 |
|
106 |
|
class LxmlBackendWrapper(ElementTreeBackendWrapperBase):
    """
    Back-end wrapper for lxml.etree.
    """

    def __init__(self):
        """
        Import lxml.etree and store the lxml package in self.lxml.

        @raise ImportError: If lxml is not available.
        """
        import lxml.etree
        self.lxml = lxml

    def get_module(self):
        """
        Return the lxml.etree module.
        """
        return self.lxml.etree

    def fromstring(self, text):
        """
        Parse the given XML text and return the root element.

        Comment nodes are removed from the parsed tree, but the text
        that follows a removed comment is kept.

        @param text: The XML data to parse.
        @return: The root element with all comments removed.
        @raise exceptions.XmlParseError: If the XML data is malformed.
        """
        try:
            elem = self.lxml.etree.fromstring(text)
        except self.lxml.etree.XMLSyntaxError as e:
            # position is a (line, column) tuple
            raise exceptions.XmlParseError(
                "XML parse error on line %d: %s" % (e.position[0], e),
                e.position[0], str(e))

        # lxml parses also comments, but ConE does not expect those,
        # so remove them to prevent any errors on that account
        self._remove_comments(elem)
        return elem

    def _remove_comments(self, elem):
        """
        Recursively remove all comment nodes under the given element.
        """
        comment_class = self.lxml.etree._Comment
        # Iterate over a copy, since children are removed inside the loop
        for child in list(elem):
            if isinstance(child, comment_class):
                # remove() drops the tail text together with the node, so
                # move the tail to wherever the text would have ended up
                # had the comment never been there
                tail = child.tail
                if tail:
                    previous = child.getprevious()
                    if previous is not None:
                        previous.tail = (previous.tail or '') + tail
                    else:
                        elem.text = (elem.text or '') + tail
                elem.remove(child)
            else:
                self._remove_comments(child)

    def tostring(self, etree, encoding=None):
        """
        Serialize the given element using lxml.etree.tostring().
        """
        return self.lxml.etree.tostring(etree, encoding=encoding)

    def get_lineno(self, element):
        """
        Return the element's 'sourceline' attribute.
        """
        return element.sourceline
|
150 |
|
151 # ============================================================================ |
|
152 # |
|
153 # ============================================================================ |
|
154 |
|
class ElementTreeWrapper(object):
    """
    ElementTree wrapper class for providing a unified interface to different
    ElementTree implementations.

    Currently supported are the pure Python ElementTree implementation,
    cElementTree and lxml.etree

    Attributes that are not defined on this class are looked up from the
    current back-end wrapper, and after that from the module the back-end
    wrapper wraps.
    """
    BACKEND_ELEMENT_TREE = 'ElementTree'
    BACKEND_C_ELEMENT_TREE = 'cElementTree'
    BACKEND_LXML = 'lxml'

    # Import order for the default back-end. The list is traversed
    # top-down and the first back-end whose importing is successful is
    # used as the default back-end
    DEFAULT_BACKEND_IMPORT_ORDER = [BACKEND_C_ELEMENT_TREE,
                                    BACKEND_ELEMENT_TREE]

    # Back-end ID -> back-end wrapper class
    _backend_mapping = {BACKEND_ELEMENT_TREE: ElementTreeBackendWrapper,
                        BACKEND_C_ELEMENT_TREE: CElementTreeBackendWrapper,
                        BACKEND_LXML: LxmlBackendWrapper}

    # ID and wrapper object of the current back-end, None until one is set
    _backend_id = None
    _backend_wrapper = None

    def get_backend_id(self):
        """
        Return the ID of the currently used ElementTree back-end.
        """
        # Make sure that the default back-end is set, so _backend_id
        # will not be None
        self._get_backend()
        assert self._backend_id is not None
        return self._backend_id

    def set_backend_id(self, backend_id):
        """
        Set the used ElementTree back-end by back-end ID.

        @param backend_id: One of the BACKEND_* constants of this class.
        @raise ValueError: If the back-end ID is not a known one.
        @raise ImportError: If the module of the back-end cannot be imported.
        """
        if backend_id not in self._backend_mapping:
            raise ValueError("Invalid ElementTree back-end ID: %r" % backend_id)

        # Already in use, do not create a new wrapper object for nothing
        if backend_id == self._backend_id:
            return

        # The ID is stored only after the wrapper has been created, so a
        # failing import leaves the current back-end untouched
        backend_wrapper_class = self._backend_mapping[backend_id]
        self._backend_wrapper = backend_wrapper_class()
        self._backend_id = backend_id

    def _get_backend(self):
        """
        Return the currently set ElementTree back-end wrapper object.

        If no back-end has been set yet, the first back-end in
        DEFAULT_BACKEND_IMPORT_ORDER that can be imported is taken
        into use.

        @raise RuntimeError: If none of the default back-ends can be imported.
        """
        if self._backend_wrapper is None:
            # Back-end not set, so set the default back-end.
            # The default is the C version of ElementTree, but if that
            # is not available, the pure Python version is used
            for backend_id in self.DEFAULT_BACKEND_IMPORT_ORDER:
                try:
                    self.set_backend_id(backend_id)
                except ImportError:
                    # Not available, try the next one in the list
                    continue
                # Stop at the first success, otherwise a later entry of
                # the list would replace the back-end that was just set
                break

            if self._backend_wrapper is None:
                raise RuntimeError(
                    "Failed to set any ElementTree backend! Tried these: %r"
                    % self.DEFAULT_BACKEND_IMPORT_ORDER)

        return self._backend_wrapper

    def get_lineno(self, element):
        """
        Return the source line number of the given XML element.

        Note that for the cElementTree parser this will always return
        None, since that parser does not support line numbers.
        """
        return self._get_backend().get_lineno(element)

    def __getattribute__(self, attrname):
        """
        Look up an attribute from this object, the back-end wrapper or
        the module wrapped by the back-end, in that order.

        @raise AttributeError: If none of the three has the attribute.
        """
        try:
            # Try to get the attribute from this object (the top-level wrapper)
            return object.__getattribute__(self, attrname)
        except AttributeError:
            # If not overridden here, try to get it from the back-end wrapper
            backend = self._get_backend()
            try:
                return getattr(backend, attrname)
            except AttributeError:
                # Last resort: try to get it from the module
                # the back-end wrapper wraps
                backend_module = backend.get_module()
                return getattr(backend_module, attrname)