|
1 # |
|
2 # ElementTree |
|
3 # $Id: ElementPath.py 1858 2004-06-17 21:31:41Z Fredrik $ |
|
4 # |
|
5 # limited xpath support for element trees |
|
6 # |
|
7 # history: |
|
8 # 2003-05-23 fl created |
|
9 # 2003-05-28 fl added support for // etc |
|
10 # 2003-08-27 fl fixed parsing of periods in element names |
|
11 # |
|
12 # Copyright (c) 2003-2004 by Fredrik Lundh. All rights reserved. |
|
13 # |
|
14 # fredrik@pythonware.com |
|
15 # http://www.pythonware.com |
|
16 # |
|
17 # -------------------------------------------------------------------- |
|
18 # The ElementTree toolkit is |
|
19 # |
|
20 # Copyright (c) 1999-2004 by Fredrik Lundh |
|
21 # |
|
22 # By obtaining, using, and/or copying this software and/or its |
|
23 # associated documentation, you agree that you have read, understood, |
|
24 # and will comply with the following terms and conditions: |
|
25 # |
|
26 # Permission to use, copy, modify, and distribute this software and |
|
27 # its associated documentation for any purpose and without fee is |
|
28 # hereby granted, provided that the above copyright notice appears in |
|
29 # all copies, and that both that copyright notice and this permission |
|
30 # notice appear in supporting documentation, and that the name of |
|
31 # Secret Labs AB or the author not be used in advertising or publicity |
|
32 # pertaining to distribution of the software without specific, written |
|
33 # prior permission. |
|
34 # |
|
35 # SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD |
|
36 # TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT- |
|
37 # ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR |
|
38 # BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY |
|
39 # DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, |
|
40 # WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS |
|
41 # ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE |
|
42 # OF THIS SOFTWARE. |
|
43 # -------------------------------------------------------------------- |
|
44 |
|
45 # Licensed to PSF under a Contributor Agreement. |
|
46 # See http://www.python.org/2.4/license for licensing details. |
|
47 |
|
48 ## |
|
49 # Implementation module for XPath support. There's usually no reason |
|
50 # to import this module directly; the <b>ElementTree</b> does this for |
|
51 # you, if needed. |
|
52 ## |
|
53 |
|
54 import re |
|
55 |
|
# Tokenizer for path expressions.  Calling it returns one (op, tag) pair
# per match: "op" is a punctuation token ("::", "..", "()", or one of
# "/.*:[]()@="), "tag" is a name with an optional "{...}" prefix.  A run
# of whitespace yields a pair of two empty strings.
#
# The pattern is a raw string so that the regex escapes (\., \(, \[,
# \s, ...) reach the re module verbatim instead of being treated as
# (invalid) string-literal escapes.
xpath_tokenizer = re.compile(
    r"(::|\.\.|\(\)|[/.*:\[\]\(\)@=])|((?:\{[^}]+\})?[^/:\[\]\(\)@=\s]+)|\s+"
    ).findall
|
59 |
|
class xpath_descendant_or_self:
    """Marker placed in a compiled path where the expression used '//'."""
|
62 |
|
63 ## |
|
64 # Wrapper for a compiled XPath. |
|
65 |
|
class Path:

    ##
    # Create a Path instance from an XPath expression.
    #
    # @param path The path expression.  Only relative
    #     'path/path'-style expressions are supported.
    # @exception SyntaxError If the expression starts with "/", uses
    #     unsupported syntax, or ends with "//".

    def __init__(self, path):
        tokens = xpath_tokenizer(path)
        # the current version supports 'path/path'-style expressions only
        self.path = []   # steps: tag names, "*", or descendant markers
        self.tag = None  # set only when the path is one plain step
        if tokens and tokens[0][0] == "/":
            raise SyntaxError("cannot use absolute path on element")
        while tokens:
            op, tag = tokens.pop(0)
            if tag or op == "*":
                self.path.append(tag or op)
            elif op == ".":
                pass  # "." selects the current node; no step needed
            elif op == "/":
                # a separator where a step was expected, i.e. the
                # second "/" of "//"; no separator follows the marker
                self.path.append(xpath_descendant_or_self())
                continue
            else:
                raise SyntaxError("unsupported path syntax (%s)" % op)
            # every ordinary step must be followed by "/" or the end
            if tokens:
                op, tag = tokens.pop(0)
                if op != "/":
                    raise SyntaxError(
                        "expected path separator (%s)" % (op or tag)
                        )
        if self.path and isinstance(self.path[-1], xpath_descendant_or_self):
            raise SyntaxError("path cannot end with //")
        if len(self.path) == 1 and isinstance(self.path[0], type("")):
            # single-step path: find/findtext can scan children directly
            self.tag = self.path[0]

    ##
    # Find first matching object.
    #
    # @param element The element to search from.
    # @return The first matching element, or None if nothing matched.

    def find(self, element):
        tag = self.tag
        if tag is None:
            nodeset = self.findall(element)
            if not nodeset:
                return None
            return nodeset[0]
        # fast path: compare the single step against each direct child
        for elem in element:
            if elem.tag == tag:
                return elem
        return None

    ##
    # Find text for first matching object.
    #
    # @param element The element to search from.
    # @param default What to return if nothing matched.
    # @return The text of the first matching element (an empty string
    #     if its text is empty or None), or the default value if
    #     nothing matched.

    def findtext(self, element, default=None):
        tag = self.tag
        if tag is None:
            nodeset = self.findall(element)
            if not nodeset:
                return default
            return nodeset[0].text or ""
        # fast path: compare the single step against each direct child
        for elem in element:
            if elem.tag == tag:
                return elem.text or ""
        return default

    ##
    # Find all matching objects.
    #
    # @param element The element to search from.
    # @return A list of matching elements; empty if nothing matched.
    #     For a path without steps, the list holds the element itself.

    def findall(self, element):
        steps = self.path
        nodeset = [element]
        index = 0
        while index < len(steps):
            step = steps[index]
            index = index + 1
            matches = []
            if isinstance(step, xpath_descendant_or_self):
                # fold the following tag step (if any) into the
                # subtree walk; otherwise walk all descendants
                tag = None
                if index < len(steps) and isinstance(steps[index], type("")):
                    tag = steps[index]
                    index = index + 1
                for node in nodeset:
                    # Element.getiterator() is gone in newer Python
                    # versions; use iter() where the element has it
                    walk = getattr(node, "iter", None) or node.getiterator
                    found = list(walk(tag))
                    if found and found[0] is node:
                        # the walk includes the start node; skip it
                        matches.extend(found[1:])
                    else:
                        matches.extend(found)
            else:
                for node in nodeset:
                    for child in node:
                        if step == "*" or child.tag == step:
                            matches.append(child)
            if not matches:
                return []
            nodeset = matches
        return nodeset
|
166 |
|
# Compiled Path objects, keyed by the path expression.
_cache = {}

##
# (Internal) Compile path.
#
# @param path The path expression.
# @return A Path instance for the expression, taken from the cache
#     when one is already stored there.

def _compile(path):
    try:
        return _cache[path]
    except KeyError:
        pass
    compiled = Path(path)
    # crude size limit: once 100 entries are stored, start over
    if len(_cache) >= 100:
        _cache.clear()
    _cache[path] = compiled
    return compiled
|
181 |
|
182 ## |
|
183 # Find first matching object. |
|
184 |
|
def find(element, path):
    # compile the expression (or reuse a cached Path), then delegate
    compiled = _compile(path)
    return compiled.find(element)
|
187 |
|
188 ## |
|
189 # Find text for first matching object. |
|
190 |
|
def findtext(element, path, default=None):
    # compile the expression (or reuse a cached Path), then delegate
    compiled = _compile(path)
    return compiled.findtext(element, default)
|
193 |
|
194 ## |
|
195 # Find all matching objects. |
|
196 |
|
def findall(element, path):
    # compile the expression (or reuse a cached Path), then delegate
    compiled = _compile(path)
    return compiled.findall(element)