|
1 /* |
|
2 * ElementTree |
|
3 * $Id: _elementtree.c 2657 2006-03-12 20:50:32Z fredrik $ |
|
4 * |
|
5 * elementtree accelerator |
|
6 * |
|
7 * History: |
|
8 * 1999-06-20 fl created (as part of sgmlop) |
|
9 * 2001-05-29 fl effdom edition |
|
10 * 2001-06-05 fl backported to unix; fixed bogus free in clear |
|
11 * 2001-07-10 fl added findall helper |
|
12 * 2003-02-27 fl elementtree edition (alpha) |
|
13 * 2004-06-03 fl updates for elementtree 1.2 |
|
14 * 2005-01-05 fl added universal name cache, Element/SubElement factories |
|
15 * 2005-01-06 fl moved python helpers into C module; removed 1.5.2 support |
|
16 * 2005-01-07 fl added 2.1 support; work around broken __copy__ in 2.3 |
|
17 * 2005-01-08 fl added makeelement method; fixed path support |
|
18 * 2005-01-10 fl optimized memory usage |
|
19 * 2005-01-11 fl first public release (cElementTree 0.8) |
|
20 * 2005-01-12 fl split element object into base and extras |
|
21 * 2005-01-13 fl use tagged pointers for tail/text (cElementTree 0.9) |
|
22 * 2005-01-17 fl added treebuilder close method |
|
23 * 2005-01-17 fl fixed crash in getchildren |
|
24 * 2005-01-18 fl removed observer api, added iterparse (cElementTree 0.9.3) |
|
25 * 2005-01-23 fl revised iterparse api; added namespace event support (0.9.8) |
|
26 * 2005-01-26 fl added VERSION module property (cElementTree 1.0) |
|
27 * 2005-01-28 fl added remove method (1.0.1) |
|
28 * 2005-03-01 fl added iselement function; fixed makeelement aliasing (1.0.2) |
|
29 * 2005-03-13 fl export Comment and ProcessingInstruction/PI helpers |
|
30 * 2005-03-26 fl added Comment and PI support to XMLParser |
|
31 * 2005-03-27 fl event optimizations; complain about bogus events |
|
32 * 2005-08-08 fl fixed read error handling in parse |
|
33 * 2005-08-11 fl added runtime test for copy workaround (1.0.3) |
|
34 * 2005-12-13 fl added expat_capi support (for xml.etree) (1.0.4) |
|
35 * 2005-12-16 fl added support for non-standard encodings |
|
36 * 2006-03-08 fl fixed a couple of potential null-refs and leaks |
|
37 * 2006-03-12 fl merge in 2.5 ssize_t changes |
|
38 * |
|
39 * Copyright (c) 1999-2006 by Secret Labs AB. All rights reserved. |
|
40 * Copyright (c) 1999-2006 by Fredrik Lundh. |
|
41 * |
|
42 * info@pythonware.com |
|
43 * http://www.pythonware.com |
|
44 */ |
|
45 |
|
46 /* Licensed to PSF under a Contributor Agreement. */ |
|
47 /* See http://www.python.org/2.4/license for licensing details. */ |
|
48 |
|
49 #include "Python.h" |
|
50 |
|
#define VERSION "1.0.6"

/* -------------------------------------------------------------------- */
/* configuration */

/* Keep this defined to include the expat-based XMLParser type. */
#define USE_EXPAT

/* Define to do all expat calls via pyexpat's embedded expat library. */
/* #define USE_PYEXPAT_CAPI */

/* Number of children an element can hold without extra memory
   allocations.

   For best performance, choose a value so that 80-90% of all nodes
   have no more than this number of children.  Set it to zero to
   minimize the size of the element structure itself (this only helps
   if you have lots of leaf nodes with attributes).

   Also note that pymalloc always allocates blocks in multiples of
   eight bytes.  For the current version of cElementTree, this means
   that the number of children should be an even number, at least on
   32-bit platforms. */
#define STATIC_CHILDREN 4
|
75 |
|
76 /* -------------------------------------------------------------------- */ |
|
77 |
|
/* Memory accounting hooks.  The default branch compiles ALLOC and
   RELEASE away to nothing; flip the condition to 0 to get a running
   total printed on every element allocation and release. */
#if 1
#define ALLOC(size, comment)
#define RELEASE(size, comment)
#else
static int memory = 0;
#define ALLOC(size, comment)\
do { memory += (size); printf("%8d - %s\n", memory, (comment)); } while (0)
#define RELEASE(size, comment)\
do { memory -= (size); printf("%8d - %s\n", memory, (comment)); } while (0)
#endif
|
88 |
|
/* compiler tweaks: LOCAL(type) introduces a file-private function
   returning `type`; MSVC builds additionally ask for inlining and the
   __fastcall calling convention */
#ifndef _MSC_VER
#define LOCAL(type) static type
#else
#define LOCAL(type) static __inline type __fastcall
#endif
|
95 |
|
/* compatibility macros: supply names that older Python headers lack */

/* before 2.5: no Py_ssize_t, and length slots use the inquiry type */
#if PY_VERSION_HEX < 0x02050000
typedef int Py_ssize_t;
#define lenfunc inquiry
#endif

/* before 2.4: fall back to the non-exact dictionary check */
#if PY_VERSION_HEX < 0x02040000
#define PyDict_CheckExact PyDict_Check
#endif

/* before 2.2: fall back to the non-exact list and string checks */
#if PY_VERSION_HEX < 0x02020000
#define PyList_CheckExact PyList_Check
#define PyString_CheckExact PyString_Check
#endif

/* 1.6 up to (but not including) 2.2 */
#if PY_VERSION_HEX >= 0x01060000 && PY_VERSION_HEX < 0x02020000
#define Py_USING_UNICODE /* always enabled for 2.0 and 2.1 */
#endif

#ifndef Py_RETURN_NONE
#define Py_RETURN_NONE return Py_INCREF(Py_None), Py_None
#endif
|
116 |
|
/* Tagged-pointer helpers.  The lowest bit of the text and tail
   pointers carries a 'join' flag, so every use of those fields as
   object pointers must go through JOIN_OBJ.  See the comments in the
   ElementObject definition for more info.

   JOIN_OBJ(p)        the object pointer with the flag bit cleared
   JOIN_GET(p)        the flag bit (0 or 1)
   JOIN_SET(p, flag)  the pointer with its flag bit cleared, or'ed
                      with `flag` */
#define JOIN_OBJ(p) ((PyObject*) (((Py_uintptr_t) (p)) & ~1))
#define JOIN_GET(p) (((Py_uintptr_t) (p)) & 1)
#define JOIN_SET(p, flag) ((void*) (((Py_uintptr_t) (JOIN_OBJ(p))) | (flag)))
|
124 |
|
125 /* glue functions (see the init function for details) */ |
|
126 static PyObject* elementtree_copyelement_obj; |
|
127 static PyObject* elementtree_deepcopy_obj; |
|
128 static PyObject* elementtree_getiterator_obj; |
|
129 static PyObject* elementpath_obj; |
|
130 |
|
131 /* helpers */ |
|
132 |
|
133 LOCAL(PyObject*) |
|
134 deepcopy(PyObject* object, PyObject* memo) |
|
135 { |
|
136 /* do a deep copy of the given object */ |
|
137 |
|
138 PyObject* args; |
|
139 PyObject* result; |
|
140 |
|
141 if (!elementtree_deepcopy_obj) { |
|
142 PyErr_SetString( |
|
143 PyExc_RuntimeError, |
|
144 "deepcopy helper not found" |
|
145 ); |
|
146 return NULL; |
|
147 } |
|
148 |
|
149 args = PyTuple_New(2); |
|
150 if (!args) |
|
151 return NULL; |
|
152 |
|
153 Py_INCREF(object); PyTuple_SET_ITEM(args, 0, (PyObject*) object); |
|
154 Py_INCREF(memo); PyTuple_SET_ITEM(args, 1, (PyObject*) memo); |
|
155 |
|
156 result = PyObject_CallObject(elementtree_deepcopy_obj, args); |
|
157 |
|
158 Py_DECREF(args); |
|
159 |
|
160 return result; |
|
161 } |
|
162 |
|
163 LOCAL(PyObject*) |
|
164 list_join(PyObject* list) |
|
165 { |
|
166 /* join list elements (destroying the list in the process) */ |
|
167 |
|
168 PyObject* joiner; |
|
169 PyObject* function; |
|
170 PyObject* args; |
|
171 PyObject* result; |
|
172 |
|
173 switch (PyList_GET_SIZE(list)) { |
|
174 case 0: |
|
175 Py_DECREF(list); |
|
176 return PyString_FromString(""); |
|
177 case 1: |
|
178 result = PyList_GET_ITEM(list, 0); |
|
179 Py_INCREF(result); |
|
180 Py_DECREF(list); |
|
181 return result; |
|
182 } |
|
183 |
|
184 /* two or more elements: slice out a suitable separator from the |
|
185 first member, and use that to join the entire list */ |
|
186 |
|
187 joiner = PySequence_GetSlice(PyList_GET_ITEM(list, 0), 0, 0); |
|
188 if (!joiner) |
|
189 return NULL; |
|
190 |
|
191 function = PyObject_GetAttrString(joiner, "join"); |
|
192 if (!function) { |
|
193 Py_DECREF(joiner); |
|
194 return NULL; |
|
195 } |
|
196 |
|
197 args = PyTuple_New(1); |
|
198 if (!args) |
|
199 return NULL; |
|
200 |
|
201 PyTuple_SET_ITEM(args, 0, list); |
|
202 |
|
203 result = PyObject_CallObject(function, args); |
|
204 |
|
205 Py_DECREF(args); /* also removes list */ |
|
206 Py_DECREF(function); |
|
207 Py_DECREF(joiner); |
|
208 |
|
209 return result; |
|
210 } |
|
211 |
|
212 #if (PY_VERSION_HEX < 0x02020000) |
|
213 LOCAL(int) |
|
214 PyDict_Update(PyObject* dict, PyObject* other) |
|
215 { |
|
216 /* PyDict_Update emulation for 2.1 and earlier */ |
|
217 |
|
218 PyObject* res; |
|
219 |
|
220 res = PyObject_CallMethod(dict, "update", "O", other); |
|
221 if (!res) |
|
222 return -1; |
|
223 |
|
224 Py_DECREF(res); |
|
225 return 0; |
|
226 } |
|
227 #endif |
|
228 |
|
229 /* -------------------------------------------------------------------- */ |
|
230 /* the element type */ |
|
231 |
|
232 typedef struct { |
|
233 |
|
234 /* attributes (a dictionary object), or None if no attributes */ |
|
235 PyObject* attrib; |
|
236 |
|
237 /* child elements */ |
|
238 int length; /* actual number of items */ |
|
239 int allocated; /* allocated items */ |
|
240 |
|
241 /* this either points to _children or to a malloced buffer */ |
|
242 PyObject* *children; |
|
243 |
|
244 PyObject* _children[STATIC_CHILDREN]; |
|
245 |
|
246 } ElementObjectExtra; |
|
247 |
|
248 typedef struct { |
|
249 PyObject_HEAD |
|
250 |
|
251 /* element tag (a string). */ |
|
252 PyObject* tag; |
|
253 |
|
254 /* text before first child. note that this is a tagged pointer; |
|
255 use JOIN_OBJ to get the object pointer. the join flag is used |
|
256 to distinguish lists created by the tree builder from lists |
|
257 assigned to the attribute by application code; the former |
|
258 should be joined before being returned to the user, the latter |
|
259 should be left intact. */ |
|
260 PyObject* text; |
|
261 |
|
262 /* text after this element, in parent. note that this is a tagged |
|
263 pointer; use JOIN_OBJ to get the object pointer. */ |
|
264 PyObject* tail; |
|
265 |
|
266 ElementObjectExtra* extra; |
|
267 |
|
268 } ElementObject; |
|
269 |
|
270 staticforward PyTypeObject Element_Type; |
|
271 |
|
272 #define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type) |
|
273 |
|
274 /* -------------------------------------------------------------------- */ |
|
275 /* element constructor and destructor */ |
|
276 |
|
277 LOCAL(int) |
|
278 element_new_extra(ElementObject* self, PyObject* attrib) |
|
279 { |
|
280 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra)); |
|
281 if (!self->extra) |
|
282 return -1; |
|
283 |
|
284 if (!attrib) |
|
285 attrib = Py_None; |
|
286 |
|
287 Py_INCREF(attrib); |
|
288 self->extra->attrib = attrib; |
|
289 |
|
290 self->extra->length = 0; |
|
291 self->extra->allocated = STATIC_CHILDREN; |
|
292 self->extra->children = self->extra->_children; |
|
293 |
|
294 return 0; |
|
295 } |
|
296 |
|
297 LOCAL(void) |
|
298 element_dealloc_extra(ElementObject* self) |
|
299 { |
|
300 int i; |
|
301 |
|
302 Py_DECREF(self->extra->attrib); |
|
303 |
|
304 for (i = 0; i < self->extra->length; i++) |
|
305 Py_DECREF(self->extra->children[i]); |
|
306 |
|
307 if (self->extra->children != self->extra->_children) |
|
308 PyObject_Free(self->extra->children); |
|
309 |
|
310 PyObject_Free(self->extra); |
|
311 } |
|
312 |
|
313 LOCAL(PyObject*) |
|
314 element_new(PyObject* tag, PyObject* attrib) |
|
315 { |
|
316 ElementObject* self; |
|
317 |
|
318 self = PyObject_New(ElementObject, &Element_Type); |
|
319 if (self == NULL) |
|
320 return NULL; |
|
321 |
|
322 /* use None for empty dictionaries */ |
|
323 if (PyDict_CheckExact(attrib) && !PyDict_Size(attrib)) |
|
324 attrib = Py_None; |
|
325 |
|
326 self->extra = NULL; |
|
327 |
|
328 if (attrib != Py_None) { |
|
329 |
|
330 if (element_new_extra(self, attrib) < 0) { |
|
331 PyObject_Del(self); |
|
332 return NULL; |
|
333 } |
|
334 |
|
335 self->extra->length = 0; |
|
336 self->extra->allocated = STATIC_CHILDREN; |
|
337 self->extra->children = self->extra->_children; |
|
338 |
|
339 } |
|
340 |
|
341 Py_INCREF(tag); |
|
342 self->tag = tag; |
|
343 |
|
344 Py_INCREF(Py_None); |
|
345 self->text = Py_None; |
|
346 |
|
347 Py_INCREF(Py_None); |
|
348 self->tail = Py_None; |
|
349 |
|
350 ALLOC(sizeof(ElementObject), "create element"); |
|
351 |
|
352 return (PyObject*) self; |
|
353 } |
|
354 |
|
355 LOCAL(int) |
|
356 element_resize(ElementObject* self, int extra) |
|
357 { |
|
358 int size; |
|
359 PyObject* *children; |
|
360 |
|
361 /* make sure self->children can hold the given number of extra |
|
362 elements. set an exception and return -1 if allocation failed */ |
|
363 |
|
364 if (!self->extra) |
|
365 element_new_extra(self, NULL); |
|
366 |
|
367 size = self->extra->length + extra; |
|
368 |
|
369 if (size > self->extra->allocated) { |
|
370 /* use Python 2.4's list growth strategy */ |
|
371 size = (size >> 3) + (size < 9 ? 3 : 6) + size; |
|
372 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children" |
|
373 * which needs at least 4 bytes. |
|
374 * Although it's a false alarm always assume at least one child to |
|
375 * be safe. |
|
376 */ |
|
377 size = size ? size : 1; |
|
378 if (self->extra->children != self->extra->_children) { |
|
379 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer |
|
380 * "children", which needs at least 4 bytes. Although it's a |
|
381 * false alarm always assume at least one child to be safe. |
|
382 */ |
|
383 children = PyObject_Realloc(self->extra->children, |
|
384 size * sizeof(PyObject*)); |
|
385 if (!children) |
|
386 goto nomemory; |
|
387 } else { |
|
388 children = PyObject_Malloc(size * sizeof(PyObject*)); |
|
389 if (!children) |
|
390 goto nomemory; |
|
391 /* copy existing children from static area to malloc buffer */ |
|
392 memcpy(children, self->extra->children, |
|
393 self->extra->length * sizeof(PyObject*)); |
|
394 } |
|
395 self->extra->children = children; |
|
396 self->extra->allocated = size; |
|
397 } |
|
398 |
|
399 return 0; |
|
400 |
|
401 nomemory: |
|
402 PyErr_NoMemory(); |
|
403 return -1; |
|
404 } |
|
405 |
|
406 LOCAL(int) |
|
407 element_add_subelement(ElementObject* self, PyObject* element) |
|
408 { |
|
409 /* add a child element to a parent */ |
|
410 |
|
411 if (element_resize(self, 1) < 0) |
|
412 return -1; |
|
413 |
|
414 Py_INCREF(element); |
|
415 self->extra->children[self->extra->length] = element; |
|
416 |
|
417 self->extra->length++; |
|
418 |
|
419 return 0; |
|
420 } |
|
421 |
|
422 LOCAL(PyObject*) |
|
423 element_get_attrib(ElementObject* self) |
|
424 { |
|
425 /* return borrowed reference to attrib dictionary */ |
|
426 /* note: this function assumes that the extra section exists */ |
|
427 |
|
428 PyObject* res = self->extra->attrib; |
|
429 |
|
430 if (res == Py_None) { |
|
431 /* create missing dictionary */ |
|
432 res = PyDict_New(); |
|
433 if (!res) |
|
434 return NULL; |
|
435 self->extra->attrib = res; |
|
436 } |
|
437 |
|
438 return res; |
|
439 } |
|
440 |
|
441 LOCAL(PyObject*) |
|
442 element_get_text(ElementObject* self) |
|
443 { |
|
444 /* return borrowed reference to text attribute */ |
|
445 |
|
446 PyObject* res = self->text; |
|
447 |
|
448 if (JOIN_GET(res)) { |
|
449 res = JOIN_OBJ(res); |
|
450 if (PyList_CheckExact(res)) { |
|
451 res = list_join(res); |
|
452 if (!res) |
|
453 return NULL; |
|
454 self->text = res; |
|
455 } |
|
456 } |
|
457 |
|
458 return res; |
|
459 } |
|
460 |
|
461 LOCAL(PyObject*) |
|
462 element_get_tail(ElementObject* self) |
|
463 { |
|
464 /* return borrowed reference to text attribute */ |
|
465 |
|
466 PyObject* res = self->tail; |
|
467 |
|
468 if (JOIN_GET(res)) { |
|
469 res = JOIN_OBJ(res); |
|
470 if (PyList_CheckExact(res)) { |
|
471 res = list_join(res); |
|
472 if (!res) |
|
473 return NULL; |
|
474 self->tail = res; |
|
475 } |
|
476 } |
|
477 |
|
478 return res; |
|
479 } |
|
480 |
|
481 static PyObject* |
|
482 element(PyObject* self, PyObject* args, PyObject* kw) |
|
483 { |
|
484 PyObject* elem; |
|
485 |
|
486 PyObject* tag; |
|
487 PyObject* attrib = NULL; |
|
488 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, |
|
489 &PyDict_Type, &attrib)) |
|
490 return NULL; |
|
491 |
|
492 if (attrib || kw) { |
|
493 attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New(); |
|
494 if (!attrib) |
|
495 return NULL; |
|
496 if (kw) |
|
497 PyDict_Update(attrib, kw); |
|
498 } else { |
|
499 Py_INCREF(Py_None); |
|
500 attrib = Py_None; |
|
501 } |
|
502 |
|
503 elem = element_new(tag, attrib); |
|
504 |
|
505 Py_DECREF(attrib); |
|
506 |
|
507 return elem; |
|
508 } |
|
509 |
|
510 static PyObject* |
|
511 subelement(PyObject* self, PyObject* args, PyObject* kw) |
|
512 { |
|
513 PyObject* elem; |
|
514 |
|
515 ElementObject* parent; |
|
516 PyObject* tag; |
|
517 PyObject* attrib = NULL; |
|
518 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement", |
|
519 &Element_Type, &parent, &tag, |
|
520 &PyDict_Type, &attrib)) |
|
521 return NULL; |
|
522 |
|
523 if (attrib || kw) { |
|
524 attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New(); |
|
525 if (!attrib) |
|
526 return NULL; |
|
527 if (kw) |
|
528 PyDict_Update(attrib, kw); |
|
529 } else { |
|
530 Py_INCREF(Py_None); |
|
531 attrib = Py_None; |
|
532 } |
|
533 |
|
534 elem = element_new(tag, attrib); |
|
535 |
|
536 Py_DECREF(attrib); |
|
537 |
|
538 if (element_add_subelement(parent, elem) < 0) { |
|
539 Py_DECREF(elem); |
|
540 return NULL; |
|
541 } |
|
542 |
|
543 return elem; |
|
544 } |
|
545 |
|
546 static void |
|
547 element_dealloc(ElementObject* self) |
|
548 { |
|
549 if (self->extra) |
|
550 element_dealloc_extra(self); |
|
551 |
|
552 /* discard attributes */ |
|
553 Py_DECREF(self->tag); |
|
554 Py_DECREF(JOIN_OBJ(self->text)); |
|
555 Py_DECREF(JOIN_OBJ(self->tail)); |
|
556 |
|
557 RELEASE(sizeof(ElementObject), "destroy element"); |
|
558 |
|
559 PyObject_Del(self); |
|
560 } |
|
561 |
|
562 /* -------------------------------------------------------------------- */ |
|
563 /* methods (in alphabetical order) */ |
|
564 |
|
565 static PyObject* |
|
566 element_append(ElementObject* self, PyObject* args) |
|
567 { |
|
568 PyObject* element; |
|
569 if (!PyArg_ParseTuple(args, "O!:append", &Element_Type, &element)) |
|
570 return NULL; |
|
571 |
|
572 if (element_add_subelement(self, element) < 0) |
|
573 return NULL; |
|
574 |
|
575 Py_RETURN_NONE; |
|
576 } |
|
577 |
|
578 static PyObject* |
|
579 element_clear(ElementObject* self, PyObject* args) |
|
580 { |
|
581 if (!PyArg_ParseTuple(args, ":clear")) |
|
582 return NULL; |
|
583 |
|
584 if (self->extra) { |
|
585 element_dealloc_extra(self); |
|
586 self->extra = NULL; |
|
587 } |
|
588 |
|
589 Py_INCREF(Py_None); |
|
590 Py_DECREF(JOIN_OBJ(self->text)); |
|
591 self->text = Py_None; |
|
592 |
|
593 Py_INCREF(Py_None); |
|
594 Py_DECREF(JOIN_OBJ(self->tail)); |
|
595 self->tail = Py_None; |
|
596 |
|
597 Py_RETURN_NONE; |
|
598 } |
|
599 |
|
600 static PyObject* |
|
601 element_copy(ElementObject* self, PyObject* args) |
|
602 { |
|
603 int i; |
|
604 ElementObject* element; |
|
605 |
|
606 if (!PyArg_ParseTuple(args, ":__copy__")) |
|
607 return NULL; |
|
608 |
|
609 element = (ElementObject*) element_new( |
|
610 self->tag, (self->extra) ? self->extra->attrib : Py_None |
|
611 ); |
|
612 if (!element) |
|
613 return NULL; |
|
614 |
|
615 Py_DECREF(JOIN_OBJ(element->text)); |
|
616 element->text = self->text; |
|
617 Py_INCREF(JOIN_OBJ(element->text)); |
|
618 |
|
619 Py_DECREF(JOIN_OBJ(element->tail)); |
|
620 element->tail = self->tail; |
|
621 Py_INCREF(JOIN_OBJ(element->tail)); |
|
622 |
|
623 if (self->extra) { |
|
624 |
|
625 if (element_resize(element, self->extra->length) < 0) { |
|
626 Py_DECREF(element); |
|
627 return NULL; |
|
628 } |
|
629 |
|
630 for (i = 0; i < self->extra->length; i++) { |
|
631 Py_INCREF(self->extra->children[i]); |
|
632 element->extra->children[i] = self->extra->children[i]; |
|
633 } |
|
634 |
|
635 element->extra->length = self->extra->length; |
|
636 |
|
637 } |
|
638 |
|
639 return (PyObject*) element; |
|
640 } |
|
641 |
|
642 static PyObject* |
|
643 element_deepcopy(ElementObject* self, PyObject* args) |
|
644 { |
|
645 int i; |
|
646 ElementObject* element; |
|
647 PyObject* tag; |
|
648 PyObject* attrib; |
|
649 PyObject* text; |
|
650 PyObject* tail; |
|
651 PyObject* id; |
|
652 |
|
653 PyObject* memo; |
|
654 if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo)) |
|
655 return NULL; |
|
656 |
|
657 tag = deepcopy(self->tag, memo); |
|
658 if (!tag) |
|
659 return NULL; |
|
660 |
|
661 if (self->extra) { |
|
662 attrib = deepcopy(self->extra->attrib, memo); |
|
663 if (!attrib) { |
|
664 Py_DECREF(tag); |
|
665 return NULL; |
|
666 } |
|
667 } else { |
|
668 Py_INCREF(Py_None); |
|
669 attrib = Py_None; |
|
670 } |
|
671 |
|
672 element = (ElementObject*) element_new(tag, attrib); |
|
673 |
|
674 Py_DECREF(tag); |
|
675 Py_DECREF(attrib); |
|
676 |
|
677 if (!element) |
|
678 return NULL; |
|
679 |
|
680 text = deepcopy(JOIN_OBJ(self->text), memo); |
|
681 if (!text) |
|
682 goto error; |
|
683 Py_DECREF(element->text); |
|
684 element->text = JOIN_SET(text, JOIN_GET(self->text)); |
|
685 |
|
686 tail = deepcopy(JOIN_OBJ(self->tail), memo); |
|
687 if (!tail) |
|
688 goto error; |
|
689 Py_DECREF(element->tail); |
|
690 element->tail = JOIN_SET(tail, JOIN_GET(self->tail)); |
|
691 |
|
692 if (self->extra) { |
|
693 |
|
694 if (element_resize(element, self->extra->length) < 0) |
|
695 goto error; |
|
696 |
|
697 for (i = 0; i < self->extra->length; i++) { |
|
698 PyObject* child = deepcopy(self->extra->children[i], memo); |
|
699 if (!child) { |
|
700 element->extra->length = i; |
|
701 goto error; |
|
702 } |
|
703 element->extra->children[i] = child; |
|
704 } |
|
705 |
|
706 element->extra->length = self->extra->length; |
|
707 |
|
708 } |
|
709 |
|
710 /* add object to memo dictionary (so deepcopy won't visit it again) */ |
|
711 id = PyInt_FromLong((Py_uintptr_t) self); |
|
712 |
|
713 i = PyDict_SetItem(memo, id, (PyObject*) element); |
|
714 |
|
715 Py_DECREF(id); |
|
716 |
|
717 if (i < 0) |
|
718 goto error; |
|
719 |
|
720 return (PyObject*) element; |
|
721 |
|
722 error: |
|
723 Py_DECREF(element); |
|
724 return NULL; |
|
725 } |
|
726 |
|
727 LOCAL(int) |
|
728 checkpath(PyObject* tag) |
|
729 { |
|
730 Py_ssize_t i; |
|
731 int check = 1; |
|
732 |
|
733 /* check if a tag contains an xpath character */ |
|
734 |
|
735 #define PATHCHAR(ch) (ch == '/' || ch == '*' || ch == '[' || ch == '@') |
|
736 |
|
737 #if defined(Py_USING_UNICODE) |
|
738 if (PyUnicode_Check(tag)) { |
|
739 Py_UNICODE *p = PyUnicode_AS_UNICODE(tag); |
|
740 for (i = 0; i < PyUnicode_GET_SIZE(tag); i++) { |
|
741 if (p[i] == '{') |
|
742 check = 0; |
|
743 else if (p[i] == '}') |
|
744 check = 1; |
|
745 else if (check && PATHCHAR(p[i])) |
|
746 return 1; |
|
747 } |
|
748 return 0; |
|
749 } |
|
750 #endif |
|
751 if (PyString_Check(tag)) { |
|
752 char *p = PyString_AS_STRING(tag); |
|
753 for (i = 0; i < PyString_GET_SIZE(tag); i++) { |
|
754 if (p[i] == '{') |
|
755 check = 0; |
|
756 else if (p[i] == '}') |
|
757 check = 1; |
|
758 else if (check && PATHCHAR(p[i])) |
|
759 return 1; |
|
760 } |
|
761 return 0; |
|
762 } |
|
763 |
|
764 return 1; /* unknown type; might be path expression */ |
|
765 } |
|
766 |
|
767 static PyObject* |
|
768 element_find(ElementObject* self, PyObject* args) |
|
769 { |
|
770 int i; |
|
771 |
|
772 PyObject* tag; |
|
773 if (!PyArg_ParseTuple(args, "O:find", &tag)) |
|
774 return NULL; |
|
775 |
|
776 if (checkpath(tag)) |
|
777 return PyObject_CallMethod( |
|
778 elementpath_obj, "find", "OO", self, tag |
|
779 ); |
|
780 |
|
781 if (!self->extra) |
|
782 Py_RETURN_NONE; |
|
783 |
|
784 for (i = 0; i < self->extra->length; i++) { |
|
785 PyObject* item = self->extra->children[i]; |
|
786 if (Element_CheckExact(item) && |
|
787 PyObject_Compare(((ElementObject*)item)->tag, tag) == 0) { |
|
788 Py_INCREF(item); |
|
789 return item; |
|
790 } |
|
791 } |
|
792 |
|
793 Py_RETURN_NONE; |
|
794 } |
|
795 |
|
796 static PyObject* |
|
797 element_findtext(ElementObject* self, PyObject* args) |
|
798 { |
|
799 int i; |
|
800 |
|
801 PyObject* tag; |
|
802 PyObject* default_value = Py_None; |
|
803 if (!PyArg_ParseTuple(args, "O|O:findtext", &tag, &default_value)) |
|
804 return NULL; |
|
805 |
|
806 if (checkpath(tag)) |
|
807 return PyObject_CallMethod( |
|
808 elementpath_obj, "findtext", "OOO", self, tag, default_value |
|
809 ); |
|
810 |
|
811 if (!self->extra) { |
|
812 Py_INCREF(default_value); |
|
813 return default_value; |
|
814 } |
|
815 |
|
816 for (i = 0; i < self->extra->length; i++) { |
|
817 ElementObject* item = (ElementObject*) self->extra->children[i]; |
|
818 if (Element_CheckExact(item) && !PyObject_Compare(item->tag, tag)) { |
|
819 PyObject* text = element_get_text(item); |
|
820 if (text == Py_None) |
|
821 return PyString_FromString(""); |
|
822 Py_XINCREF(text); |
|
823 return text; |
|
824 } |
|
825 } |
|
826 |
|
827 Py_INCREF(default_value); |
|
828 return default_value; |
|
829 } |
|
830 |
|
831 static PyObject* |
|
832 element_findall(ElementObject* self, PyObject* args) |
|
833 { |
|
834 int i; |
|
835 PyObject* out; |
|
836 |
|
837 PyObject* tag; |
|
838 if (!PyArg_ParseTuple(args, "O:findall", &tag)) |
|
839 return NULL; |
|
840 |
|
841 if (checkpath(tag)) |
|
842 return PyObject_CallMethod( |
|
843 elementpath_obj, "findall", "OO", self, tag |
|
844 ); |
|
845 |
|
846 out = PyList_New(0); |
|
847 if (!out) |
|
848 return NULL; |
|
849 |
|
850 if (!self->extra) |
|
851 return out; |
|
852 |
|
853 for (i = 0; i < self->extra->length; i++) { |
|
854 PyObject* item = self->extra->children[i]; |
|
855 if (Element_CheckExact(item) && |
|
856 PyObject_Compare(((ElementObject*)item)->tag, tag) == 0) { |
|
857 if (PyList_Append(out, item) < 0) { |
|
858 Py_DECREF(out); |
|
859 return NULL; |
|
860 } |
|
861 } |
|
862 } |
|
863 |
|
864 return out; |
|
865 } |
|
866 |
|
867 static PyObject* |
|
868 element_get(ElementObject* self, PyObject* args) |
|
869 { |
|
870 PyObject* value; |
|
871 |
|
872 PyObject* key; |
|
873 PyObject* default_value = Py_None; |
|
874 if (!PyArg_ParseTuple(args, "O|O:get", &key, &default_value)) |
|
875 return NULL; |
|
876 |
|
877 if (!self->extra || self->extra->attrib == Py_None) |
|
878 value = default_value; |
|
879 else { |
|
880 value = PyDict_GetItem(self->extra->attrib, key); |
|
881 if (!value) |
|
882 value = default_value; |
|
883 } |
|
884 |
|
885 Py_INCREF(value); |
|
886 return value; |
|
887 } |
|
888 |
|
889 static PyObject* |
|
890 element_getchildren(ElementObject* self, PyObject* args) |
|
891 { |
|
892 int i; |
|
893 PyObject* list; |
|
894 |
|
895 if (!PyArg_ParseTuple(args, ":getchildren")) |
|
896 return NULL; |
|
897 |
|
898 if (!self->extra) |
|
899 return PyList_New(0); |
|
900 |
|
901 list = PyList_New(self->extra->length); |
|
902 if (!list) |
|
903 return NULL; |
|
904 |
|
905 for (i = 0; i < self->extra->length; i++) { |
|
906 PyObject* item = self->extra->children[i]; |
|
907 Py_INCREF(item); |
|
908 PyList_SET_ITEM(list, i, item); |
|
909 } |
|
910 |
|
911 return list; |
|
912 } |
|
913 |
|
914 static PyObject* |
|
915 element_getiterator(ElementObject* self, PyObject* args) |
|
916 { |
|
917 PyObject* result; |
|
918 |
|
919 PyObject* tag = Py_None; |
|
920 if (!PyArg_ParseTuple(args, "|O:getiterator", &tag)) |
|
921 return NULL; |
|
922 |
|
923 if (!elementtree_getiterator_obj) { |
|
924 PyErr_SetString( |
|
925 PyExc_RuntimeError, |
|
926 "getiterator helper not found" |
|
927 ); |
|
928 return NULL; |
|
929 } |
|
930 |
|
931 args = PyTuple_New(2); |
|
932 if (!args) |
|
933 return NULL; |
|
934 |
|
935 Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self); |
|
936 Py_INCREF(tag); PyTuple_SET_ITEM(args, 1, (PyObject*) tag); |
|
937 |
|
938 result = PyObject_CallObject(elementtree_getiterator_obj, args); |
|
939 |
|
940 Py_DECREF(args); |
|
941 |
|
942 return result; |
|
943 } |
|
944 |
|
945 static PyObject* |
|
946 element_getitem(PyObject* self_, Py_ssize_t index) |
|
947 { |
|
948 ElementObject* self = (ElementObject*) self_; |
|
949 |
|
950 if (!self->extra || index < 0 || index >= self->extra->length) { |
|
951 PyErr_SetString( |
|
952 PyExc_IndexError, |
|
953 "child index out of range" |
|
954 ); |
|
955 return NULL; |
|
956 } |
|
957 |
|
958 Py_INCREF(self->extra->children[index]); |
|
959 return self->extra->children[index]; |
|
960 } |
|
961 |
|
962 static PyObject* |
|
963 element_getslice(PyObject* self_, Py_ssize_t start, Py_ssize_t end) |
|
964 { |
|
965 ElementObject* self = (ElementObject*) self_; |
|
966 Py_ssize_t i; |
|
967 PyObject* list; |
|
968 |
|
969 if (!self->extra) |
|
970 return PyList_New(0); |
|
971 |
|
972 /* standard clamping */ |
|
973 if (start < 0) |
|
974 start = 0; |
|
975 if (end < 0) |
|
976 end = 0; |
|
977 if (end > self->extra->length) |
|
978 end = self->extra->length; |
|
979 if (start > end) |
|
980 start = end; |
|
981 |
|
982 list = PyList_New(end - start); |
|
983 if (!list) |
|
984 return NULL; |
|
985 |
|
986 for (i = start; i < end; i++) { |
|
987 PyObject* item = self->extra->children[i]; |
|
988 Py_INCREF(item); |
|
989 PyList_SET_ITEM(list, i - start, item); |
|
990 } |
|
991 |
|
992 return list; |
|
993 } |
|
994 |
|
995 static PyObject* |
|
996 element_insert(ElementObject* self, PyObject* args) |
|
997 { |
|
998 int i; |
|
999 |
|
1000 int index; |
|
1001 PyObject* element; |
|
1002 if (!PyArg_ParseTuple(args, "iO!:insert", &index, |
|
1003 &Element_Type, &element)) |
|
1004 return NULL; |
|
1005 |
|
1006 if (!self->extra) |
|
1007 element_new_extra(self, NULL); |
|
1008 |
|
1009 if (index < 0) |
|
1010 index = 0; |
|
1011 if (index > self->extra->length) |
|
1012 index = self->extra->length; |
|
1013 |
|
1014 if (element_resize(self, 1) < 0) |
|
1015 return NULL; |
|
1016 |
|
1017 for (i = self->extra->length; i > index; i--) |
|
1018 self->extra->children[i] = self->extra->children[i-1]; |
|
1019 |
|
1020 Py_INCREF(element); |
|
1021 self->extra->children[index] = element; |
|
1022 |
|
1023 self->extra->length++; |
|
1024 |
|
1025 Py_RETURN_NONE; |
|
1026 } |
|
1027 |
|
1028 static PyObject* |
|
1029 element_items(ElementObject* self, PyObject* args) |
|
1030 { |
|
1031 if (!PyArg_ParseTuple(args, ":items")) |
|
1032 return NULL; |
|
1033 |
|
1034 if (!self->extra || self->extra->attrib == Py_None) |
|
1035 return PyList_New(0); |
|
1036 |
|
1037 return PyDict_Items(self->extra->attrib); |
|
1038 } |
|
1039 |
|
1040 static PyObject* |
|
1041 element_keys(ElementObject* self, PyObject* args) |
|
1042 { |
|
1043 if (!PyArg_ParseTuple(args, ":keys")) |
|
1044 return NULL; |
|
1045 |
|
1046 if (!self->extra || self->extra->attrib == Py_None) |
|
1047 return PyList_New(0); |
|
1048 |
|
1049 return PyDict_Keys(self->extra->attrib); |
|
1050 } |
|
1051 |
|
1052 static Py_ssize_t |
|
1053 element_length(ElementObject* self) |
|
1054 { |
|
1055 if (!self->extra) |
|
1056 return 0; |
|
1057 |
|
1058 return self->extra->length; |
|
1059 } |
|
1060 |
|
1061 static PyObject* |
|
1062 element_makeelement(PyObject* self, PyObject* args, PyObject* kw) |
|
1063 { |
|
1064 PyObject* elem; |
|
1065 |
|
1066 PyObject* tag; |
|
1067 PyObject* attrib; |
|
1068 if (!PyArg_ParseTuple(args, "OO:makeelement", &tag, &attrib)) |
|
1069 return NULL; |
|
1070 |
|
1071 attrib = PyDict_Copy(attrib); |
|
1072 if (!attrib) |
|
1073 return NULL; |
|
1074 |
|
1075 elem = element_new(tag, attrib); |
|
1076 |
|
1077 Py_DECREF(attrib); |
|
1078 |
|
1079 return elem; |
|
1080 } |
|
1081 |
|
1082 static PyObject* |
|
1083 element_reduce(ElementObject* self, PyObject* args) |
|
1084 { |
|
1085 if (!PyArg_ParseTuple(args, ":__reduce__")) |
|
1086 return NULL; |
|
1087 |
|
1088 /* Hack alert: This method is used to work around a __copy__ |
|
1089 problem on certain 2.3 and 2.4 versions. To save time and |
|
1090 simplify the code, we create the copy in here, and use a dummy |
|
1091 copyelement helper to trick the copy module into doing the |
|
1092 right thing. */ |
|
1093 |
|
1094 if (!elementtree_copyelement_obj) { |
|
1095 PyErr_SetString( |
|
1096 PyExc_RuntimeError, |
|
1097 "copyelement helper not found" |
|
1098 ); |
|
1099 return NULL; |
|
1100 } |
|
1101 |
|
1102 return Py_BuildValue( |
|
1103 "O(N)", elementtree_copyelement_obj, element_copy(self, args) |
|
1104 ); |
|
1105 } |
|
1106 |
|
1107 static PyObject* |
|
1108 element_remove(ElementObject* self, PyObject* args) |
|
1109 { |
|
1110 int i; |
|
1111 |
|
1112 PyObject* element; |
|
1113 if (!PyArg_ParseTuple(args, "O!:remove", &Element_Type, &element)) |
|
1114 return NULL; |
|
1115 |
|
1116 if (!self->extra) { |
|
1117 /* element has no children, so raise exception */ |
|
1118 PyErr_SetString( |
|
1119 PyExc_ValueError, |
|
1120 "list.remove(x): x not in list" |
|
1121 ); |
|
1122 return NULL; |
|
1123 } |
|
1124 |
|
1125 for (i = 0; i < self->extra->length; i++) { |
|
1126 if (self->extra->children[i] == element) |
|
1127 break; |
|
1128 if (PyObject_Compare(self->extra->children[i], element) == 0) |
|
1129 break; |
|
1130 } |
|
1131 |
|
1132 if (i == self->extra->length) { |
|
1133 /* element is not in children, so raise exception */ |
|
1134 PyErr_SetString( |
|
1135 PyExc_ValueError, |
|
1136 "list.remove(x): x not in list" |
|
1137 ); |
|
1138 return NULL; |
|
1139 } |
|
1140 |
|
1141 Py_DECREF(self->extra->children[i]); |
|
1142 |
|
1143 self->extra->length--; |
|
1144 |
|
1145 for (; i < self->extra->length; i++) |
|
1146 self->extra->children[i] = self->extra->children[i+1]; |
|
1147 |
|
1148 Py_RETURN_NONE; |
|
1149 } |
|
1150 |
|
1151 static PyObject* |
|
1152 element_repr(ElementObject* self) |
|
1153 { |
|
1154 PyObject* repr; |
|
1155 char buffer[100]; |
|
1156 |
|
1157 repr = PyString_FromString("<Element "); |
|
1158 |
|
1159 PyString_ConcatAndDel(&repr, PyObject_Repr(self->tag)); |
|
1160 |
|
1161 sprintf(buffer, " at %p>", self); |
|
1162 PyString_ConcatAndDel(&repr, PyString_FromString(buffer)); |
|
1163 |
|
1164 return repr; |
|
1165 } |
|
1166 |
|
1167 static PyObject* |
|
1168 element_set(ElementObject* self, PyObject* args) |
|
1169 { |
|
1170 PyObject* attrib; |
|
1171 |
|
1172 PyObject* key; |
|
1173 PyObject* value; |
|
1174 if (!PyArg_ParseTuple(args, "OO:set", &key, &value)) |
|
1175 return NULL; |
|
1176 |
|
1177 if (!self->extra) |
|
1178 element_new_extra(self, NULL); |
|
1179 |
|
1180 attrib = element_get_attrib(self); |
|
1181 if (!attrib) |
|
1182 return NULL; |
|
1183 |
|
1184 if (PyDict_SetItem(attrib, key, value) < 0) |
|
1185 return NULL; |
|
1186 |
|
1187 Py_RETURN_NONE; |
|
1188 } |
|
1189 |
|
1190 static int |
|
1191 element_setslice(PyObject* self_, Py_ssize_t start, Py_ssize_t end, PyObject* item) |
|
1192 { |
|
1193 ElementObject* self = (ElementObject*) self_; |
|
1194 Py_ssize_t i, new, old; |
|
1195 PyObject* recycle = NULL; |
|
1196 |
|
1197 if (!self->extra) |
|
1198 element_new_extra(self, NULL); |
|
1199 |
|
1200 /* standard clamping */ |
|
1201 if (start < 0) |
|
1202 start = 0; |
|
1203 if (end < 0) |
|
1204 end = 0; |
|
1205 if (end > self->extra->length) |
|
1206 end = self->extra->length; |
|
1207 if (start > end) |
|
1208 start = end; |
|
1209 |
|
1210 old = end - start; |
|
1211 |
|
1212 if (item == NULL) |
|
1213 new = 0; |
|
1214 else if (PyList_CheckExact(item)) { |
|
1215 new = PyList_GET_SIZE(item); |
|
1216 } else { |
|
1217 /* FIXME: support arbitrary sequences? */ |
|
1218 PyErr_Format( |
|
1219 PyExc_TypeError, |
|
1220 "expected list, not \"%.200s\"", Py_TYPE(item)->tp_name |
|
1221 ); |
|
1222 return -1; |
|
1223 } |
|
1224 |
|
1225 if (old > 0) { |
|
1226 /* to avoid recursive calls to this method (via decref), move |
|
1227 old items to the recycle bin here, and get rid of them when |
|
1228 we're done modifying the element */ |
|
1229 recycle = PyList_New(old); |
|
1230 for (i = 0; i < old; i++) |
|
1231 PyList_SET_ITEM(recycle, i, self->extra->children[i + start]); |
|
1232 } |
|
1233 |
|
1234 if (new < old) { |
|
1235 /* delete slice */ |
|
1236 for (i = end; i < self->extra->length; i++) |
|
1237 self->extra->children[i + new - old] = self->extra->children[i]; |
|
1238 } else if (new > old) { |
|
1239 /* insert slice */ |
|
1240 if (element_resize(self, new - old) < 0) |
|
1241 return -1; |
|
1242 for (i = self->extra->length-1; i >= end; i--) |
|
1243 self->extra->children[i + new - old] = self->extra->children[i]; |
|
1244 } |
|
1245 |
|
1246 /* replace the slice */ |
|
1247 for (i = 0; i < new; i++) { |
|
1248 PyObject* element = PyList_GET_ITEM(item, i); |
|
1249 Py_INCREF(element); |
|
1250 self->extra->children[i + start] = element; |
|
1251 } |
|
1252 |
|
1253 self->extra->length += new - old; |
|
1254 |
|
1255 /* discard the recycle bin, and everything in it */ |
|
1256 Py_XDECREF(recycle); |
|
1257 |
|
1258 return 0; |
|
1259 } |
|
1260 |
|
1261 static int |
|
1262 element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item) |
|
1263 { |
|
1264 ElementObject* self = (ElementObject*) self_; |
|
1265 int i; |
|
1266 PyObject* old; |
|
1267 |
|
1268 if (!self->extra || index < 0 || index >= self->extra->length) { |
|
1269 PyErr_SetString( |
|
1270 PyExc_IndexError, |
|
1271 "child assignment index out of range"); |
|
1272 return -1; |
|
1273 } |
|
1274 |
|
1275 old = self->extra->children[index]; |
|
1276 |
|
1277 if (item) { |
|
1278 Py_INCREF(item); |
|
1279 self->extra->children[index] = item; |
|
1280 } else { |
|
1281 self->extra->length--; |
|
1282 for (i = index; i < self->extra->length; i++) |
|
1283 self->extra->children[i] = self->extra->children[i+1]; |
|
1284 } |
|
1285 |
|
1286 Py_DECREF(old); |
|
1287 |
|
1288 return 0; |
|
1289 } |
|
1290 |
|
1291 static PyMethodDef element_methods[] = { |
|
1292 |
|
1293 {"clear", (PyCFunction) element_clear, METH_VARARGS}, |
|
1294 |
|
1295 {"get", (PyCFunction) element_get, METH_VARARGS}, |
|
1296 {"set", (PyCFunction) element_set, METH_VARARGS}, |
|
1297 |
|
1298 {"find", (PyCFunction) element_find, METH_VARARGS}, |
|
1299 {"findtext", (PyCFunction) element_findtext, METH_VARARGS}, |
|
1300 {"findall", (PyCFunction) element_findall, METH_VARARGS}, |
|
1301 |
|
1302 {"append", (PyCFunction) element_append, METH_VARARGS}, |
|
1303 {"insert", (PyCFunction) element_insert, METH_VARARGS}, |
|
1304 {"remove", (PyCFunction) element_remove, METH_VARARGS}, |
|
1305 |
|
1306 {"getiterator", (PyCFunction) element_getiterator, METH_VARARGS}, |
|
1307 {"getchildren", (PyCFunction) element_getchildren, METH_VARARGS}, |
|
1308 |
|
1309 {"items", (PyCFunction) element_items, METH_VARARGS}, |
|
1310 {"keys", (PyCFunction) element_keys, METH_VARARGS}, |
|
1311 |
|
1312 {"makeelement", (PyCFunction) element_makeelement, METH_VARARGS}, |
|
1313 |
|
1314 {"__copy__", (PyCFunction) element_copy, METH_VARARGS}, |
|
1315 {"__deepcopy__", (PyCFunction) element_deepcopy, METH_VARARGS}, |
|
1316 |
|
1317 /* Some 2.3 and 2.4 versions do not handle the __copy__ method on |
|
1318 C objects correctly, so we have to fake it using a __reduce__- |
|
1319 based hack (see the element_reduce implementation above for |
|
1320 details). */ |
|
1321 |
|
1322 /* The behaviour has been changed in 2.3.5 and 2.4.1, so we're |
|
1323 using a runtime test to figure out if we need to fake things |
|
1324 or now (see the init code below). The following entry is |
|
1325 enabled only if the hack is needed. */ |
|
1326 |
|
1327 {"!__reduce__", (PyCFunction) element_reduce, METH_VARARGS}, |
|
1328 |
|
1329 {NULL, NULL} |
|
1330 }; |
|
1331 |
|
1332 static PyObject* |
|
1333 element_getattr(ElementObject* self, char* name) |
|
1334 { |
|
1335 PyObject* res; |
|
1336 |
|
1337 res = Py_FindMethod(element_methods, (PyObject*) self, name); |
|
1338 if (res) |
|
1339 return res; |
|
1340 |
|
1341 PyErr_Clear(); |
|
1342 |
|
1343 if (strcmp(name, "tag") == 0) |
|
1344 res = self->tag; |
|
1345 else if (strcmp(name, "text") == 0) |
|
1346 res = element_get_text(self); |
|
1347 else if (strcmp(name, "tail") == 0) { |
|
1348 res = element_get_tail(self); |
|
1349 } else if (strcmp(name, "attrib") == 0) { |
|
1350 if (!self->extra) |
|
1351 element_new_extra(self, NULL); |
|
1352 res = element_get_attrib(self); |
|
1353 } else { |
|
1354 PyErr_SetString(PyExc_AttributeError, name); |
|
1355 return NULL; |
|
1356 } |
|
1357 |
|
1358 if (!res) |
|
1359 return NULL; |
|
1360 |
|
1361 Py_INCREF(res); |
|
1362 return res; |
|
1363 } |
|
1364 |
|
1365 static int |
|
1366 element_setattr(ElementObject* self, const char* name, PyObject* value) |
|
1367 { |
|
1368 if (value == NULL) { |
|
1369 PyErr_SetString( |
|
1370 PyExc_AttributeError, |
|
1371 "can't delete element attributes" |
|
1372 ); |
|
1373 return -1; |
|
1374 } |
|
1375 |
|
1376 if (strcmp(name, "tag") == 0) { |
|
1377 Py_DECREF(self->tag); |
|
1378 self->tag = value; |
|
1379 Py_INCREF(self->tag); |
|
1380 } else if (strcmp(name, "text") == 0) { |
|
1381 Py_DECREF(JOIN_OBJ(self->text)); |
|
1382 self->text = value; |
|
1383 Py_INCREF(self->text); |
|
1384 } else if (strcmp(name, "tail") == 0) { |
|
1385 Py_DECREF(JOIN_OBJ(self->tail)); |
|
1386 self->tail = value; |
|
1387 Py_INCREF(self->tail); |
|
1388 } else if (strcmp(name, "attrib") == 0) { |
|
1389 if (!self->extra) |
|
1390 element_new_extra(self, NULL); |
|
1391 Py_DECREF(self->extra->attrib); |
|
1392 self->extra->attrib = value; |
|
1393 Py_INCREF(self->extra->attrib); |
|
1394 } else { |
|
1395 PyErr_SetString(PyExc_AttributeError, name); |
|
1396 return -1; |
|
1397 } |
|
1398 |
|
1399 return 0; |
|
1400 } |
|
1401 |
|
1402 static PySequenceMethods element_as_sequence = { |
|
1403 (lenfunc) element_length, |
|
1404 0, /* sq_concat */ |
|
1405 0, /* sq_repeat */ |
|
1406 element_getitem, |
|
1407 element_getslice, |
|
1408 element_setitem, |
|
1409 element_setslice, |
|
1410 }; |
|
1411 |
|
1412 statichere PyTypeObject Element_Type = { |
|
1413 PyObject_HEAD_INIT(NULL) |
|
1414 0, "Element", sizeof(ElementObject), 0, |
|
1415 /* methods */ |
|
1416 (destructor)element_dealloc, /* tp_dealloc */ |
|
1417 0, /* tp_print */ |
|
1418 (getattrfunc)element_getattr, /* tp_getattr */ |
|
1419 (setattrfunc)element_setattr, /* tp_setattr */ |
|
1420 0, /* tp_compare */ |
|
1421 (reprfunc)element_repr, /* tp_repr */ |
|
1422 0, /* tp_as_number */ |
|
1423 &element_as_sequence, /* tp_as_sequence */ |
|
1424 }; |
|
1425 |
|
1426 /* ==================================================================== */ |
|
1427 /* the tree builder type */ |
|
1428 |
|
1429 typedef struct { |
|
1430 PyObject_HEAD |
|
1431 |
|
1432 PyObject* root; /* root node (first created node) */ |
|
1433 |
|
1434 ElementObject* this; /* current node */ |
|
1435 ElementObject* last; /* most recently created node */ |
|
1436 |
|
1437 PyObject* data; /* data collector (string or list), or NULL */ |
|
1438 |
|
1439 PyObject* stack; /* element stack */ |
|
1440 Py_ssize_t index; /* current stack size (0=empty) */ |
|
1441 |
|
1442 /* element tracing */ |
|
1443 PyObject* events; /* list of events, or NULL if not collecting */ |
|
1444 PyObject* start_event_obj; /* event objects (NULL to ignore) */ |
|
1445 PyObject* end_event_obj; |
|
1446 PyObject* start_ns_event_obj; |
|
1447 PyObject* end_ns_event_obj; |
|
1448 |
|
1449 } TreeBuilderObject; |
|
1450 |
|
1451 staticforward PyTypeObject TreeBuilder_Type; |
|
1452 |
|
1453 #define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type) |
|
1454 |
|
1455 /* -------------------------------------------------------------------- */ |
|
1456 /* constructor and destructor */ |
|
1457 |
|
1458 LOCAL(PyObject*) |
|
1459 treebuilder_new(void) |
|
1460 { |
|
1461 TreeBuilderObject* self; |
|
1462 |
|
1463 self = PyObject_New(TreeBuilderObject, &TreeBuilder_Type); |
|
1464 if (self == NULL) |
|
1465 return NULL; |
|
1466 |
|
1467 self->root = NULL; |
|
1468 |
|
1469 Py_INCREF(Py_None); |
|
1470 self->this = (ElementObject*) Py_None; |
|
1471 |
|
1472 Py_INCREF(Py_None); |
|
1473 self->last = (ElementObject*) Py_None; |
|
1474 |
|
1475 self->data = NULL; |
|
1476 |
|
1477 self->stack = PyList_New(20); |
|
1478 self->index = 0; |
|
1479 |
|
1480 self->events = NULL; |
|
1481 self->start_event_obj = self->end_event_obj = NULL; |
|
1482 self->start_ns_event_obj = self->end_ns_event_obj = NULL; |
|
1483 |
|
1484 ALLOC(sizeof(TreeBuilderObject), "create treebuilder"); |
|
1485 |
|
1486 return (PyObject*) self; |
|
1487 } |
|
1488 |
|
1489 static PyObject* |
|
1490 treebuilder(PyObject* self_, PyObject* args) |
|
1491 { |
|
1492 if (!PyArg_ParseTuple(args, ":TreeBuilder")) |
|
1493 return NULL; |
|
1494 |
|
1495 return treebuilder_new(); |
|
1496 } |
|
1497 |
|
1498 static void |
|
1499 treebuilder_dealloc(TreeBuilderObject* self) |
|
1500 { |
|
1501 Py_XDECREF(self->end_ns_event_obj); |
|
1502 Py_XDECREF(self->start_ns_event_obj); |
|
1503 Py_XDECREF(self->end_event_obj); |
|
1504 Py_XDECREF(self->start_event_obj); |
|
1505 Py_XDECREF(self->events); |
|
1506 Py_DECREF(self->stack); |
|
1507 Py_XDECREF(self->data); |
|
1508 Py_DECREF(self->last); |
|
1509 Py_DECREF(self->this); |
|
1510 Py_XDECREF(self->root); |
|
1511 |
|
1512 RELEASE(sizeof(TreeBuilderObject), "destroy treebuilder"); |
|
1513 |
|
1514 PyObject_Del(self); |
|
1515 } |
|
1516 |
|
1517 /* -------------------------------------------------------------------- */ |
|
1518 /* handlers */ |
|
1519 |
|
1520 LOCAL(PyObject*) |
|
1521 treebuilder_handle_xml(TreeBuilderObject* self, PyObject* encoding, |
|
1522 PyObject* standalone) |
|
1523 { |
|
1524 Py_RETURN_NONE; |
|
1525 } |
|
1526 |
|
1527 LOCAL(PyObject*) |
|
1528 treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag, |
|
1529 PyObject* attrib) |
|
1530 { |
|
1531 PyObject* node; |
|
1532 PyObject* this; |
|
1533 |
|
1534 if (self->data) { |
|
1535 if (self->this == self->last) { |
|
1536 Py_DECREF(JOIN_OBJ(self->last->text)); |
|
1537 self->last->text = JOIN_SET( |
|
1538 self->data, PyList_CheckExact(self->data) |
|
1539 ); |
|
1540 } else { |
|
1541 Py_DECREF(JOIN_OBJ(self->last->tail)); |
|
1542 self->last->tail = JOIN_SET( |
|
1543 self->data, PyList_CheckExact(self->data) |
|
1544 ); |
|
1545 } |
|
1546 self->data = NULL; |
|
1547 } |
|
1548 |
|
1549 node = element_new(tag, attrib); |
|
1550 if (!node) |
|
1551 return NULL; |
|
1552 |
|
1553 this = (PyObject*) self->this; |
|
1554 |
|
1555 if (this != Py_None) { |
|
1556 if (element_add_subelement((ElementObject*) this, node) < 0) |
|
1557 goto error; |
|
1558 } else { |
|
1559 if (self->root) { |
|
1560 PyErr_SetString( |
|
1561 PyExc_SyntaxError, |
|
1562 "multiple elements on top level" |
|
1563 ); |
|
1564 goto error; |
|
1565 } |
|
1566 Py_INCREF(node); |
|
1567 self->root = node; |
|
1568 } |
|
1569 |
|
1570 if (self->index < PyList_GET_SIZE(self->stack)) { |
|
1571 if (PyList_SetItem(self->stack, self->index, this) < 0) |
|
1572 goto error; |
|
1573 Py_INCREF(this); |
|
1574 } else { |
|
1575 if (PyList_Append(self->stack, this) < 0) |
|
1576 goto error; |
|
1577 } |
|
1578 self->index++; |
|
1579 |
|
1580 Py_DECREF(this); |
|
1581 Py_INCREF(node); |
|
1582 self->this = (ElementObject*) node; |
|
1583 |
|
1584 Py_DECREF(self->last); |
|
1585 Py_INCREF(node); |
|
1586 self->last = (ElementObject*) node; |
|
1587 |
|
1588 if (self->start_event_obj) { |
|
1589 PyObject* res; |
|
1590 PyObject* action = self->start_event_obj; |
|
1591 res = PyTuple_New(2); |
|
1592 if (res) { |
|
1593 Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action); |
|
1594 Py_INCREF(node); PyTuple_SET_ITEM(res, 1, (PyObject*) node); |
|
1595 PyList_Append(self->events, res); |
|
1596 Py_DECREF(res); |
|
1597 } else |
|
1598 PyErr_Clear(); /* FIXME: propagate error */ |
|
1599 } |
|
1600 |
|
1601 return node; |
|
1602 |
|
1603 error: |
|
1604 Py_DECREF(node); |
|
1605 return NULL; |
|
1606 } |
|
1607 |
|
1608 LOCAL(PyObject*) |
|
1609 treebuilder_handle_data(TreeBuilderObject* self, PyObject* data) |
|
1610 { |
|
1611 if (!self->data) { |
|
1612 if (self->last == (ElementObject*) Py_None) { |
|
1613 /* ignore calls to data before the first call to start */ |
|
1614 Py_RETURN_NONE; |
|
1615 } |
|
1616 /* store the first item as is */ |
|
1617 Py_INCREF(data); self->data = data; |
|
1618 } else { |
|
1619 /* more than one item; use a list to collect items */ |
|
1620 if (PyString_CheckExact(self->data) && Py_REFCNT(self->data) == 1 && |
|
1621 PyString_CheckExact(data) && PyString_GET_SIZE(data) == 1) { |
|
1622 /* expat often generates single character data sections; handle |
|
1623 the most common case by resizing the existing string... */ |
|
1624 Py_ssize_t size = PyString_GET_SIZE(self->data); |
|
1625 if (_PyString_Resize(&self->data, size + 1) < 0) |
|
1626 return NULL; |
|
1627 PyString_AS_STRING(self->data)[size] = PyString_AS_STRING(data)[0]; |
|
1628 } else if (PyList_CheckExact(self->data)) { |
|
1629 if (PyList_Append(self->data, data) < 0) |
|
1630 return NULL; |
|
1631 } else { |
|
1632 PyObject* list = PyList_New(2); |
|
1633 if (!list) |
|
1634 return NULL; |
|
1635 PyList_SET_ITEM(list, 0, self->data); |
|
1636 Py_INCREF(data); PyList_SET_ITEM(list, 1, data); |
|
1637 self->data = list; |
|
1638 } |
|
1639 } |
|
1640 |
|
1641 Py_RETURN_NONE; |
|
1642 } |
|
1643 |
|
1644 LOCAL(PyObject*) |
|
1645 treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag) |
|
1646 { |
|
1647 PyObject* item; |
|
1648 |
|
1649 if (self->data) { |
|
1650 if (self->this == self->last) { |
|
1651 Py_DECREF(JOIN_OBJ(self->last->text)); |
|
1652 self->last->text = JOIN_SET( |
|
1653 self->data, PyList_CheckExact(self->data) |
|
1654 ); |
|
1655 } else { |
|
1656 Py_DECREF(JOIN_OBJ(self->last->tail)); |
|
1657 self->last->tail = JOIN_SET( |
|
1658 self->data, PyList_CheckExact(self->data) |
|
1659 ); |
|
1660 } |
|
1661 self->data = NULL; |
|
1662 } |
|
1663 |
|
1664 if (self->index == 0) { |
|
1665 PyErr_SetString( |
|
1666 PyExc_IndexError, |
|
1667 "pop from empty stack" |
|
1668 ); |
|
1669 return NULL; |
|
1670 } |
|
1671 |
|
1672 self->index--; |
|
1673 |
|
1674 item = PyList_GET_ITEM(self->stack, self->index); |
|
1675 Py_INCREF(item); |
|
1676 |
|
1677 Py_DECREF(self->last); |
|
1678 |
|
1679 self->last = (ElementObject*) self->this; |
|
1680 self->this = (ElementObject*) item; |
|
1681 |
|
1682 if (self->end_event_obj) { |
|
1683 PyObject* res; |
|
1684 PyObject* action = self->end_event_obj; |
|
1685 PyObject* node = (PyObject*) self->last; |
|
1686 res = PyTuple_New(2); |
|
1687 if (res) { |
|
1688 Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action); |
|
1689 Py_INCREF(node); PyTuple_SET_ITEM(res, 1, (PyObject*) node); |
|
1690 PyList_Append(self->events, res); |
|
1691 Py_DECREF(res); |
|
1692 } else |
|
1693 PyErr_Clear(); /* FIXME: propagate error */ |
|
1694 } |
|
1695 |
|
1696 Py_INCREF(self->last); |
|
1697 return (PyObject*) self->last; |
|
1698 } |
|
1699 |
|
1700 LOCAL(void) |
|
1701 treebuilder_handle_namespace(TreeBuilderObject* self, int start, |
|
1702 const char* prefix, const char *uri) |
|
1703 { |
|
1704 PyObject* res; |
|
1705 PyObject* action; |
|
1706 PyObject* parcel; |
|
1707 |
|
1708 if (!self->events) |
|
1709 return; |
|
1710 |
|
1711 if (start) { |
|
1712 if (!self->start_ns_event_obj) |
|
1713 return; |
|
1714 action = self->start_ns_event_obj; |
|
1715 /* FIXME: prefix and uri use utf-8 encoding! */ |
|
1716 parcel = Py_BuildValue("ss", (prefix) ? prefix : "", uri); |
|
1717 if (!parcel) |
|
1718 return; |
|
1719 Py_INCREF(action); |
|
1720 } else { |
|
1721 if (!self->end_ns_event_obj) |
|
1722 return; |
|
1723 action = self->end_ns_event_obj; |
|
1724 Py_INCREF(action); |
|
1725 parcel = Py_None; |
|
1726 Py_INCREF(parcel); |
|
1727 } |
|
1728 |
|
1729 res = PyTuple_New(2); |
|
1730 |
|
1731 if (res) { |
|
1732 PyTuple_SET_ITEM(res, 0, action); |
|
1733 PyTuple_SET_ITEM(res, 1, parcel); |
|
1734 PyList_Append(self->events, res); |
|
1735 Py_DECREF(res); |
|
1736 } else |
|
1737 PyErr_Clear(); /* FIXME: propagate error */ |
|
1738 } |
|
1739 |
|
1740 /* -------------------------------------------------------------------- */ |
|
1741 /* methods (in alphabetical order) */ |
|
1742 |
|
1743 static PyObject* |
|
1744 treebuilder_data(TreeBuilderObject* self, PyObject* args) |
|
1745 { |
|
1746 PyObject* data; |
|
1747 if (!PyArg_ParseTuple(args, "O:data", &data)) |
|
1748 return NULL; |
|
1749 |
|
1750 return treebuilder_handle_data(self, data); |
|
1751 } |
|
1752 |
|
1753 static PyObject* |
|
1754 treebuilder_end(TreeBuilderObject* self, PyObject* args) |
|
1755 { |
|
1756 PyObject* tag; |
|
1757 if (!PyArg_ParseTuple(args, "O:end", &tag)) |
|
1758 return NULL; |
|
1759 |
|
1760 return treebuilder_handle_end(self, tag); |
|
1761 } |
|
1762 |
|
1763 LOCAL(PyObject*) |
|
1764 treebuilder_done(TreeBuilderObject* self) |
|
1765 { |
|
1766 PyObject* res; |
|
1767 |
|
1768 /* FIXME: check stack size? */ |
|
1769 |
|
1770 if (self->root) |
|
1771 res = self->root; |
|
1772 else |
|
1773 res = Py_None; |
|
1774 |
|
1775 Py_INCREF(res); |
|
1776 return res; |
|
1777 } |
|
1778 |
|
1779 static PyObject* |
|
1780 treebuilder_close(TreeBuilderObject* self, PyObject* args) |
|
1781 { |
|
1782 if (!PyArg_ParseTuple(args, ":close")) |
|
1783 return NULL; |
|
1784 |
|
1785 return treebuilder_done(self); |
|
1786 } |
|
1787 |
|
1788 static PyObject* |
|
1789 treebuilder_start(TreeBuilderObject* self, PyObject* args) |
|
1790 { |
|
1791 PyObject* tag; |
|
1792 PyObject* attrib = Py_None; |
|
1793 if (!PyArg_ParseTuple(args, "O|O:start", &tag, &attrib)) |
|
1794 return NULL; |
|
1795 |
|
1796 return treebuilder_handle_start(self, tag, attrib); |
|
1797 } |
|
1798 |
|
1799 static PyObject* |
|
1800 treebuilder_xml(TreeBuilderObject* self, PyObject* args) |
|
1801 { |
|
1802 PyObject* encoding; |
|
1803 PyObject* standalone; |
|
1804 if (!PyArg_ParseTuple(args, "OO:xml", &encoding, &standalone)) |
|
1805 return NULL; |
|
1806 |
|
1807 return treebuilder_handle_xml(self, encoding, standalone); |
|
1808 } |
|
1809 |
|
1810 static PyMethodDef treebuilder_methods[] = { |
|
1811 {"data", (PyCFunction) treebuilder_data, METH_VARARGS}, |
|
1812 {"start", (PyCFunction) treebuilder_start, METH_VARARGS}, |
|
1813 {"end", (PyCFunction) treebuilder_end, METH_VARARGS}, |
|
1814 {"xml", (PyCFunction) treebuilder_xml, METH_VARARGS}, |
|
1815 {"close", (PyCFunction) treebuilder_close, METH_VARARGS}, |
|
1816 {NULL, NULL} |
|
1817 }; |
|
1818 |
|
1819 static PyObject* |
|
1820 treebuilder_getattr(TreeBuilderObject* self, char* name) |
|
1821 { |
|
1822 return Py_FindMethod(treebuilder_methods, (PyObject*) self, name); |
|
1823 } |
|
1824 |
|
1825 statichere PyTypeObject TreeBuilder_Type = { |
|
1826 PyObject_HEAD_INIT(NULL) |
|
1827 0, "TreeBuilder", sizeof(TreeBuilderObject), 0, |
|
1828 /* methods */ |
|
1829 (destructor)treebuilder_dealloc, /* tp_dealloc */ |
|
1830 0, /* tp_print */ |
|
1831 (getattrfunc)treebuilder_getattr, /* tp_getattr */ |
|
1832 }; |
|
1833 |
|
1834 /* ==================================================================== */ |
|
1835 /* the expat interface */ |
|
1836 |
|
1837 #if defined(USE_EXPAT) |
|
1838 |
|
1839 #include "expat.h" |
|
1840 |
|
/* EXPAT(name) selects an expat entry point: through the pyexpat
   dispatch table when USE_PYEXPAT_CAPI is defined, and the
   XML_-prefixed function otherwise. */
#ifdef USE_PYEXPAT_CAPI
#include "pyexpat.h"
static struct PyExpat_CAPI* expat_capi;
#define EXPAT(func) (expat_capi->func)
#else
#define EXPAT(func) (XML_##func)
#endif
|
1848 |
|
1849 typedef struct { |
|
1850 PyObject_HEAD |
|
1851 |
|
1852 XML_Parser parser; |
|
1853 |
|
1854 PyObject* target; |
|
1855 PyObject* entity; |
|
1856 |
|
1857 PyObject* names; |
|
1858 |
|
1859 PyObject* handle_xml; |
|
1860 PyObject* handle_start; |
|
1861 PyObject* handle_data; |
|
1862 PyObject* handle_end; |
|
1863 |
|
1864 PyObject* handle_comment; |
|
1865 PyObject* handle_pi; |
|
1866 |
|
1867 } XMLParserObject; |
|
1868 |
|
1869 staticforward PyTypeObject XMLParser_Type; |
|
1870 |
|
1871 /* helpers */ |
|
1872 |
|
#if defined(Py_USING_UNICODE)
/* Return 1 if any of the first size bytes of string has its high bit
   set (i.e. the 8-bit string contains UTF-8 encoded characters), and
   0 if all of them are 7-bit. */
LOCAL(int)
checkstring(const char* string, int size)
{
    int pos = 0;

    while (pos < size) {
        if (string[pos] & 0x80)
            return 1;
        pos++;
    }

    return 0;
}
#endif
|
1887 |
|
1888 LOCAL(PyObject*) |
|
1889 makestring(const char* string, int size) |
|
1890 { |
|
1891 /* convert a UTF-8 string to either a 7-bit ascii string or a |
|
1892 Unicode string */ |
|
1893 |
|
1894 #if defined(Py_USING_UNICODE) |
|
1895 if (checkstring(string, size)) |
|
1896 return PyUnicode_DecodeUTF8(string, size, "strict"); |
|
1897 #endif |
|
1898 |
|
1899 return PyString_FromStringAndSize(string, size); |
|
1900 } |
|
1901 |
|
1902 LOCAL(PyObject*) |
|
1903 makeuniversal(XMLParserObject* self, const char* string) |
|
1904 { |
|
1905 /* convert a UTF-8 tag/attribute name from the expat parser |
|
1906 to a universal name string */ |
|
1907 |
|
1908 int size = strlen(string); |
|
1909 PyObject* key; |
|
1910 PyObject* value; |
|
1911 |
|
1912 /* look the 'raw' name up in the names dictionary */ |
|
1913 key = PyString_FromStringAndSize(string, size); |
|
1914 if (!key) |
|
1915 return NULL; |
|
1916 |
|
1917 value = PyDict_GetItem(self->names, key); |
|
1918 |
|
1919 if (value) { |
|
1920 Py_INCREF(value); |
|
1921 } else { |
|
1922 /* new name. convert to universal name, and decode as |
|
1923 necessary */ |
|
1924 |
|
1925 PyObject* tag; |
|
1926 char* p; |
|
1927 int i; |
|
1928 |
|
1929 /* look for namespace separator */ |
|
1930 for (i = 0; i < size; i++) |
|
1931 if (string[i] == '}') |
|
1932 break; |
|
1933 if (i != size) { |
|
1934 /* convert to universal name */ |
|
1935 tag = PyString_FromStringAndSize(NULL, size+1); |
|
1936 p = PyString_AS_STRING(tag); |
|
1937 p[0] = '{'; |
|
1938 memcpy(p+1, string, size); |
|
1939 size++; |
|
1940 } else { |
|
1941 /* plain name; use key as tag */ |
|
1942 Py_INCREF(key); |
|
1943 tag = key; |
|
1944 } |
|
1945 |
|
1946 /* decode universal name */ |
|
1947 #if defined(Py_USING_UNICODE) |
|
1948 /* inline makestring, to avoid duplicating the source string if |
|
1949 it's not an utf-8 string */ |
|
1950 p = PyString_AS_STRING(tag); |
|
1951 if (checkstring(p, size)) { |
|
1952 value = PyUnicode_DecodeUTF8(p, size, "strict"); |
|
1953 Py_DECREF(tag); |
|
1954 if (!value) { |
|
1955 Py_DECREF(key); |
|
1956 return NULL; |
|
1957 } |
|
1958 } else |
|
1959 #endif |
|
1960 value = tag; /* use tag as is */ |
|
1961 |
|
1962 /* add to names dictionary */ |
|
1963 if (PyDict_SetItem(self->names, key, value) < 0) { |
|
1964 Py_DECREF(key); |
|
1965 Py_DECREF(value); |
|
1966 return NULL; |
|
1967 } |
|
1968 } |
|
1969 |
|
1970 Py_DECREF(key); |
|
1971 return value; |
|
1972 } |
|
1973 |
|
1974 /* -------------------------------------------------------------------- */ |
|
1975 /* handlers */ |
|
1976 |
|
1977 static void |
|
1978 expat_default_handler(XMLParserObject* self, const XML_Char* data_in, |
|
1979 int data_len) |
|
1980 { |
|
1981 PyObject* key; |
|
1982 PyObject* value; |
|
1983 PyObject* res; |
|
1984 |
|
1985 if (data_len < 2 || data_in[0] != '&') |
|
1986 return; |
|
1987 |
|
1988 key = makestring(data_in + 1, data_len - 2); |
|
1989 if (!key) |
|
1990 return; |
|
1991 |
|
1992 value = PyDict_GetItem(self->entity, key); |
|
1993 |
|
1994 if (value) { |
|
1995 if (TreeBuilder_CheckExact(self->target)) |
|
1996 res = treebuilder_handle_data( |
|
1997 (TreeBuilderObject*) self->target, value |
|
1998 ); |
|
1999 else if (self->handle_data) |
|
2000 res = PyObject_CallFunction(self->handle_data, "O", value); |
|
2001 else |
|
2002 res = NULL; |
|
2003 Py_XDECREF(res); |
|
2004 } else { |
|
2005 PyErr_Format( |
|
2006 PyExc_SyntaxError, "undefined entity &%s;: line %ld, column %ld", |
|
2007 PyString_AS_STRING(key), |
|
2008 EXPAT(GetErrorLineNumber)(self->parser), |
|
2009 EXPAT(GetErrorColumnNumber)(self->parser) |
|
2010 ); |
|
2011 } |
|
2012 |
|
2013 Py_DECREF(key); |
|
2014 } |
|
2015 |
|
2016 static void |
|
2017 expat_start_handler(XMLParserObject* self, const XML_Char* tag_in, |
|
2018 const XML_Char **attrib_in) |
|
2019 { |
|
2020 PyObject* res; |
|
2021 PyObject* tag; |
|
2022 PyObject* attrib; |
|
2023 int ok; |
|
2024 |
|
2025 /* tag name */ |
|
2026 tag = makeuniversal(self, tag_in); |
|
2027 if (!tag) |
|
2028 return; /* parser will look for errors */ |
|
2029 |
|
2030 /* attributes */ |
|
2031 if (attrib_in[0]) { |
|
2032 attrib = PyDict_New(); |
|
2033 if (!attrib) |
|
2034 return; |
|
2035 while (attrib_in[0] && attrib_in[1]) { |
|
2036 PyObject* key = makeuniversal(self, attrib_in[0]); |
|
2037 PyObject* value = makestring(attrib_in[1], strlen(attrib_in[1])); |
|
2038 if (!key || !value) { |
|
2039 Py_XDECREF(value); |
|
2040 Py_XDECREF(key); |
|
2041 Py_DECREF(attrib); |
|
2042 return; |
|
2043 } |
|
2044 ok = PyDict_SetItem(attrib, key, value); |
|
2045 Py_DECREF(value); |
|
2046 Py_DECREF(key); |
|
2047 if (ok < 0) { |
|
2048 Py_DECREF(attrib); |
|
2049 return; |
|
2050 } |
|
2051 attrib_in += 2; |
|
2052 } |
|
2053 } else { |
|
2054 Py_INCREF(Py_None); |
|
2055 attrib = Py_None; |
|
2056 } |
|
2057 |
|
2058 if (TreeBuilder_CheckExact(self->target)) |
|
2059 /* shortcut */ |
|
2060 res = treebuilder_handle_start((TreeBuilderObject*) self->target, |
|
2061 tag, attrib); |
|
2062 else if (self->handle_start) |
|
2063 res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib); |
|
2064 else |
|
2065 res = NULL; |
|
2066 |
|
2067 Py_DECREF(tag); |
|
2068 Py_DECREF(attrib); |
|
2069 |
|
2070 Py_XDECREF(res); |
|
2071 } |
|
2072 |
|
2073 static void |
|
2074 expat_data_handler(XMLParserObject* self, const XML_Char* data_in, |
|
2075 int data_len) |
|
2076 { |
|
2077 PyObject* data; |
|
2078 PyObject* res; |
|
2079 |
|
2080 data = makestring(data_in, data_len); |
|
2081 if (!data) |
|
2082 return; /* parser will look for errors */ |
|
2083 |
|
2084 if (TreeBuilder_CheckExact(self->target)) |
|
2085 /* shortcut */ |
|
2086 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data); |
|
2087 else if (self->handle_data) |
|
2088 res = PyObject_CallFunction(self->handle_data, "O", data); |
|
2089 else |
|
2090 res = NULL; |
|
2091 |
|
2092 Py_DECREF(data); |
|
2093 |
|
2094 Py_XDECREF(res); |
|
2095 } |
|
2096 |
|
2097 static void |
|
2098 expat_end_handler(XMLParserObject* self, const XML_Char* tag_in) |
|
2099 { |
|
2100 PyObject* tag; |
|
2101 PyObject* res = NULL; |
|
2102 |
|
2103 if (TreeBuilder_CheckExact(self->target)) |
|
2104 /* shortcut */ |
|
2105 /* the standard tree builder doesn't look at the end tag */ |
|
2106 res = treebuilder_handle_end( |
|
2107 (TreeBuilderObject*) self->target, Py_None |
|
2108 ); |
|
2109 else if (self->handle_end) { |
|
2110 tag = makeuniversal(self, tag_in); |
|
2111 if (tag) { |
|
2112 res = PyObject_CallFunction(self->handle_end, "O", tag); |
|
2113 Py_DECREF(tag); |
|
2114 } |
|
2115 } |
|
2116 |
|
2117 Py_XDECREF(res); |
|
2118 } |
|
2119 |
|
2120 static void |
|
2121 expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix, |
|
2122 const XML_Char *uri) |
|
2123 { |
|
2124 treebuilder_handle_namespace( |
|
2125 (TreeBuilderObject*) self->target, 1, prefix, uri |
|
2126 ); |
|
2127 } |
|
2128 |
|
2129 static void |
|
2130 expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in) |
|
2131 { |
|
2132 treebuilder_handle_namespace( |
|
2133 (TreeBuilderObject*) self->target, 0, NULL, NULL |
|
2134 ); |
|
2135 } |
|
2136 |
|
2137 static void |
|
2138 expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in) |
|
2139 { |
|
2140 PyObject* comment; |
|
2141 PyObject* res; |
|
2142 |
|
2143 if (self->handle_comment) { |
|
2144 comment = makestring(comment_in, strlen(comment_in)); |
|
2145 if (comment) { |
|
2146 res = PyObject_CallFunction(self->handle_comment, "O", comment); |
|
2147 Py_XDECREF(res); |
|
2148 Py_DECREF(comment); |
|
2149 } |
|
2150 } |
|
2151 } |
|
2152 |
|
2153 static void |
|
2154 expat_pi_handler(XMLParserObject* self, const XML_Char* target_in, |
|
2155 const XML_Char* data_in) |
|
2156 { |
|
2157 PyObject* target; |
|
2158 PyObject* data; |
|
2159 PyObject* res; |
|
2160 |
|
2161 if (self->handle_pi) { |
|
2162 target = makestring(target_in, strlen(target_in)); |
|
2163 data = makestring(data_in, strlen(data_in)); |
|
2164 if (target && data) { |
|
2165 res = PyObject_CallFunction(self->handle_pi, "OO", target, data); |
|
2166 Py_XDECREF(res); |
|
2167 Py_DECREF(data); |
|
2168 Py_DECREF(target); |
|
2169 } else { |
|
2170 Py_XDECREF(data); |
|
2171 Py_XDECREF(target); |
|
2172 } |
|
2173 } |
|
2174 } |
|
2175 |
|
#if defined(Py_USING_UNICODE)
/* Expat callback for encodings expat does not know itself.
 *
 * Builds a 256-entry byte-to-code-point table by decoding the byte
 * values 0..255 with Python's codec for `name` (error mode "replace").
 * Bytes that decode to the replacement character are mapped to -1.
 *
 * Returns XML_STATUS_OK on success, or XML_STATUS_ERROR if decoding
 * fails or does not yield exactly 256 characters.
 */
static int
expat_unknown_encoding_handler(XMLParserObject *self, const XML_Char *name,
                               XML_Encoding *info)
{
    PyObject* decoded;
    Py_UNICODE* chars;
    unsigned char bytes[256];
    int i;

    memset(info, 0, sizeof(XML_Encoding));

    /* one probe byte for every possible value */
    for (i = 0; i < 256; i++)
        bytes[i] = (unsigned char) i;

    decoded = PyUnicode_Decode((char*) bytes, 256, name, "replace");
    if (!decoded)
        return XML_STATUS_ERROR;

    /* only a strict one-byte-to-one-character mapping can be tabulated */
    if (PyUnicode_GET_SIZE(decoded) != 256) {
        Py_DECREF(decoded);
        return XML_STATUS_ERROR;
    }

    chars = PyUnicode_AS_UNICODE(decoded);

    for (i = 0; i < 256; i++)
        info->map[i] = (chars[i] != Py_UNICODE_REPLACEMENT_CHARACTER)
            ? (int) chars[i] : -1;

    Py_DECREF(decoded);

    return XML_STATUS_OK;
}
#endif
|
2214 |
|
2215 /* -------------------------------------------------------------------- */ |
|
2216 /* constructor and destructor */ |
|
2217 |
|
2218 static PyObject* |
|
2219 xmlparser(PyObject* self_, PyObject* args, PyObject* kw) |
|
2220 { |
|
2221 XMLParserObject* self; |
|
2222 /* FIXME: does this need to be static? */ |
|
2223 static XML_Memory_Handling_Suite memory_handler; |
|
2224 |
|
2225 PyObject* target = NULL; |
|
2226 char* encoding = NULL; |
|
2227 static char* kwlist[] = { "target", "encoding", NULL }; |
|
2228 if (!PyArg_ParseTupleAndKeywords(args, kw, "|Oz:XMLParser", kwlist, |
|
2229 &target, &encoding)) |
|
2230 return NULL; |
|
2231 |
|
2232 #if defined(USE_PYEXPAT_CAPI) |
|
2233 if (!expat_capi) { |
|
2234 PyErr_SetString( |
|
2235 PyExc_RuntimeError, "cannot load dispatch table from pyexpat" |
|
2236 ); |
|
2237 return NULL; |
|
2238 } |
|
2239 #endif |
|
2240 |
|
2241 self = PyObject_New(XMLParserObject, &XMLParser_Type); |
|
2242 if (self == NULL) |
|
2243 return NULL; |
|
2244 |
|
2245 self->entity = PyDict_New(); |
|
2246 if (!self->entity) { |
|
2247 PyObject_Del(self); |
|
2248 return NULL; |
|
2249 } |
|
2250 |
|
2251 self->names = PyDict_New(); |
|
2252 if (!self->names) { |
|
2253 PyObject_Del(self->entity); |
|
2254 PyObject_Del(self); |
|
2255 return NULL; |
|
2256 } |
|
2257 |
|
2258 memory_handler.malloc_fcn = PyObject_Malloc; |
|
2259 memory_handler.realloc_fcn = PyObject_Realloc; |
|
2260 memory_handler.free_fcn = PyObject_Free; |
|
2261 |
|
2262 self->parser = EXPAT(ParserCreate_MM)(encoding, &memory_handler, "}"); |
|
2263 if (!self->parser) { |
|
2264 PyObject_Del(self->names); |
|
2265 PyObject_Del(self->entity); |
|
2266 PyObject_Del(self); |
|
2267 PyErr_NoMemory(); |
|
2268 return NULL; |
|
2269 } |
|
2270 |
|
2271 /* setup target handlers */ |
|
2272 if (!target) { |
|
2273 target = treebuilder_new(); |
|
2274 if (!target) { |
|
2275 EXPAT(ParserFree)(self->parser); |
|
2276 PyObject_Del(self->names); |
|
2277 PyObject_Del(self->entity); |
|
2278 PyObject_Del(self); |
|
2279 return NULL; |
|
2280 } |
|
2281 } else |
|
2282 Py_INCREF(target); |
|
2283 self->target = target; |
|
2284 |
|
2285 self->handle_xml = PyObject_GetAttrString(target, "xml"); |
|
2286 self->handle_start = PyObject_GetAttrString(target, "start"); |
|
2287 self->handle_data = PyObject_GetAttrString(target, "data"); |
|
2288 self->handle_end = PyObject_GetAttrString(target, "end"); |
|
2289 self->handle_comment = PyObject_GetAttrString(target, "comment"); |
|
2290 self->handle_pi = PyObject_GetAttrString(target, "pi"); |
|
2291 |
|
2292 PyErr_Clear(); |
|
2293 |
|
2294 /* configure parser */ |
|
2295 EXPAT(SetUserData)(self->parser, self); |
|
2296 EXPAT(SetElementHandler)( |
|
2297 self->parser, |
|
2298 (XML_StartElementHandler) expat_start_handler, |
|
2299 (XML_EndElementHandler) expat_end_handler |
|
2300 ); |
|
2301 EXPAT(SetDefaultHandlerExpand)( |
|
2302 self->parser, |
|
2303 (XML_DefaultHandler) expat_default_handler |
|
2304 ); |
|
2305 EXPAT(SetCharacterDataHandler)( |
|
2306 self->parser, |
|
2307 (XML_CharacterDataHandler) expat_data_handler |
|
2308 ); |
|
2309 if (self->handle_comment) |
|
2310 EXPAT(SetCommentHandler)( |
|
2311 self->parser, |
|
2312 (XML_CommentHandler) expat_comment_handler |
|
2313 ); |
|
2314 if (self->handle_pi) |
|
2315 EXPAT(SetProcessingInstructionHandler)( |
|
2316 self->parser, |
|
2317 (XML_ProcessingInstructionHandler) expat_pi_handler |
|
2318 ); |
|
2319 #if defined(Py_USING_UNICODE) |
|
2320 EXPAT(SetUnknownEncodingHandler)( |
|
2321 self->parser, |
|
2322 (XML_UnknownEncodingHandler) expat_unknown_encoding_handler, NULL |
|
2323 ); |
|
2324 #endif |
|
2325 |
|
2326 ALLOC(sizeof(XMLParserObject), "create expatparser"); |
|
2327 |
|
2328 return (PyObject*) self; |
|
2329 } |
|
2330 |
|
2331 static void |
|
2332 xmlparser_dealloc(XMLParserObject* self) |
|
2333 { |
|
2334 EXPAT(ParserFree)(self->parser); |
|
2335 |
|
2336 Py_XDECREF(self->handle_pi); |
|
2337 Py_XDECREF(self->handle_comment); |
|
2338 Py_XDECREF(self->handle_end); |
|
2339 Py_XDECREF(self->handle_data); |
|
2340 Py_XDECREF(self->handle_start); |
|
2341 Py_XDECREF(self->handle_xml); |
|
2342 |
|
2343 Py_DECREF(self->target); |
|
2344 Py_DECREF(self->entity); |
|
2345 Py_DECREF(self->names); |
|
2346 |
|
2347 RELEASE(sizeof(XMLParserObject), "destroy expatparser"); |
|
2348 |
|
2349 PyObject_Del(self); |
|
2350 } |
|
2351 |
|
2352 /* -------------------------------------------------------------------- */ |
|
2353 /* methods (in alphabetical order) */ |
|
2354 |
|
2355 LOCAL(PyObject*) |
|
2356 expat_parse(XMLParserObject* self, char* data, int data_len, int final) |
|
2357 { |
|
2358 int ok; |
|
2359 |
|
2360 ok = EXPAT(Parse)(self->parser, data, data_len, final); |
|
2361 |
|
2362 if (PyErr_Occurred()) |
|
2363 return NULL; |
|
2364 |
|
2365 if (!ok) { |
|
2366 PyErr_Format( |
|
2367 PyExc_SyntaxError, "%s: line %ld, column %ld", |
|
2368 EXPAT(ErrorString)(EXPAT(GetErrorCode)(self->parser)), |
|
2369 EXPAT(GetErrorLineNumber)(self->parser), |
|
2370 EXPAT(GetErrorColumnNumber)(self->parser) |
|
2371 ); |
|
2372 return NULL; |
|
2373 } |
|
2374 |
|
2375 Py_RETURN_NONE; |
|
2376 } |
|
2377 |
|
2378 static PyObject* |
|
2379 xmlparser_close(XMLParserObject* self, PyObject* args) |
|
2380 { |
|
2381 /* end feeding data to parser */ |
|
2382 |
|
2383 PyObject* res; |
|
2384 if (!PyArg_ParseTuple(args, ":close")) |
|
2385 return NULL; |
|
2386 |
|
2387 res = expat_parse(self, "", 0, 1); |
|
2388 |
|
2389 if (res && TreeBuilder_CheckExact(self->target)) { |
|
2390 Py_DECREF(res); |
|
2391 return treebuilder_done((TreeBuilderObject*) self->target); |
|
2392 } |
|
2393 |
|
2394 return res; |
|
2395 } |
|
2396 |
|
2397 static PyObject* |
|
2398 xmlparser_feed(XMLParserObject* self, PyObject* args) |
|
2399 { |
|
2400 /* feed data to parser */ |
|
2401 |
|
2402 char* data; |
|
2403 int data_len; |
|
2404 if (!PyArg_ParseTuple(args, "s#:feed", &data, &data_len)) |
|
2405 return NULL; |
|
2406 |
|
2407 return expat_parse(self, data, data_len, 0); |
|
2408 } |
|
2409 |
|
2410 static PyObject* |
|
2411 xmlparser_parse(XMLParserObject* self, PyObject* args) |
|
2412 { |
|
2413 /* (internal) parse until end of input stream */ |
|
2414 |
|
2415 PyObject* reader; |
|
2416 PyObject* buffer; |
|
2417 PyObject* res; |
|
2418 |
|
2419 PyObject* fileobj; |
|
2420 if (!PyArg_ParseTuple(args, "O:_parse", &fileobj)) |
|
2421 return NULL; |
|
2422 |
|
2423 reader = PyObject_GetAttrString(fileobj, "read"); |
|
2424 if (!reader) |
|
2425 return NULL; |
|
2426 |
|
2427 /* read from open file object */ |
|
2428 for (;;) { |
|
2429 |
|
2430 buffer = PyObject_CallFunction(reader, "i", 64*1024); |
|
2431 |
|
2432 if (!buffer) { |
|
2433 /* read failed (e.g. due to KeyboardInterrupt) */ |
|
2434 Py_DECREF(reader); |
|
2435 return NULL; |
|
2436 } |
|
2437 |
|
2438 if (!PyString_CheckExact(buffer) || PyString_GET_SIZE(buffer) == 0) { |
|
2439 Py_DECREF(buffer); |
|
2440 break; |
|
2441 } |
|
2442 |
|
2443 res = expat_parse( |
|
2444 self, PyString_AS_STRING(buffer), PyString_GET_SIZE(buffer), 0 |
|
2445 ); |
|
2446 |
|
2447 Py_DECREF(buffer); |
|
2448 |
|
2449 if (!res) { |
|
2450 Py_DECREF(reader); |
|
2451 return NULL; |
|
2452 } |
|
2453 Py_DECREF(res); |
|
2454 |
|
2455 } |
|
2456 |
|
2457 Py_DECREF(reader); |
|
2458 |
|
2459 res = expat_parse(self, "", 0, 1); |
|
2460 |
|
2461 if (res && TreeBuilder_CheckExact(self->target)) { |
|
2462 Py_DECREF(res); |
|
2463 return treebuilder_done((TreeBuilderObject*) self->target); |
|
2464 } |
|
2465 |
|
2466 return res; |
|
2467 } |
|
2468 |
|
2469 static PyObject* |
|
2470 xmlparser_setevents(XMLParserObject* self, PyObject* args) |
|
2471 { |
|
2472 /* activate element event reporting */ |
|
2473 |
|
2474 Py_ssize_t i; |
|
2475 TreeBuilderObject* target; |
|
2476 |
|
2477 PyObject* events; /* event collector */ |
|
2478 PyObject* event_set = Py_None; |
|
2479 if (!PyArg_ParseTuple(args, "O!|O:_setevents", &PyList_Type, &events, |
|
2480 &event_set)) |
|
2481 return NULL; |
|
2482 |
|
2483 if (!TreeBuilder_CheckExact(self->target)) { |
|
2484 PyErr_SetString( |
|
2485 PyExc_TypeError, |
|
2486 "event handling only supported for cElementTree.Treebuilder " |
|
2487 "targets" |
|
2488 ); |
|
2489 return NULL; |
|
2490 } |
|
2491 |
|
2492 target = (TreeBuilderObject*) self->target; |
|
2493 |
|
2494 Py_INCREF(events); |
|
2495 Py_XDECREF(target->events); |
|
2496 target->events = events; |
|
2497 |
|
2498 /* clear out existing events */ |
|
2499 Py_XDECREF(target->start_event_obj); target->start_event_obj = NULL; |
|
2500 Py_XDECREF(target->end_event_obj); target->end_event_obj = NULL; |
|
2501 Py_XDECREF(target->start_ns_event_obj); target->start_ns_event_obj = NULL; |
|
2502 Py_XDECREF(target->end_ns_event_obj); target->end_ns_event_obj = NULL; |
|
2503 |
|
2504 if (event_set == Py_None) { |
|
2505 /* default is "end" only */ |
|
2506 target->end_event_obj = PyString_FromString("end"); |
|
2507 Py_RETURN_NONE; |
|
2508 } |
|
2509 |
|
2510 if (!PyTuple_Check(event_set)) /* FIXME: handle arbitrary sequences */ |
|
2511 goto error; |
|
2512 |
|
2513 for (i = 0; i < PyTuple_GET_SIZE(event_set); i++) { |
|
2514 PyObject* item = PyTuple_GET_ITEM(event_set, i); |
|
2515 char* event; |
|
2516 if (!PyString_Check(item)) |
|
2517 goto error; |
|
2518 event = PyString_AS_STRING(item); |
|
2519 if (strcmp(event, "start") == 0) { |
|
2520 Py_INCREF(item); |
|
2521 target->start_event_obj = item; |
|
2522 } else if (strcmp(event, "end") == 0) { |
|
2523 Py_INCREF(item); |
|
2524 Py_XDECREF(target->end_event_obj); |
|
2525 target->end_event_obj = item; |
|
2526 } else if (strcmp(event, "start-ns") == 0) { |
|
2527 Py_INCREF(item); |
|
2528 Py_XDECREF(target->start_ns_event_obj); |
|
2529 target->start_ns_event_obj = item; |
|
2530 EXPAT(SetNamespaceDeclHandler)( |
|
2531 self->parser, |
|
2532 (XML_StartNamespaceDeclHandler) expat_start_ns_handler, |
|
2533 (XML_EndNamespaceDeclHandler) expat_end_ns_handler |
|
2534 ); |
|
2535 } else if (strcmp(event, "end-ns") == 0) { |
|
2536 Py_INCREF(item); |
|
2537 Py_XDECREF(target->end_ns_event_obj); |
|
2538 target->end_ns_event_obj = item; |
|
2539 EXPAT(SetNamespaceDeclHandler)( |
|
2540 self->parser, |
|
2541 (XML_StartNamespaceDeclHandler) expat_start_ns_handler, |
|
2542 (XML_EndNamespaceDeclHandler) expat_end_ns_handler |
|
2543 ); |
|
2544 } else { |
|
2545 PyErr_Format( |
|
2546 PyExc_ValueError, |
|
2547 "unknown event '%s'", event |
|
2548 ); |
|
2549 return NULL; |
|
2550 } |
|
2551 } |
|
2552 |
|
2553 Py_RETURN_NONE; |
|
2554 |
|
2555 error: |
|
2556 PyErr_SetString( |
|
2557 PyExc_TypeError, |
|
2558 "invalid event tuple" |
|
2559 ); |
|
2560 return NULL; |
|
2561 } |
|
2562 |
|
2563 static PyMethodDef xmlparser_methods[] = { |
|
2564 {"feed", (PyCFunction) xmlparser_feed, METH_VARARGS}, |
|
2565 {"close", (PyCFunction) xmlparser_close, METH_VARARGS}, |
|
2566 {"_parse", (PyCFunction) xmlparser_parse, METH_VARARGS}, |
|
2567 {"_setevents", (PyCFunction) xmlparser_setevents, METH_VARARGS}, |
|
2568 {NULL, NULL} |
|
2569 }; |
|
2570 |
|
2571 static PyObject* |
|
2572 xmlparser_getattr(XMLParserObject* self, char* name) |
|
2573 { |
|
2574 PyObject* res; |
|
2575 |
|
2576 res = Py_FindMethod(xmlparser_methods, (PyObject*) self, name); |
|
2577 if (res) |
|
2578 return res; |
|
2579 |
|
2580 PyErr_Clear(); |
|
2581 |
|
2582 if (strcmp(name, "entity") == 0) |
|
2583 res = self->entity; |
|
2584 else if (strcmp(name, "target") == 0) |
|
2585 res = self->target; |
|
2586 else if (strcmp(name, "version") == 0) { |
|
2587 char buffer[100]; |
|
2588 sprintf(buffer, "Expat %d.%d.%d", XML_MAJOR_VERSION, |
|
2589 XML_MINOR_VERSION, XML_MICRO_VERSION); |
|
2590 return PyString_FromString(buffer); |
|
2591 } else { |
|
2592 PyErr_SetString(PyExc_AttributeError, name); |
|
2593 return NULL; |
|
2594 } |
|
2595 |
|
2596 Py_INCREF(res); |
|
2597 return res; |
|
2598 } |
|
2599 |
|
2600 statichere PyTypeObject XMLParser_Type = { |
|
2601 PyObject_HEAD_INIT(NULL) |
|
2602 0, "XMLParser", sizeof(XMLParserObject), 0, |
|
2603 /* methods */ |
|
2604 (destructor)xmlparser_dealloc, /* tp_dealloc */ |
|
2605 0, /* tp_print */ |
|
2606 (getattrfunc)xmlparser_getattr, /* tp_getattr */ |
|
2607 }; |
|
2608 |
|
2609 #endif |
|
2610 |
|
2611 /* ==================================================================== */ |
|
2612 /* python module interface */ |
|
2613 |
|
2614 static PyMethodDef _functions[] = { |
|
2615 {"Element", (PyCFunction) element, METH_VARARGS|METH_KEYWORDS}, |
|
2616 {"SubElement", (PyCFunction) subelement, METH_VARARGS|METH_KEYWORDS}, |
|
2617 {"TreeBuilder", (PyCFunction) treebuilder, METH_VARARGS}, |
|
2618 #if defined(USE_EXPAT) |
|
2619 {"XMLParser", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS}, |
|
2620 {"XMLTreeBuilder", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS}, |
|
2621 #endif |
|
2622 {NULL, NULL} |
|
2623 }; |
|
2624 |
|
2625 DL_EXPORT(void) |
|
2626 init_elementtree(void) |
|
2627 { |
|
2628 PyObject* m; |
|
2629 PyObject* g; |
|
2630 char* bootstrap; |
|
2631 #if defined(USE_PYEXPAT_CAPI) |
|
2632 struct PyExpat_CAPI* capi; |
|
2633 #endif |
|
2634 |
|
2635 /* Patch object type */ |
|
2636 Py_TYPE(&Element_Type) = Py_TYPE(&TreeBuilder_Type) = &PyType_Type; |
|
2637 #if defined(USE_EXPAT) |
|
2638 Py_TYPE(&XMLParser_Type) = &PyType_Type; |
|
2639 #endif |
|
2640 |
|
2641 m = Py_InitModule("_elementtree", _functions); |
|
2642 if (!m) |
|
2643 return; |
|
2644 |
|
2645 /* python glue code */ |
|
2646 |
|
2647 g = PyDict_New(); |
|
2648 if (!g) |
|
2649 return; |
|
2650 |
|
2651 PyDict_SetItemString(g, "__builtins__", PyEval_GetBuiltins()); |
|
2652 |
|
2653 bootstrap = ( |
|
2654 |
|
2655 #if (PY_VERSION_HEX >= 0x02020000 && PY_VERSION_HEX < 0x02030000) |
|
2656 "from __future__ import generators\n" /* enable yield under 2.2 */ |
|
2657 #endif |
|
2658 |
|
2659 "from copy import copy, deepcopy\n" |
|
2660 |
|
2661 "try:\n" |
|
2662 " from xml.etree import ElementTree\n" |
|
2663 "except ImportError:\n" |
|
2664 " import ElementTree\n" |
|
2665 "ET = ElementTree\n" |
|
2666 "del ElementTree\n" |
|
2667 |
|
2668 "import _elementtree as cElementTree\n" |
|
2669 |
|
2670 "try:\n" /* check if copy works as is */ |
|
2671 " copy(cElementTree.Element('x'))\n" |
|
2672 "except:\n" |
|
2673 " def copyelement(elem):\n" |
|
2674 " return elem\n" |
|
2675 |
|
2676 "def Comment(text=None):\n" /* public */ |
|
2677 " element = cElementTree.Element(ET.Comment)\n" |
|
2678 " element.text = text\n" |
|
2679 " return element\n" |
|
2680 "cElementTree.Comment = Comment\n" |
|
2681 |
|
2682 "class ElementTree(ET.ElementTree):\n" /* public */ |
|
2683 " def parse(self, source, parser=None):\n" |
|
2684 " if not hasattr(source, 'read'):\n" |
|
2685 " source = open(source, 'rb')\n" |
|
2686 " if parser is not None:\n" |
|
2687 " while 1:\n" |
|
2688 " data = source.read(65536)\n" |
|
2689 " if not data:\n" |
|
2690 " break\n" |
|
2691 " parser.feed(data)\n" |
|
2692 " self._root = parser.close()\n" |
|
2693 " else:\n" |
|
2694 " parser = cElementTree.XMLParser()\n" |
|
2695 " self._root = parser._parse(source)\n" |
|
2696 " return self._root\n" |
|
2697 "cElementTree.ElementTree = ElementTree\n" |
|
2698 |
|
2699 "def getiterator(node, tag=None):\n" /* helper */ |
|
2700 " if tag == '*':\n" |
|
2701 " tag = None\n" |
|
2702 #if (PY_VERSION_HEX < 0x02020000) |
|
2703 " nodes = []\n" /* 2.1 doesn't have yield */ |
|
2704 " if tag is None or node.tag == tag:\n" |
|
2705 " nodes.append(node)\n" |
|
2706 " for node in node:\n" |
|
2707 " nodes.extend(getiterator(node, tag))\n" |
|
2708 " return nodes\n" |
|
2709 #else |
|
2710 " if tag is None or node.tag == tag:\n" |
|
2711 " yield node\n" |
|
2712 " for node in node:\n" |
|
2713 " for node in getiterator(node, tag):\n" |
|
2714 " yield node\n" |
|
2715 #endif |
|
2716 |
|
2717 "def parse(source, parser=None):\n" /* public */ |
|
2718 " tree = ElementTree()\n" |
|
2719 " tree.parse(source, parser)\n" |
|
2720 " return tree\n" |
|
2721 "cElementTree.parse = parse\n" |
|
2722 |
|
2723 #if (PY_VERSION_HEX < 0x02020000) |
|
2724 "if hasattr(ET, 'iterparse'):\n" |
|
2725 " cElementTree.iterparse = ET.iterparse\n" /* delegate on 2.1 */ |
|
2726 #else |
|
2727 "class iterparse(object):\n" |
|
2728 " root = None\n" |
|
2729 " def __init__(self, file, events=None):\n" |
|
2730 " if not hasattr(file, 'read'):\n" |
|
2731 " file = open(file, 'rb')\n" |
|
2732 " self._file = file\n" |
|
2733 " self._events = events\n" |
|
2734 " def __iter__(self):\n" |
|
2735 " events = []\n" |
|
2736 " b = cElementTree.TreeBuilder()\n" |
|
2737 " p = cElementTree.XMLParser(b)\n" |
|
2738 " p._setevents(events, self._events)\n" |
|
2739 " while 1:\n" |
|
2740 " data = self._file.read(16384)\n" |
|
2741 " if not data:\n" |
|
2742 " break\n" |
|
2743 " p.feed(data)\n" |
|
2744 " for event in events:\n" |
|
2745 " yield event\n" |
|
2746 " del events[:]\n" |
|
2747 " root = p.close()\n" |
|
2748 " for event in events:\n" |
|
2749 " yield event\n" |
|
2750 " self.root = root\n" |
|
2751 "cElementTree.iterparse = iterparse\n" |
|
2752 #endif |
|
2753 |
|
2754 "def PI(target, text=None):\n" /* public */ |
|
2755 " element = cElementTree.Element(ET.ProcessingInstruction)\n" |
|
2756 " element.text = target\n" |
|
2757 " if text:\n" |
|
2758 " element.text = element.text + ' ' + text\n" |
|
2759 " return element\n" |
|
2760 |
|
2761 " elem = cElementTree.Element(ET.PI)\n" |
|
2762 " elem.text = text\n" |
|
2763 " return elem\n" |
|
2764 "cElementTree.PI = cElementTree.ProcessingInstruction = PI\n" |
|
2765 |
|
2766 "def XML(text):\n" /* public */ |
|
2767 " parser = cElementTree.XMLParser()\n" |
|
2768 " parser.feed(text)\n" |
|
2769 " return parser.close()\n" |
|
2770 "cElementTree.XML = cElementTree.fromstring = XML\n" |
|
2771 |
|
2772 "def XMLID(text):\n" /* public */ |
|
2773 " tree = XML(text)\n" |
|
2774 " ids = {}\n" |
|
2775 " for elem in tree.getiterator():\n" |
|
2776 " id = elem.get('id')\n" |
|
2777 " if id:\n" |
|
2778 " ids[id] = elem\n" |
|
2779 " return tree, ids\n" |
|
2780 "cElementTree.XMLID = XMLID\n" |
|
2781 |
|
2782 "cElementTree.dump = ET.dump\n" |
|
2783 "cElementTree.ElementPath = ElementPath = ET.ElementPath\n" |
|
2784 "cElementTree.iselement = ET.iselement\n" |
|
2785 "cElementTree.QName = ET.QName\n" |
|
2786 "cElementTree.tostring = ET.tostring\n" |
|
2787 "cElementTree.VERSION = '" VERSION "'\n" |
|
2788 "cElementTree.__version__ = '" VERSION "'\n" |
|
2789 "cElementTree.XMLParserError = SyntaxError\n" |
|
2790 |
|
2791 ); |
|
2792 |
|
2793 PyRun_String(bootstrap, Py_file_input, g, NULL); |
|
2794 |
|
2795 elementpath_obj = PyDict_GetItemString(g, "ElementPath"); |
|
2796 |
|
2797 elementtree_copyelement_obj = PyDict_GetItemString(g, "copyelement"); |
|
2798 if (elementtree_copyelement_obj) { |
|
2799 /* reduce hack needed; enable reduce method */ |
|
2800 PyMethodDef* mp; |
|
2801 for (mp = element_methods; mp->ml_name; mp++) |
|
2802 if (mp->ml_meth == (PyCFunction) element_reduce) { |
|
2803 mp->ml_name = "__reduce__"; |
|
2804 break; |
|
2805 } |
|
2806 } else |
|
2807 PyErr_Clear(); |
|
2808 elementtree_deepcopy_obj = PyDict_GetItemString(g, "deepcopy"); |
|
2809 elementtree_getiterator_obj = PyDict_GetItemString(g, "getiterator"); |
|
2810 |
|
2811 #if defined(USE_PYEXPAT_CAPI) |
|
2812 /* link against pyexpat, if possible */ |
|
2813 capi = PyCObject_Import("pyexpat", "expat_CAPI"); |
|
2814 if (capi && |
|
2815 strcmp(capi->magic, PyExpat_CAPI_MAGIC) == 0 && |
|
2816 capi->size <= sizeof(*expat_capi) && |
|
2817 capi->MAJOR_VERSION == XML_MAJOR_VERSION && |
|
2818 capi->MINOR_VERSION == XML_MINOR_VERSION && |
|
2819 capi->MICRO_VERSION == XML_MICRO_VERSION) |
|
2820 expat_capi = capi; |
|
2821 else |
|
2822 expat_capi = NULL; |
|
2823 #endif |
|
2824 |
|
2825 } |