|
1 #include "Python.h" |
|
2 #include <ctype.h> |
|
3 |
|
4 #include "frameobject.h" |
|
5 #include "expat.h" |
|
6 |
|
7 #include "pyexpat.h" |
|
8 |
|
/* Expat's version folded into a single integer
   (major * 10000 + minor * 100 + micro) so that it can be compared in
   preprocessor conditionals such as `#if XML_COMBINED_VERSION >= 19504`. */
#define XML_COMBINED_VERSION \
    (10000*XML_MAJOR_VERSION+100*XML_MINOR_VERSION+XML_MICRO_VERSION)
|
10 |
|
/* Fallback definitions of the PyDoc_* docstring macros, used only when
   the included headers have not already defined PyDoc_STRVAR. */
#ifndef PyDoc_STRVAR

/*
 * fdrake says:
 * Don't change the PyDoc_STR macro definition to (str), because
 * '''the parentheses cause compile failures
 * ("non-constant static initializer" or something like that)
 * on some platforms (Irix?)'''
 */
#define PyDoc_STR(str)          str
#define PyDoc_VAR(name)         static char name[]
#define PyDoc_STRVAR(name,str)  PyDoc_VAR(name) = PyDoc_STR(str)

#endif /* !PyDoc_STRVAR */
|
24 |
|
/* Version-dependent feature switches:
 *  - Python 2.0/2.1: Unicode support is forced on.
 *  - anything else: FIX_TRACE is defined, which compiles in the
 *    trace_frame()/trace_frame_exc() helpers used by call_with_frame().
 */
#if (PY_MAJOR_VERSION == 2 && PY_MINOR_VERSION < 2)
/* In Python 2.0 and 2.1, disabling Unicode was not possible. */
#  define Py_USING_UNICODE
#else
#  define FIX_TRACE
#endif
|
31 |
|
/* Slot numbers for the per-parser handler table.  Each value is used as
   an index into xmlparseobject.handlers[] and into handler_info[], so
   the order of the enumerators must not be changed. */
enum HandlerTypes {
    StartElement,
    EndElement,
    ProcessingInstruction,
    CharacterData,
    UnparsedEntityDecl,
    NotationDecl,
    StartNamespaceDecl,
    EndNamespaceDecl,
    Comment,
    StartCdataSection,
    EndCdataSection,
    Default,
    DefaultHandlerExpand,
    NotStandalone,
    ExternalEntityRef,
    StartDoctypeDecl,
    EndDoctypeDecl,
    EntityDecl,
    XmlDecl,
    ElementDecl,
    AttlistDecl,
#if XML_COMBINED_VERSION >= 19504
    /* Only present when the Expat headers are new enough. */
    SkippedEntity,
#endif
    _DummyDecl
};
|
59 |
|
60 static PyObject *ErrorObject; |
|
61 |
|
62 /* ----------------------------------------------------- */ |
|
63 |
|
64 /* Declarations for objects of type xmlparser */ |
|
65 |
|
66 typedef struct { |
|
67 PyObject_HEAD |
|
68 |
|
69 XML_Parser itself; |
|
70 int returns_unicode; /* True if Unicode strings are returned; |
|
71 if false, UTF-8 strings are returned */ |
|
72 int ordered_attributes; /* Return attributes as a list. */ |
|
73 int specified_attributes; /* Report only specified attributes. */ |
|
74 int in_callback; /* Is a callback active? */ |
|
75 int ns_prefixes; /* Namespace-triplets mode? */ |
|
76 XML_Char *buffer; /* Buffer used when accumulating characters */ |
|
77 /* NULL if not enabled */ |
|
78 int buffer_size; /* Size of buffer, in XML_Char units */ |
|
79 int buffer_used; /* Buffer units in use */ |
|
80 PyObject *intern; /* Dictionary to intern strings */ |
|
81 PyObject **handlers; |
|
82 } xmlparseobject; |
|
83 |
|
/* Size constant for the character-data accumulation buffer; its use is
   outside the part of the file shown here. */
#define CHARACTER_DATA_BUFFER_SIZE  8192
|
85 |
|
86 static PyTypeObject Xmlparsetype; |
|
87 |
|
88 typedef void (*xmlhandlersetter)(XML_Parser self, void *meth); |
|
89 typedef void* xmlhandler; |
|
90 |
|
91 struct HandlerInfo { |
|
92 const char *name; |
|
93 xmlhandlersetter setter; |
|
94 xmlhandler handler; |
|
95 PyCodeObject *tb_code; |
|
96 PyObject *nameobj; |
|
97 }; |
|
98 |
|
99 static struct HandlerInfo handler_info[64]; |
|
100 |
|
101 /* Set an integer attribute on the error object; return true on success, |
|
102 * false on an exception. |
|
103 */ |
|
104 static int |
|
105 set_error_attr(PyObject *err, char *name, int value) |
|
106 { |
|
107 PyObject *v = PyInt_FromLong(value); |
|
108 |
|
109 if (v == NULL || PyObject_SetAttrString(err, name, v) == -1) { |
|
110 Py_XDECREF(v); |
|
111 return 0; |
|
112 } |
|
113 Py_DECREF(v); |
|
114 return 1; |
|
115 } |
|
116 |
|
117 /* Build and set an Expat exception, including positioning |
|
118 * information. Always returns NULL. |
|
119 */ |
|
120 static PyObject * |
|
121 set_error(xmlparseobject *self, enum XML_Error code) |
|
122 { |
|
123 PyObject *err; |
|
124 char buffer[256]; |
|
125 XML_Parser parser = self->itself; |
|
126 int lineno = XML_GetErrorLineNumber(parser); |
|
127 int column = XML_GetErrorColumnNumber(parser); |
|
128 |
|
129 /* There is no risk of overflowing this buffer, since |
|
130 even for 64-bit integers, there is sufficient space. */ |
|
131 sprintf(buffer, "%.200s: line %i, column %i", |
|
132 XML_ErrorString(code), lineno, column); |
|
133 err = PyObject_CallFunction(ErrorObject, "s", buffer); |
|
134 if ( err != NULL |
|
135 && set_error_attr(err, "code", code) |
|
136 && set_error_attr(err, "offset", column) |
|
137 && set_error_attr(err, "lineno", lineno)) { |
|
138 PyErr_SetObject(ErrorObject, err); |
|
139 } |
|
140 Py_XDECREF(err); |
|
141 return NULL; |
|
142 } |
|
143 |
|
144 static int |
|
145 have_handler(xmlparseobject *self, int type) |
|
146 { |
|
147 PyObject *handler = self->handlers[type]; |
|
148 return handler != NULL; |
|
149 } |
|
150 |
|
151 static PyObject * |
|
152 get_handler_name(struct HandlerInfo *hinfo) |
|
153 { |
|
154 PyObject *name = hinfo->nameobj; |
|
155 if (name == NULL) { |
|
156 name = PyString_FromString(hinfo->name); |
|
157 hinfo->nameobj = name; |
|
158 } |
|
159 Py_XINCREF(name); |
|
160 return name; |
|
161 } |
|
162 |
|
163 |
|
164 #ifdef Py_USING_UNICODE |
|
165 /* Convert a string of XML_Chars into a Unicode string. |
|
166 Returns None if str is a null pointer. */ |
|
167 |
|
168 static PyObject * |
|
169 conv_string_to_unicode(const XML_Char *str) |
|
170 { |
|
171 /* XXX currently this code assumes that XML_Char is 8-bit, |
|
172 and hence in UTF-8. */ |
|
173 /* UTF-8 from Expat, Unicode desired */ |
|
174 if (str == NULL) { |
|
175 Py_INCREF(Py_None); |
|
176 return Py_None; |
|
177 } |
|
178 return PyUnicode_DecodeUTF8(str, strlen(str), "strict"); |
|
179 } |
|
180 |
|
181 static PyObject * |
|
182 conv_string_len_to_unicode(const XML_Char *str, int len) |
|
183 { |
|
184 /* XXX currently this code assumes that XML_Char is 8-bit, |
|
185 and hence in UTF-8. */ |
|
186 /* UTF-8 from Expat, Unicode desired */ |
|
187 if (str == NULL) { |
|
188 Py_INCREF(Py_None); |
|
189 return Py_None; |
|
190 } |
|
191 return PyUnicode_DecodeUTF8((const char *)str, len, "strict"); |
|
192 } |
|
193 #endif |
|
194 |
|
195 /* Convert a string of XML_Chars into an 8-bit Python string. |
|
196 Returns None if str is a null pointer. */ |
|
197 |
|
198 static PyObject * |
|
199 conv_string_to_utf8(const XML_Char *str) |
|
200 { |
|
201 /* XXX currently this code assumes that XML_Char is 8-bit, |
|
202 and hence in UTF-8. */ |
|
203 /* UTF-8 from Expat, UTF-8 desired */ |
|
204 if (str == NULL) { |
|
205 Py_INCREF(Py_None); |
|
206 return Py_None; |
|
207 } |
|
208 return PyString_FromString(str); |
|
209 } |
|
210 |
|
211 static PyObject * |
|
212 conv_string_len_to_utf8(const XML_Char *str, int len) |
|
213 { |
|
214 /* XXX currently this code assumes that XML_Char is 8-bit, |
|
215 and hence in UTF-8. */ |
|
216 /* UTF-8 from Expat, UTF-8 desired */ |
|
217 if (str == NULL) { |
|
218 Py_INCREF(Py_None); |
|
219 return Py_None; |
|
220 } |
|
221 return PyString_FromStringAndSize((const char *)str, len); |
|
222 } |
|
223 |
|
224 /* Callback routines */ |
|
225 |
|
226 static void clear_handlers(xmlparseobject *self, int initial); |
|
227 |
|
228 /* This handler is used when an error has been detected, in the hope |
|
229 that actual parsing can be terminated early. This will only help |
|
230 if an external entity reference is encountered. */ |
|
231 static int |
|
232 error_external_entity_ref_handler(XML_Parser parser, |
|
233 const XML_Char *context, |
|
234 const XML_Char *base, |
|
235 const XML_Char *systemId, |
|
236 const XML_Char *publicId) |
|
237 { |
|
238 return 0; |
|
239 } |
|
240 |
|
241 /* Dummy character data handler used when an error (exception) has |
|
242 been detected, and the actual parsing can be terminated early. |
|
243 This is needed since character data handler can't be safely removed |
|
244 from within the character data handler, but can be replaced. It is |
|
245 used only from the character data handler trampoline, and must be |
|
246 used right after `flag_error()` is called. */ |
|
247 static void |
|
248 noop_character_data_handler(void *userData, const XML_Char *data, int len) |
|
249 { |
|
250 /* Do nothing. */ |
|
251 } |
|
252 |
|
253 static void |
|
254 flag_error(xmlparseobject *self) |
|
255 { |
|
256 clear_handlers(self, 0); |
|
257 XML_SetExternalEntityRefHandler(self->itself, |
|
258 error_external_entity_ref_handler); |
|
259 } |
|
260 |
|
261 static PyCodeObject* |
|
262 getcode(enum HandlerTypes slot, char* func_name, int lineno) |
|
263 { |
|
264 PyObject *code = NULL; |
|
265 PyObject *name = NULL; |
|
266 PyObject *nulltuple = NULL; |
|
267 PyObject *filename = NULL; |
|
268 |
|
269 if (handler_info[slot].tb_code == NULL) { |
|
270 code = PyString_FromString(""); |
|
271 if (code == NULL) |
|
272 goto failed; |
|
273 name = PyString_FromString(func_name); |
|
274 if (name == NULL) |
|
275 goto failed; |
|
276 nulltuple = PyTuple_New(0); |
|
277 if (nulltuple == NULL) |
|
278 goto failed; |
|
279 filename = PyString_FromString(__FILE__); |
|
280 handler_info[slot].tb_code = |
|
281 PyCode_New(0, /* argcount */ |
|
282 0, /* nlocals */ |
|
283 0, /* stacksize */ |
|
284 0, /* flags */ |
|
285 code, /* code */ |
|
286 nulltuple, /* consts */ |
|
287 nulltuple, /* names */ |
|
288 nulltuple, /* varnames */ |
|
289 #if PYTHON_API_VERSION >= 1010 |
|
290 nulltuple, /* freevars */ |
|
291 nulltuple, /* cellvars */ |
|
292 #endif |
|
293 filename, /* filename */ |
|
294 name, /* name */ |
|
295 lineno, /* firstlineno */ |
|
296 code /* lnotab */ |
|
297 ); |
|
298 if (handler_info[slot].tb_code == NULL) |
|
299 goto failed; |
|
300 Py_DECREF(code); |
|
301 Py_DECREF(nulltuple); |
|
302 Py_DECREF(filename); |
|
303 Py_DECREF(name); |
|
304 } |
|
305 return handler_info[slot].tb_code; |
|
306 failed: |
|
307 Py_XDECREF(code); |
|
308 Py_XDECREF(name); |
|
309 return NULL; |
|
310 } |
|
311 |
|
312 #ifdef FIX_TRACE |
|
313 static int |
|
314 trace_frame(PyThreadState *tstate, PyFrameObject *f, int code, PyObject *val) |
|
315 { |
|
316 int result = 0; |
|
317 if (!tstate->use_tracing || tstate->tracing) |
|
318 return 0; |
|
319 if (tstate->c_profilefunc != NULL) { |
|
320 tstate->tracing++; |
|
321 result = tstate->c_profilefunc(tstate->c_profileobj, |
|
322 f, code , val); |
|
323 tstate->use_tracing = ((tstate->c_tracefunc != NULL) |
|
324 || (tstate->c_profilefunc != NULL)); |
|
325 tstate->tracing--; |
|
326 if (result) |
|
327 return result; |
|
328 } |
|
329 if (tstate->c_tracefunc != NULL) { |
|
330 tstate->tracing++; |
|
331 result = tstate->c_tracefunc(tstate->c_traceobj, |
|
332 f, code , val); |
|
333 tstate->use_tracing = ((tstate->c_tracefunc != NULL) |
|
334 || (tstate->c_profilefunc != NULL)); |
|
335 tstate->tracing--; |
|
336 } |
|
337 return result; |
|
338 } |
|
339 |
|
340 static int |
|
341 trace_frame_exc(PyThreadState *tstate, PyFrameObject *f) |
|
342 { |
|
343 PyObject *type, *value, *traceback, *arg; |
|
344 int err; |
|
345 |
|
346 if (tstate->c_tracefunc == NULL) |
|
347 return 0; |
|
348 |
|
349 PyErr_Fetch(&type, &value, &traceback); |
|
350 if (value == NULL) { |
|
351 value = Py_None; |
|
352 Py_INCREF(value); |
|
353 } |
|
354 #if PY_VERSION_HEX < 0x02040000 |
|
355 arg = Py_BuildValue("(OOO)", type, value, traceback); |
|
356 #else |
|
357 arg = PyTuple_Pack(3, type, value, traceback); |
|
358 #endif |
|
359 if (arg == NULL) { |
|
360 PyErr_Restore(type, value, traceback); |
|
361 return 0; |
|
362 } |
|
363 err = trace_frame(tstate, f, PyTrace_EXCEPTION, arg); |
|
364 Py_DECREF(arg); |
|
365 if (err == 0) |
|
366 PyErr_Restore(type, value, traceback); |
|
367 else { |
|
368 Py_XDECREF(type); |
|
369 Py_XDECREF(value); |
|
370 Py_XDECREF(traceback); |
|
371 } |
|
372 return err; |
|
373 } |
|
374 #endif |
|
375 |
|
376 static PyObject* |
|
377 call_with_frame(PyCodeObject *c, PyObject* func, PyObject* args, |
|
378 xmlparseobject *self) |
|
379 { |
|
380 PyThreadState *tstate = PyThreadState_GET(); |
|
381 PyFrameObject *f; |
|
382 PyObject *res; |
|
383 |
|
384 if (c == NULL) |
|
385 return NULL; |
|
386 |
|
387 f = PyFrame_New(tstate, c, PyEval_GetGlobals(), NULL); |
|
388 if (f == NULL) |
|
389 return NULL; |
|
390 tstate->frame = f; |
|
391 #ifdef FIX_TRACE |
|
392 if (trace_frame(tstate, f, PyTrace_CALL, Py_None) < 0) { |
|
393 return NULL; |
|
394 } |
|
395 #endif |
|
396 res = PyEval_CallObject(func, args); |
|
397 if (res == NULL) { |
|
398 if (tstate->curexc_traceback == NULL) |
|
399 PyTraceBack_Here(f); |
|
400 XML_StopParser(self->itself, XML_FALSE); |
|
401 #ifdef FIX_TRACE |
|
402 if (trace_frame_exc(tstate, f) < 0) { |
|
403 return NULL; |
|
404 } |
|
405 } |
|
406 else { |
|
407 if (trace_frame(tstate, f, PyTrace_RETURN, res) < 0) { |
|
408 Py_XDECREF(res); |
|
409 res = NULL; |
|
410 } |
|
411 } |
|
412 #else |
|
413 } |
|
414 #endif |
|
415 tstate->frame = f->f_back; |
|
416 Py_DECREF(f); |
|
417 return res; |
|
418 } |
|
419 |
|
/* STRING_CONV_FUNC names the function that converts a NUL-terminated
   XML_Char string into a Python object.  In Unicode builds it expands
   to an expression that reads `self->returns_unicode`, so a variable
   named `self` must be in scope wherever it is used. */
#ifdef Py_USING_UNICODE
/* Python 2.0 and later versions, when built with Unicode support */
#define STRING_CONV_FUNC (self->returns_unicode \
                          ? conv_string_to_unicode : conv_string_to_utf8)
#else
#define STRING_CONV_FUNC conv_string_to_utf8
#endif
|
427 |
|
428 static PyObject* |
|
429 string_intern(xmlparseobject *self, const char* str) |
|
430 { |
|
431 PyObject *result = STRING_CONV_FUNC(str); |
|
432 PyObject *value; |
|
433 /* result can be NULL if the unicode conversion failed. */ |
|
434 if (!result) |
|
435 return result; |
|
436 if (!self->intern) |
|
437 return result; |
|
438 value = PyDict_GetItem(self->intern, result); |
|
439 if (!value) { |
|
440 if (PyDict_SetItem(self->intern, result, result) == 0) |
|
441 return result; |
|
442 else |
|
443 return NULL; |
|
444 } |
|
445 Py_INCREF(value); |
|
446 Py_DECREF(result); |
|
447 return value; |
|
448 } |
|
449 |
|
450 /* Return 0 on success, -1 on exception. |
|
451 * flag_error() will be called before return if needed. |
|
452 */ |
|
453 static int |
|
454 call_character_handler(xmlparseobject *self, const XML_Char *buffer, int len) |
|
455 { |
|
456 PyObject *args; |
|
457 PyObject *temp; |
|
458 |
|
459 args = PyTuple_New(1); |
|
460 if (args == NULL) |
|
461 return -1; |
|
462 #ifdef Py_USING_UNICODE |
|
463 temp = (self->returns_unicode |
|
464 ? conv_string_len_to_unicode(buffer, len) |
|
465 : conv_string_len_to_utf8(buffer, len)); |
|
466 #else |
|
467 temp = conv_string_len_to_utf8(buffer, len); |
|
468 #endif |
|
469 if (temp == NULL) { |
|
470 Py_DECREF(args); |
|
471 flag_error(self); |
|
472 XML_SetCharacterDataHandler(self->itself, |
|
473 noop_character_data_handler); |
|
474 return -1; |
|
475 } |
|
476 PyTuple_SET_ITEM(args, 0, temp); |
|
477 /* temp is now a borrowed reference; consider it unused. */ |
|
478 self->in_callback = 1; |
|
479 temp = call_with_frame(getcode(CharacterData, "CharacterData", __LINE__), |
|
480 self->handlers[CharacterData], args, self); |
|
481 /* temp is an owned reference again, or NULL */ |
|
482 self->in_callback = 0; |
|
483 Py_DECREF(args); |
|
484 if (temp == NULL) { |
|
485 flag_error(self); |
|
486 XML_SetCharacterDataHandler(self->itself, |
|
487 noop_character_data_handler); |
|
488 return -1; |
|
489 } |
|
490 Py_DECREF(temp); |
|
491 return 0; |
|
492 } |
|
493 |
|
494 static int |
|
495 flush_character_buffer(xmlparseobject *self) |
|
496 { |
|
497 int rc; |
|
498 if (self->buffer == NULL || self->buffer_used == 0) |
|
499 return 0; |
|
500 rc = call_character_handler(self, self->buffer, self->buffer_used); |
|
501 self->buffer_used = 0; |
|
502 return rc; |
|
503 } |
|
504 |
|
505 static void |
|
506 my_CharacterDataHandler(void *userData, const XML_Char *data, int len) |
|
507 { |
|
508 xmlparseobject *self = (xmlparseobject *) userData; |
|
509 if (self->buffer == NULL) |
|
510 call_character_handler(self, data, len); |
|
511 else { |
|
512 if ((self->buffer_used + len) > self->buffer_size) { |
|
513 if (flush_character_buffer(self) < 0) |
|
514 return; |
|
515 /* handler might have changed; drop the rest on the floor |
|
516 * if there isn't a handler anymore |
|
517 */ |
|
518 if (!have_handler(self, CharacterData)) |
|
519 return; |
|
520 } |
|
521 if (len > self->buffer_size) { |
|
522 call_character_handler(self, data, len); |
|
523 self->buffer_used = 0; |
|
524 } |
|
525 else { |
|
526 memcpy(self->buffer + self->buffer_used, |
|
527 data, len * sizeof(XML_Char)); |
|
528 self->buffer_used += len; |
|
529 } |
|
530 } |
|
531 } |
|
532 |
|
533 static void |
|
534 my_StartElementHandler(void *userData, |
|
535 const XML_Char *name, const XML_Char *atts[]) |
|
536 { |
|
537 xmlparseobject *self = (xmlparseobject *)userData; |
|
538 |
|
539 if (have_handler(self, StartElement)) { |
|
540 PyObject *container, *rv, *args; |
|
541 int i, max; |
|
542 |
|
543 if (flush_character_buffer(self) < 0) |
|
544 return; |
|
545 /* Set max to the number of slots filled in atts[]; max/2 is |
|
546 * the number of attributes we need to process. |
|
547 */ |
|
548 if (self->specified_attributes) { |
|
549 max = XML_GetSpecifiedAttributeCount(self->itself); |
|
550 } |
|
551 else { |
|
552 max = 0; |
|
553 while (atts[max] != NULL) |
|
554 max += 2; |
|
555 } |
|
556 /* Build the container. */ |
|
557 if (self->ordered_attributes) |
|
558 container = PyList_New(max); |
|
559 else |
|
560 container = PyDict_New(); |
|
561 if (container == NULL) { |
|
562 flag_error(self); |
|
563 return; |
|
564 } |
|
565 for (i = 0; i < max; i += 2) { |
|
566 PyObject *n = string_intern(self, (XML_Char *) atts[i]); |
|
567 PyObject *v; |
|
568 if (n == NULL) { |
|
569 flag_error(self); |
|
570 Py_DECREF(container); |
|
571 return; |
|
572 } |
|
573 v = STRING_CONV_FUNC((XML_Char *) atts[i+1]); |
|
574 if (v == NULL) { |
|
575 flag_error(self); |
|
576 Py_DECREF(container); |
|
577 Py_DECREF(n); |
|
578 return; |
|
579 } |
|
580 if (self->ordered_attributes) { |
|
581 PyList_SET_ITEM(container, i, n); |
|
582 PyList_SET_ITEM(container, i+1, v); |
|
583 } |
|
584 else if (PyDict_SetItem(container, n, v)) { |
|
585 flag_error(self); |
|
586 Py_DECREF(n); |
|
587 Py_DECREF(v); |
|
588 return; |
|
589 } |
|
590 else { |
|
591 Py_DECREF(n); |
|
592 Py_DECREF(v); |
|
593 } |
|
594 } |
|
595 args = string_intern(self, name); |
|
596 if (args != NULL) |
|
597 args = Py_BuildValue("(NN)", args, container); |
|
598 if (args == NULL) { |
|
599 Py_DECREF(container); |
|
600 return; |
|
601 } |
|
602 /* Container is now a borrowed reference; ignore it. */ |
|
603 self->in_callback = 1; |
|
604 rv = call_with_frame(getcode(StartElement, "StartElement", __LINE__), |
|
605 self->handlers[StartElement], args, self); |
|
606 self->in_callback = 0; |
|
607 Py_DECREF(args); |
|
608 if (rv == NULL) { |
|
609 flag_error(self); |
|
610 return; |
|
611 } |
|
612 Py_DECREF(rv); |
|
613 } |
|
614 } |
|
615 |
|
/* Generate the C trampoline `my_<NAME>Handler` for handler slot NAME.
 *
 *   RC            return type of the generated function
 *   NAME          enum HandlerTypes member; also used, stringified, as
 *                 the name passed to getcode()
 *   PARAMS        parenthesized parameter list of the generated function
 *   INIT          extra declarations placed after the common locals
 *   PARAM_FORMAT  parenthesized argument list for Py_BuildValue()
 *   CONVERSION    statement run on the handler's result `rv` before it
 *                 is released
 *   RETURN        expression returned on every path
 *   GETUSERDATA   expression yielding the xmlparseobject pointer
 *
 * The generated function does nothing unless a handler is installed.
 * Otherwise it flushes buffered character data, builds the argument
 * tuple, calls the handler through call_with_frame() and calls
 * flag_error() if the tuple cannot be built or the handler fails.
 */
#define RC_HANDLER(RC, NAME, PARAMS, INIT, PARAM_FORMAT, CONVERSION, \
                   RETURN, GETUSERDATA) \
static RC \
my_##NAME##Handler PARAMS {\
    xmlparseobject *self = GETUSERDATA ; \
    PyObject *args = NULL; \
    PyObject *rv = NULL; \
    INIT \
\
    if (have_handler(self, NAME)) { \
        if (flush_character_buffer(self) < 0) \
            return RETURN; \
        args = Py_BuildValue PARAM_FORMAT ;\
        if (!args) { flag_error(self); return RETURN;} \
        self->in_callback = 1; \
        rv = call_with_frame(getcode(NAME,#NAME,__LINE__), \
                             self->handlers[NAME], args, self); \
        self->in_callback = 0; \
        Py_DECREF(args); \
        if (rv == NULL) { \
            flag_error(self); \
            return RETURN; \
        } \
        CONVERSION \
        Py_DECREF(rv); \
    } \
    return RETURN; \
}
|
644 |
|
/* RC_HANDLER shorthand for callbacks that return nothing and receive
   the parser object as `userData`. */
#define VOID_HANDLER(NAME, PARAMS, PARAM_FORMAT) \
        RC_HANDLER(void, NAME, PARAMS, ;, PARAM_FORMAT, ;, ;,\
        (xmlparseobject *)userData)

/* RC_HANDLER shorthand for callbacks that return an int: the result is
   the Python handler's return value converted with PyInt_AsLong(), or
   0 when no handler ran or it failed. */
#define INT_HANDLER(NAME, PARAMS, PARAM_FORMAT)\
        RC_HANDLER(int, NAME, PARAMS, int rc=0;, PARAM_FORMAT, \
                        rc = PyInt_AsLong(rv);, rc, \
        (xmlparseobject *)userData)
|
653 |
|
654 VOID_HANDLER(EndElement, |
|
655 (void *userData, const XML_Char *name), |
|
656 ("(N)", string_intern(self, name))) |
|
657 |
|
658 VOID_HANDLER(ProcessingInstruction, |
|
659 (void *userData, |
|
660 const XML_Char *target, |
|
661 const XML_Char *data), |
|
662 ("(NO&)", string_intern(self, target), STRING_CONV_FUNC,data)) |
|
663 |
|
664 VOID_HANDLER(UnparsedEntityDecl, |
|
665 (void *userData, |
|
666 const XML_Char *entityName, |
|
667 const XML_Char *base, |
|
668 const XML_Char *systemId, |
|
669 const XML_Char *publicId, |
|
670 const XML_Char *notationName), |
|
671 ("(NNNNN)", |
|
672 string_intern(self, entityName), string_intern(self, base), |
|
673 string_intern(self, systemId), string_intern(self, publicId), |
|
674 string_intern(self, notationName))) |
|
675 |
|
676 #ifndef Py_USING_UNICODE |
|
677 VOID_HANDLER(EntityDecl, |
|
678 (void *userData, |
|
679 const XML_Char *entityName, |
|
680 int is_parameter_entity, |
|
681 const XML_Char *value, |
|
682 int value_length, |
|
683 const XML_Char *base, |
|
684 const XML_Char *systemId, |
|
685 const XML_Char *publicId, |
|
686 const XML_Char *notationName), |
|
687 ("NiNNNNN", |
|
688 string_intern(self, entityName), is_parameter_entity, |
|
689 conv_string_len_to_utf8(value, value_length), |
|
690 string_intern(self, base), string_intern(self, systemId), |
|
691 string_intern(self, publicId), |
|
692 string_intern(self, notationName))) |
|
693 #else |
|
694 VOID_HANDLER(EntityDecl, |
|
695 (void *userData, |
|
696 const XML_Char *entityName, |
|
697 int is_parameter_entity, |
|
698 const XML_Char *value, |
|
699 int value_length, |
|
700 const XML_Char *base, |
|
701 const XML_Char *systemId, |
|
702 const XML_Char *publicId, |
|
703 const XML_Char *notationName), |
|
704 ("NiNNNNN", |
|
705 string_intern(self, entityName), is_parameter_entity, |
|
706 (self->returns_unicode |
|
707 ? conv_string_len_to_unicode(value, value_length) |
|
708 : conv_string_len_to_utf8(value, value_length)), |
|
709 string_intern(self, base), string_intern(self, systemId), |
|
710 string_intern(self, publicId), |
|
711 string_intern(self, notationName))) |
|
712 #endif |
|
713 |
|
714 VOID_HANDLER(XmlDecl, |
|
715 (void *userData, |
|
716 const XML_Char *version, |
|
717 const XML_Char *encoding, |
|
718 int standalone), |
|
719 ("(O&O&i)", |
|
720 STRING_CONV_FUNC,version, STRING_CONV_FUNC,encoding, |
|
721 standalone)) |
|
722 |
|
723 static PyObject * |
|
724 conv_content_model(XML_Content * const model, |
|
725 PyObject *(*conv_string)(const XML_Char *)) |
|
726 { |
|
727 PyObject *result = NULL; |
|
728 PyObject *children = PyTuple_New(model->numchildren); |
|
729 int i; |
|
730 |
|
731 if (children != NULL) { |
|
732 assert(model->numchildren < INT_MAX); |
|
733 for (i = 0; i < (int)model->numchildren; ++i) { |
|
734 PyObject *child = conv_content_model(&model->children[i], |
|
735 conv_string); |
|
736 if (child == NULL) { |
|
737 Py_XDECREF(children); |
|
738 return NULL; |
|
739 } |
|
740 PyTuple_SET_ITEM(children, i, child); |
|
741 } |
|
742 result = Py_BuildValue("(iiO&N)", |
|
743 model->type, model->quant, |
|
744 conv_string,model->name, children); |
|
745 } |
|
746 return result; |
|
747 } |
|
748 |
|
749 static void |
|
750 my_ElementDeclHandler(void *userData, |
|
751 const XML_Char *name, |
|
752 XML_Content *model) |
|
753 { |
|
754 xmlparseobject *self = (xmlparseobject *)userData; |
|
755 PyObject *args = NULL; |
|
756 |
|
757 if (have_handler(self, ElementDecl)) { |
|
758 PyObject *rv = NULL; |
|
759 PyObject *modelobj, *nameobj; |
|
760 |
|
761 if (flush_character_buffer(self) < 0) |
|
762 goto finally; |
|
763 #ifdef Py_USING_UNICODE |
|
764 modelobj = conv_content_model(model, |
|
765 (self->returns_unicode |
|
766 ? conv_string_to_unicode |
|
767 : conv_string_to_utf8)); |
|
768 #else |
|
769 modelobj = conv_content_model(model, conv_string_to_utf8); |
|
770 #endif |
|
771 if (modelobj == NULL) { |
|
772 flag_error(self); |
|
773 goto finally; |
|
774 } |
|
775 nameobj = string_intern(self, name); |
|
776 if (nameobj == NULL) { |
|
777 Py_DECREF(modelobj); |
|
778 flag_error(self); |
|
779 goto finally; |
|
780 } |
|
781 args = Py_BuildValue("NN", nameobj, modelobj); |
|
782 if (args == NULL) { |
|
783 Py_DECREF(modelobj); |
|
784 flag_error(self); |
|
785 goto finally; |
|
786 } |
|
787 self->in_callback = 1; |
|
788 rv = call_with_frame(getcode(ElementDecl, "ElementDecl", __LINE__), |
|
789 self->handlers[ElementDecl], args, self); |
|
790 self->in_callback = 0; |
|
791 if (rv == NULL) { |
|
792 flag_error(self); |
|
793 goto finally; |
|
794 } |
|
795 Py_DECREF(rv); |
|
796 } |
|
797 finally: |
|
798 Py_XDECREF(args); |
|
799 XML_FreeContentModel(self->itself, model); |
|
800 return; |
|
801 } |
|
802 |
|
803 VOID_HANDLER(AttlistDecl, |
|
804 (void *userData, |
|
805 const XML_Char *elname, |
|
806 const XML_Char *attname, |
|
807 const XML_Char *att_type, |
|
808 const XML_Char *dflt, |
|
809 int isrequired), |
|
810 ("(NNO&O&i)", |
|
811 string_intern(self, elname), string_intern(self, attname), |
|
812 STRING_CONV_FUNC,att_type, STRING_CONV_FUNC,dflt, |
|
813 isrequired)) |
|
814 |
|
815 #if XML_COMBINED_VERSION >= 19504 |
|
816 VOID_HANDLER(SkippedEntity, |
|
817 (void *userData, |
|
818 const XML_Char *entityName, |
|
819 int is_parameter_entity), |
|
820 ("Ni", |
|
821 string_intern(self, entityName), is_parameter_entity)) |
|
822 #endif |
|
823 |
|
824 VOID_HANDLER(NotationDecl, |
|
825 (void *userData, |
|
826 const XML_Char *notationName, |
|
827 const XML_Char *base, |
|
828 const XML_Char *systemId, |
|
829 const XML_Char *publicId), |
|
830 ("(NNNN)", |
|
831 string_intern(self, notationName), string_intern(self, base), |
|
832 string_intern(self, systemId), string_intern(self, publicId))) |
|
833 |
|
834 VOID_HANDLER(StartNamespaceDecl, |
|
835 (void *userData, |
|
836 const XML_Char *prefix, |
|
837 const XML_Char *uri), |
|
838 ("(NN)", |
|
839 string_intern(self, prefix), string_intern(self, uri))) |
|
840 |
|
841 VOID_HANDLER(EndNamespaceDecl, |
|
842 (void *userData, |
|
843 const XML_Char *prefix), |
|
844 ("(N)", string_intern(self, prefix))) |
|
845 |
|
846 VOID_HANDLER(Comment, |
|
847 (void *userData, const XML_Char *data), |
|
848 ("(O&)", STRING_CONV_FUNC,data)) |
|
849 |
|
850 VOID_HANDLER(StartCdataSection, |
|
851 (void *userData), |
|
852 ("()")) |
|
853 |
|
854 VOID_HANDLER(EndCdataSection, |
|
855 (void *userData), |
|
856 ("()")) |
|
857 |
|
858 #ifndef Py_USING_UNICODE |
|
859 VOID_HANDLER(Default, |
|
860 (void *userData, const XML_Char *s, int len), |
|
861 ("(N)", conv_string_len_to_utf8(s,len))) |
|
862 |
|
863 VOID_HANDLER(DefaultHandlerExpand, |
|
864 (void *userData, const XML_Char *s, int len), |
|
865 ("(N)", conv_string_len_to_utf8(s,len))) |
|
866 #else |
|
867 VOID_HANDLER(Default, |
|
868 (void *userData, const XML_Char *s, int len), |
|
869 ("(N)", (self->returns_unicode |
|
870 ? conv_string_len_to_unicode(s,len) |
|
871 : conv_string_len_to_utf8(s,len)))) |
|
872 |
|
873 VOID_HANDLER(DefaultHandlerExpand, |
|
874 (void *userData, const XML_Char *s, int len), |
|
875 ("(N)", (self->returns_unicode |
|
876 ? conv_string_len_to_unicode(s,len) |
|
877 : conv_string_len_to_utf8(s,len)))) |
|
878 #endif |
|
879 |
|
880 INT_HANDLER(NotStandalone, |
|
881 (void *userData), |
|
882 ("()")) |
|
883 |
|
884 RC_HANDLER(int, ExternalEntityRef, |
|
885 (XML_Parser parser, |
|
886 const XML_Char *context, |
|
887 const XML_Char *base, |
|
888 const XML_Char *systemId, |
|
889 const XML_Char *publicId), |
|
890 int rc=0;, |
|
891 ("(O&NNN)", |
|
892 STRING_CONV_FUNC,context, string_intern(self, base), |
|
893 string_intern(self, systemId), string_intern(self, publicId)), |
|
894 rc = PyInt_AsLong(rv);, rc, |
|
895 XML_GetUserData(parser)) |
|
896 |
|
897 /* XXX UnknownEncodingHandler */ |
|
898 |
|
899 VOID_HANDLER(StartDoctypeDecl, |
|
900 (void *userData, const XML_Char *doctypeName, |
|
901 const XML_Char *sysid, const XML_Char *pubid, |
|
902 int has_internal_subset), |
|
903 ("(NNNi)", string_intern(self, doctypeName), |
|
904 string_intern(self, sysid), string_intern(self, pubid), |
|
905 has_internal_subset)) |
|
906 |
|
907 VOID_HANDLER(EndDoctypeDecl, (void *userData), ("()")) |
|
908 |
|
909 /* ---------------------------------------------------------------- */ |
|
910 |
|
911 static PyObject * |
|
912 get_parse_result(xmlparseobject *self, int rv) |
|
913 { |
|
914 if (PyErr_Occurred()) { |
|
915 return NULL; |
|
916 } |
|
917 if (rv == 0) { |
|
918 return set_error(self, XML_GetErrorCode(self->itself)); |
|
919 } |
|
920 if (flush_character_buffer(self) < 0) { |
|
921 return NULL; |
|
922 } |
|
923 return PyInt_FromLong(rv); |
|
924 } |
|
925 |
|
926 PyDoc_STRVAR(xmlparse_Parse__doc__, |
|
927 "Parse(data[, isfinal])\n\ |
|
928 Parse XML data. `isfinal' should be true at end of input."); |
|
929 |
|
930 static PyObject * |
|
931 xmlparse_Parse(xmlparseobject *self, PyObject *args) |
|
932 { |
|
933 char *s; |
|
934 int slen; |
|
935 int isFinal = 0; |
|
936 |
|
937 if (!PyArg_ParseTuple(args, "s#|i:Parse", &s, &slen, &isFinal)) |
|
938 return NULL; |
|
939 |
|
940 return get_parse_result(self, XML_Parse(self->itself, s, slen, isFinal)); |
|
941 } |
|
942 |
|
/* File reading copied from cPickle */

/* Number of bytes requested per read by xmlparse_ParseFile(). */
#define BUF_SIZE  2048
|
946 |
|
947 static int |
|
948 readinst(char *buf, int buf_size, PyObject *meth) |
|
949 { |
|
950 PyObject *arg = NULL; |
|
951 PyObject *bytes = NULL; |
|
952 PyObject *str = NULL; |
|
953 int len = -1; |
|
954 |
|
955 if ((bytes = PyInt_FromLong(buf_size)) == NULL) |
|
956 goto finally; |
|
957 |
|
958 if ((arg = PyTuple_New(1)) == NULL) { |
|
959 Py_DECREF(bytes); |
|
960 goto finally; |
|
961 } |
|
962 |
|
963 PyTuple_SET_ITEM(arg, 0, bytes); |
|
964 |
|
965 #if PY_VERSION_HEX < 0x02020000 |
|
966 str = PyObject_CallObject(meth, arg); |
|
967 #else |
|
968 str = PyObject_Call(meth, arg, NULL); |
|
969 #endif |
|
970 if (str == NULL) |
|
971 goto finally; |
|
972 |
|
973 /* XXX what to do if it returns a Unicode string? */ |
|
974 if (!PyString_Check(str)) { |
|
975 PyErr_Format(PyExc_TypeError, |
|
976 "read() did not return a string object (type=%.400s)", |
|
977 Py_TYPE(str)->tp_name); |
|
978 goto finally; |
|
979 } |
|
980 len = PyString_GET_SIZE(str); |
|
981 if (len > buf_size) { |
|
982 PyErr_Format(PyExc_ValueError, |
|
983 "read() returned too much data: " |
|
984 "%i bytes requested, %i returned", |
|
985 buf_size, len); |
|
986 goto finally; |
|
987 } |
|
988 memcpy(buf, PyString_AsString(str), len); |
|
989 finally: |
|
990 Py_XDECREF(arg); |
|
991 Py_XDECREF(str); |
|
992 return len; |
|
993 } |
|
994 |
|
995 PyDoc_STRVAR(xmlparse_ParseFile__doc__, |
|
996 "ParseFile(file)\n\ |
|
997 Parse XML data from file-like object."); |
|
998 |
|
999 static PyObject * |
|
1000 xmlparse_ParseFile(xmlparseobject *self, PyObject *f) |
|
1001 { |
|
1002 int rv = 1; |
|
1003 FILE *fp; |
|
1004 PyObject *readmethod = NULL; |
|
1005 |
|
1006 if (PyFile_Check(f)) { |
|
1007 fp = PyFile_AsFile(f); |
|
1008 } |
|
1009 else { |
|
1010 fp = NULL; |
|
1011 readmethod = PyObject_GetAttrString(f, "read"); |
|
1012 if (readmethod == NULL) { |
|
1013 PyErr_Clear(); |
|
1014 PyErr_SetString(PyExc_TypeError, |
|
1015 "argument must have 'read' attribute"); |
|
1016 return NULL; |
|
1017 } |
|
1018 } |
|
1019 for (;;) { |
|
1020 int bytes_read; |
|
1021 void *buf = XML_GetBuffer(self->itself, BUF_SIZE); |
|
1022 if (buf == NULL) { |
|
1023 Py_XDECREF(readmethod); |
|
1024 return PyErr_NoMemory(); |
|
1025 } |
|
1026 |
|
1027 if (fp) { |
|
1028 bytes_read = fread(buf, sizeof(char), BUF_SIZE, fp); |
|
1029 if (bytes_read < 0) { |
|
1030 PyErr_SetFromErrno(PyExc_IOError); |
|
1031 return NULL; |
|
1032 } |
|
1033 } |
|
1034 else { |
|
1035 bytes_read = readinst(buf, BUF_SIZE, readmethod); |
|
1036 if (bytes_read < 0) { |
|
1037 Py_DECREF(readmethod); |
|
1038 return NULL; |
|
1039 } |
|
1040 } |
|
1041 rv = XML_ParseBuffer(self->itself, bytes_read, bytes_read == 0); |
|
1042 if (PyErr_Occurred()) { |
|
1043 Py_XDECREF(readmethod); |
|
1044 return NULL; |
|
1045 } |
|
1046 |
|
1047 if (!rv || bytes_read == 0) |
|
1048 break; |
|
1049 } |
|
1050 Py_XDECREF(readmethod); |
|
1051 return get_parse_result(self, rv); |
|
1052 } |
|
1053 |
|
1054 PyDoc_STRVAR(xmlparse_SetBase__doc__, |
|
1055 "SetBase(base_url)\n\ |
|
1056 Set the base URL for the parser."); |
|
1057 |
|
1058 static PyObject * |
|
1059 xmlparse_SetBase(xmlparseobject *self, PyObject *args) |
|
1060 { |
|
1061 char *base; |
|
1062 |
|
1063 if (!PyArg_ParseTuple(args, "s:SetBase", &base)) |
|
1064 return NULL; |
|
1065 if (!XML_SetBase(self->itself, base)) { |
|
1066 return PyErr_NoMemory(); |
|
1067 } |
|
1068 Py_INCREF(Py_None); |
|
1069 return Py_None; |
|
1070 } |
|
1071 |
|
1072 PyDoc_STRVAR(xmlparse_GetBase__doc__, |
|
1073 "GetBase() -> url\n\ |
|
1074 Return base URL string for the parser."); |
|
1075 |
|
1076 static PyObject * |
|
1077 xmlparse_GetBase(xmlparseobject *self, PyObject *unused) |
|
1078 { |
|
1079 return Py_BuildValue("z", XML_GetBase(self->itself)); |
|
1080 } |
|
1081 |
|
1082 PyDoc_STRVAR(xmlparse_GetInputContext__doc__, |
|
1083 "GetInputContext() -> string\n\ |
|
1084 Return the untranslated text of the input that caused the current event.\n\ |
|
1085 If the event was generated by a large amount of text (such as a start tag\n\ |
|
1086 for an element with many attributes), not all of the text may be available."); |
|
1087 |
|
1088 static PyObject * |
|
1089 xmlparse_GetInputContext(xmlparseobject *self, PyObject *unused) |
|
1090 { |
|
1091 if (self->in_callback) { |
|
1092 int offset, size; |
|
1093 const char *buffer |
|
1094 = XML_GetInputContext(self->itself, &offset, &size); |
|
1095 |
|
1096 if (buffer != NULL) |
|
1097 return PyString_FromStringAndSize(buffer + offset, |
|
1098 size - offset); |
|
1099 else |
|
1100 Py_RETURN_NONE; |
|
1101 } |
|
1102 else |
|
1103 Py_RETURN_NONE; |
|
1104 } |
|
1105 |
|
1106 PyDoc_STRVAR(xmlparse_ExternalEntityParserCreate__doc__, |
|
1107 "ExternalEntityParserCreate(context[, encoding])\n\ |
|
1108 Create a parser for parsing an external entity based on the\n\ |
|
1109 information passed to the ExternalEntityRefHandler."); |
|
1110 |
|
1111 static PyObject * |
|
1112 xmlparse_ExternalEntityParserCreate(xmlparseobject *self, PyObject *args) |
|
1113 { |
|
1114 char *context; |
|
1115 char *encoding = NULL; |
|
1116 xmlparseobject *new_parser; |
|
1117 int i; |
|
1118 |
|
1119 if (!PyArg_ParseTuple(args, "z|s:ExternalEntityParserCreate", |
|
1120 &context, &encoding)) { |
|
1121 return NULL; |
|
1122 } |
|
1123 |
|
1124 #ifndef Py_TPFLAGS_HAVE_GC |
|
1125 /* Python versions 2.0 and 2.1 */ |
|
1126 new_parser = PyObject_New(xmlparseobject, &Xmlparsetype); |
|
1127 #else |
|
1128 /* Python versions 2.2 and later */ |
|
1129 new_parser = PyObject_GC_New(xmlparseobject, &Xmlparsetype); |
|
1130 #endif |
|
1131 |
|
1132 if (new_parser == NULL) |
|
1133 return NULL; |
|
1134 new_parser->buffer_size = self->buffer_size; |
|
1135 new_parser->buffer_used = 0; |
|
1136 if (self->buffer != NULL) { |
|
1137 new_parser->buffer = malloc(new_parser->buffer_size); |
|
1138 if (new_parser->buffer == NULL) { |
|
1139 #ifndef Py_TPFLAGS_HAVE_GC |
|
1140 /* Code for versions 2.0 and 2.1 */ |
|
1141 PyObject_Del(new_parser); |
|
1142 #else |
|
1143 /* Code for versions 2.2 and later. */ |
|
1144 PyObject_GC_Del(new_parser); |
|
1145 #endif |
|
1146 return PyErr_NoMemory(); |
|
1147 } |
|
1148 } |
|
1149 else |
|
1150 new_parser->buffer = NULL; |
|
1151 new_parser->returns_unicode = self->returns_unicode; |
|
1152 new_parser->ordered_attributes = self->ordered_attributes; |
|
1153 new_parser->specified_attributes = self->specified_attributes; |
|
1154 new_parser->in_callback = 0; |
|
1155 new_parser->ns_prefixes = self->ns_prefixes; |
|
1156 new_parser->itself = XML_ExternalEntityParserCreate(self->itself, context, |
|
1157 encoding); |
|
1158 new_parser->handlers = 0; |
|
1159 new_parser->intern = self->intern; |
|
1160 Py_XINCREF(new_parser->intern); |
|
1161 #ifdef Py_TPFLAGS_HAVE_GC |
|
1162 PyObject_GC_Track(new_parser); |
|
1163 #else |
|
1164 PyObject_GC_Init(new_parser); |
|
1165 #endif |
|
1166 |
|
1167 if (!new_parser->itself) { |
|
1168 Py_DECREF(new_parser); |
|
1169 return PyErr_NoMemory(); |
|
1170 } |
|
1171 |
|
1172 XML_SetUserData(new_parser->itself, (void *)new_parser); |
|
1173 |
|
1174 /* allocate and clear handlers first */ |
|
1175 for (i = 0; handler_info[i].name != NULL; i++) |
|
1176 /* do nothing */; |
|
1177 |
|
1178 new_parser->handlers = malloc(sizeof(PyObject *) * i); |
|
1179 if (!new_parser->handlers) { |
|
1180 Py_DECREF(new_parser); |
|
1181 return PyErr_NoMemory(); |
|
1182 } |
|
1183 clear_handlers(new_parser, 1); |
|
1184 |
|
1185 /* then copy handlers from self */ |
|
1186 for (i = 0; handler_info[i].name != NULL; i++) { |
|
1187 PyObject *handler = self->handlers[i]; |
|
1188 if (handler != NULL) { |
|
1189 Py_INCREF(handler); |
|
1190 new_parser->handlers[i] = handler; |
|
1191 handler_info[i].setter(new_parser->itself, |
|
1192 handler_info[i].handler); |
|
1193 } |
|
1194 } |
|
1195 return (PyObject *)new_parser; |
|
1196 } |
|
1197 |
|
1198 PyDoc_STRVAR(xmlparse_SetParamEntityParsing__doc__, |
|
1199 "SetParamEntityParsing(flag) -> success\n\ |
|
1200 Controls parsing of parameter entities (including the external DTD\n\ |
|
1201 subset). Possible flag values are XML_PARAM_ENTITY_PARSING_NEVER,\n\ |
|
1202 XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE and\n\ |
|
1203 XML_PARAM_ENTITY_PARSING_ALWAYS. Returns true if setting the flag\n\ |
|
1204 was successful."); |
|
1205 |
|
1206 static PyObject* |
|
1207 xmlparse_SetParamEntityParsing(xmlparseobject *p, PyObject* args) |
|
1208 { |
|
1209 int flag; |
|
1210 if (!PyArg_ParseTuple(args, "i", &flag)) |
|
1211 return NULL; |
|
1212 flag = XML_SetParamEntityParsing(p->itself, flag); |
|
1213 return PyInt_FromLong(flag); |
|
1214 } |
|
1215 |
|
1216 |
|
#if XML_COMBINED_VERSION >= 19505
PyDoc_STRVAR(xmlparse_UseForeignDTD__doc__,
"UseForeignDTD([flag])\n\
Allows the application to provide an artificial external subset if one is\n\
not specified as part of the document instance. This readily allows the\n\
use of a 'default' document type controlled by the application, while still\n\
getting the advantage of providing document type information to the parser.\n\
'flag' defaults to True if not provided.");

/* Method UseForeignDTD([flag]).
 *
 * Converts the optional argument to a boolean (default: true) and passes
 * it to XML_UseForeignDTD().  Returns None on success.  Returns NULL with
 * an exception set when the truth test of 'flag' raises, or (through
 * set_error()) when Expat rejects the request.
 */
static PyObject *
xmlparse_UseForeignDTD(xmlparseobject *self, PyObject *args)
{
    PyObject *flagobj = NULL;
    XML_Bool flag = XML_TRUE;
    enum XML_Error rc;

    if (!PyArg_UnpackTuple(args, "UseForeignDTD", 0, 1, &flagobj))
        return NULL;
    if (flagobj != NULL) {
        /* PyObject_IsTrue() returns -1 when evaluating the object's truth
           value raised; that must be reported, not treated as "true". */
        int truth = PyObject_IsTrue(flagobj);
        if (truth < 0)
            return NULL;
        flag = truth ? XML_TRUE : XML_FALSE;
    }
    rc = XML_UseForeignDTD(self->itself, flag);
    if (rc != XML_ERROR_NONE) {
        return set_error(self, rc);
    }
    Py_INCREF(Py_None);
    return Py_None;
}
#endif
|
1244 |
|
1245 static struct PyMethodDef xmlparse_methods[] = { |
|
1246 {"Parse", (PyCFunction)xmlparse_Parse, |
|
1247 METH_VARARGS, xmlparse_Parse__doc__}, |
|
1248 {"ParseFile", (PyCFunction)xmlparse_ParseFile, |
|
1249 METH_O, xmlparse_ParseFile__doc__}, |
|
1250 {"SetBase", (PyCFunction)xmlparse_SetBase, |
|
1251 METH_VARARGS, xmlparse_SetBase__doc__}, |
|
1252 {"GetBase", (PyCFunction)xmlparse_GetBase, |
|
1253 METH_NOARGS, xmlparse_GetBase__doc__}, |
|
1254 {"ExternalEntityParserCreate", (PyCFunction)xmlparse_ExternalEntityParserCreate, |
|
1255 METH_VARARGS, xmlparse_ExternalEntityParserCreate__doc__}, |
|
1256 {"SetParamEntityParsing", (PyCFunction)xmlparse_SetParamEntityParsing, |
|
1257 METH_VARARGS, xmlparse_SetParamEntityParsing__doc__}, |
|
1258 {"GetInputContext", (PyCFunction)xmlparse_GetInputContext, |
|
1259 METH_NOARGS, xmlparse_GetInputContext__doc__}, |
|
1260 #if XML_COMBINED_VERSION >= 19505 |
|
1261 {"UseForeignDTD", (PyCFunction)xmlparse_UseForeignDTD, |
|
1262 METH_VARARGS, xmlparse_UseForeignDTD__doc__}, |
|
1263 #endif |
|
1264 {NULL, NULL} /* sentinel */ |
|
1265 }; |
|
1266 |
|
1267 /* ---------- */ |
|
1268 |
|
1269 |
|
1270 #ifdef Py_USING_UNICODE |
|
1271 |
|
1272 /* pyexpat international encoding support. |
|
1273 Make it as simple as possible. |
|
1274 */ |
|
1275 |
|
1276 static char template_buffer[257]; |
|
1277 PyObject *template_string = NULL; |
|
1278 |
|
1279 static void |
|
1280 init_template_buffer(void) |
|
1281 { |
|
1282 int i; |
|
1283 for (i = 0; i < 256; i++) { |
|
1284 template_buffer[i] = i; |
|
1285 } |
|
1286 template_buffer[256] = 0; |
|
1287 } |
|
1288 |
|
1289 static int |
|
1290 PyUnknownEncodingHandler(void *encodingHandlerData, |
|
1291 const XML_Char *name, |
|
1292 XML_Encoding *info) |
|
1293 { |
|
1294 PyUnicodeObject *_u_string = NULL; |
|
1295 int result = 0; |
|
1296 int i; |
|
1297 |
|
1298 /* Yes, supports only 8bit encodings */ |
|
1299 _u_string = (PyUnicodeObject *) |
|
1300 PyUnicode_Decode(template_buffer, 256, name, "replace"); |
|
1301 |
|
1302 if (_u_string == NULL) |
|
1303 return result; |
|
1304 |
|
1305 for (i = 0; i < 256; i++) { |
|
1306 /* Stupid to access directly, but fast */ |
|
1307 Py_UNICODE c = _u_string->str[i]; |
|
1308 if (c == Py_UNICODE_REPLACEMENT_CHARACTER) |
|
1309 info->map[i] = -1; |
|
1310 else |
|
1311 info->map[i] = c; |
|
1312 } |
|
1313 info->data = NULL; |
|
1314 info->convert = NULL; |
|
1315 info->release = NULL; |
|
1316 result = 1; |
|
1317 Py_DECREF(_u_string); |
|
1318 return result; |
|
1319 } |
|
1320 |
|
1321 #endif |
|
1322 |
|
1323 static PyObject * |
|
1324 newxmlparseobject(char *encoding, char *namespace_separator, PyObject *intern) |
|
1325 { |
|
1326 int i; |
|
1327 xmlparseobject *self; |
|
1328 |
|
1329 #ifdef Py_TPFLAGS_HAVE_GC |
|
1330 /* Code for versions 2.2 and later */ |
|
1331 self = PyObject_GC_New(xmlparseobject, &Xmlparsetype); |
|
1332 #else |
|
1333 self = PyObject_New(xmlparseobject, &Xmlparsetype); |
|
1334 #endif |
|
1335 if (self == NULL) |
|
1336 return NULL; |
|
1337 |
|
1338 #ifdef Py_USING_UNICODE |
|
1339 self->returns_unicode = 1; |
|
1340 #else |
|
1341 self->returns_unicode = 0; |
|
1342 #endif |
|
1343 |
|
1344 self->buffer = NULL; |
|
1345 self->buffer_size = CHARACTER_DATA_BUFFER_SIZE; |
|
1346 self->buffer_used = 0; |
|
1347 self->ordered_attributes = 0; |
|
1348 self->specified_attributes = 0; |
|
1349 self->in_callback = 0; |
|
1350 self->ns_prefixes = 0; |
|
1351 self->handlers = NULL; |
|
1352 if (namespace_separator != NULL) { |
|
1353 self->itself = XML_ParserCreateNS(encoding, *namespace_separator); |
|
1354 } |
|
1355 else { |
|
1356 self->itself = XML_ParserCreate(encoding); |
|
1357 } |
|
1358 self->intern = intern; |
|
1359 Py_XINCREF(self->intern); |
|
1360 #ifdef Py_TPFLAGS_HAVE_GC |
|
1361 PyObject_GC_Track(self); |
|
1362 #else |
|
1363 PyObject_GC_Init(self); |
|
1364 #endif |
|
1365 if (self->itself == NULL) { |
|
1366 PyErr_SetString(PyExc_RuntimeError, |
|
1367 "XML_ParserCreate failed"); |
|
1368 Py_DECREF(self); |
|
1369 return NULL; |
|
1370 } |
|
1371 XML_SetUserData(self->itself, (void *)self); |
|
1372 #ifdef Py_USING_UNICODE |
|
1373 XML_SetUnknownEncodingHandler(self->itself, |
|
1374 (XML_UnknownEncodingHandler) PyUnknownEncodingHandler, NULL); |
|
1375 #endif |
|
1376 |
|
1377 for (i = 0; handler_info[i].name != NULL; i++) |
|
1378 /* do nothing */; |
|
1379 |
|
1380 self->handlers = malloc(sizeof(PyObject *) * i); |
|
1381 if (!self->handlers) { |
|
1382 Py_DECREF(self); |
|
1383 return PyErr_NoMemory(); |
|
1384 } |
|
1385 clear_handlers(self, 1); |
|
1386 |
|
1387 return (PyObject*)self; |
|
1388 } |
|
1389 |
|
1390 |
|
1391 static void |
|
1392 xmlparse_dealloc(xmlparseobject *self) |
|
1393 { |
|
1394 int i; |
|
1395 #ifdef Py_TPFLAGS_HAVE_GC |
|
1396 PyObject_GC_UnTrack(self); |
|
1397 #else |
|
1398 PyObject_GC_Fini(self); |
|
1399 #endif |
|
1400 if (self->itself != NULL) |
|
1401 XML_ParserFree(self->itself); |
|
1402 self->itself = NULL; |
|
1403 |
|
1404 if (self->handlers != NULL) { |
|
1405 PyObject *temp; |
|
1406 for (i = 0; handler_info[i].name != NULL; i++) { |
|
1407 temp = self->handlers[i]; |
|
1408 self->handlers[i] = NULL; |
|
1409 Py_XDECREF(temp); |
|
1410 } |
|
1411 free(self->handlers); |
|
1412 self->handlers = NULL; |
|
1413 } |
|
1414 if (self->buffer != NULL) { |
|
1415 free(self->buffer); |
|
1416 self->buffer = NULL; |
|
1417 } |
|
1418 Py_XDECREF(self->intern); |
|
1419 #ifndef Py_TPFLAGS_HAVE_GC |
|
1420 /* Code for versions 2.0 and 2.1 */ |
|
1421 PyObject_Del(self); |
|
1422 #else |
|
1423 /* Code for versions 2.2 and later. */ |
|
1424 PyObject_GC_Del(self); |
|
1425 #endif |
|
1426 } |
|
1427 |
|
1428 static int |
|
1429 handlername2int(const char *name) |
|
1430 { |
|
1431 int i; |
|
1432 for (i = 0; handler_info[i].name != NULL; i++) { |
|
1433 if (strcmp(name, handler_info[i].name) == 0) { |
|
1434 return i; |
|
1435 } |
|
1436 } |
|
1437 return -1; |
|
1438 } |
|
1439 |
|
1440 static PyObject * |
|
1441 get_pybool(int istrue) |
|
1442 { |
|
1443 PyObject *result = istrue ? Py_True : Py_False; |
|
1444 Py_INCREF(result); |
|
1445 return result; |
|
1446 } |
|
1447 |
|
1448 static PyObject * |
|
1449 xmlparse_getattr(xmlparseobject *self, char *name) |
|
1450 { |
|
1451 int handlernum = handlername2int(name); |
|
1452 |
|
1453 if (handlernum != -1) { |
|
1454 PyObject *result = self->handlers[handlernum]; |
|
1455 if (result == NULL) |
|
1456 result = Py_None; |
|
1457 Py_INCREF(result); |
|
1458 return result; |
|
1459 } |
|
1460 if (name[0] == 'E') { |
|
1461 if (strcmp(name, "ErrorCode") == 0) |
|
1462 return PyInt_FromLong((long) |
|
1463 XML_GetErrorCode(self->itself)); |
|
1464 if (strcmp(name, "ErrorLineNumber") == 0) |
|
1465 return PyInt_FromLong((long) |
|
1466 XML_GetErrorLineNumber(self->itself)); |
|
1467 if (strcmp(name, "ErrorColumnNumber") == 0) |
|
1468 return PyInt_FromLong((long) |
|
1469 XML_GetErrorColumnNumber(self->itself)); |
|
1470 if (strcmp(name, "ErrorByteIndex") == 0) |
|
1471 return PyInt_FromLong((long) |
|
1472 XML_GetErrorByteIndex(self->itself)); |
|
1473 } |
|
1474 if (name[0] == 'C') { |
|
1475 if (strcmp(name, "CurrentLineNumber") == 0) |
|
1476 return PyInt_FromLong((long) |
|
1477 XML_GetCurrentLineNumber(self->itself)); |
|
1478 if (strcmp(name, "CurrentColumnNumber") == 0) |
|
1479 return PyInt_FromLong((long) |
|
1480 XML_GetCurrentColumnNumber(self->itself)); |
|
1481 if (strcmp(name, "CurrentByteIndex") == 0) |
|
1482 return PyInt_FromLong((long) |
|
1483 XML_GetCurrentByteIndex(self->itself)); |
|
1484 } |
|
1485 if (name[0] == 'b') { |
|
1486 if (strcmp(name, "buffer_size") == 0) |
|
1487 return PyInt_FromLong((long) self->buffer_size); |
|
1488 if (strcmp(name, "buffer_text") == 0) |
|
1489 return get_pybool(self->buffer != NULL); |
|
1490 if (strcmp(name, "buffer_used") == 0) |
|
1491 return PyInt_FromLong((long) self->buffer_used); |
|
1492 } |
|
1493 if (strcmp(name, "namespace_prefixes") == 0) |
|
1494 return get_pybool(self->ns_prefixes); |
|
1495 if (strcmp(name, "ordered_attributes") == 0) |
|
1496 return get_pybool(self->ordered_attributes); |
|
1497 if (strcmp(name, "returns_unicode") == 0) |
|
1498 return get_pybool((long) self->returns_unicode); |
|
1499 if (strcmp(name, "specified_attributes") == 0) |
|
1500 return get_pybool((long) self->specified_attributes); |
|
1501 if (strcmp(name, "intern") == 0) { |
|
1502 if (self->intern == NULL) { |
|
1503 Py_INCREF(Py_None); |
|
1504 return Py_None; |
|
1505 } |
|
1506 else { |
|
1507 Py_INCREF(self->intern); |
|
1508 return self->intern; |
|
1509 } |
|
1510 } |
|
1511 |
|
1512 #define APPEND(list, str) \ |
|
1513 do { \ |
|
1514 PyObject *o = PyString_FromString(str); \ |
|
1515 if (o != NULL) \ |
|
1516 PyList_Append(list, o); \ |
|
1517 Py_XDECREF(o); \ |
|
1518 } while (0) |
|
1519 |
|
1520 if (strcmp(name, "__members__") == 0) { |
|
1521 int i; |
|
1522 PyObject *rc = PyList_New(0); |
|
1523 if (!rc) |
|
1524 return NULL; |
|
1525 for (i = 0; handler_info[i].name != NULL; i++) { |
|
1526 PyObject *o = get_handler_name(&handler_info[i]); |
|
1527 if (o != NULL) |
|
1528 PyList_Append(rc, o); |
|
1529 Py_XDECREF(o); |
|
1530 } |
|
1531 APPEND(rc, "ErrorCode"); |
|
1532 APPEND(rc, "ErrorLineNumber"); |
|
1533 APPEND(rc, "ErrorColumnNumber"); |
|
1534 APPEND(rc, "ErrorByteIndex"); |
|
1535 APPEND(rc, "CurrentLineNumber"); |
|
1536 APPEND(rc, "CurrentColumnNumber"); |
|
1537 APPEND(rc, "CurrentByteIndex"); |
|
1538 APPEND(rc, "buffer_size"); |
|
1539 APPEND(rc, "buffer_text"); |
|
1540 APPEND(rc, "buffer_used"); |
|
1541 APPEND(rc, "namespace_prefixes"); |
|
1542 APPEND(rc, "ordered_attributes"); |
|
1543 APPEND(rc, "returns_unicode"); |
|
1544 APPEND(rc, "specified_attributes"); |
|
1545 APPEND(rc, "intern"); |
|
1546 |
|
1547 #undef APPEND |
|
1548 return rc; |
|
1549 } |
|
1550 return Py_FindMethod(xmlparse_methods, (PyObject *)self, name); |
|
1551 } |
|
1552 |
|
1553 static int |
|
1554 sethandler(xmlparseobject *self, const char *name, PyObject* v) |
|
1555 { |
|
1556 int handlernum = handlername2int(name); |
|
1557 if (handlernum >= 0) { |
|
1558 xmlhandler c_handler = NULL; |
|
1559 PyObject *temp = self->handlers[handlernum]; |
|
1560 |
|
1561 if (v == Py_None) { |
|
1562 /* If this is the character data handler, and a character |
|
1563 data handler is already active, we need to be more |
|
1564 careful. What we can safely do is replace the existing |
|
1565 character data handler callback function with a no-op |
|
1566 function that will refuse to call Python. The downside |
|
1567 is that this doesn't completely remove the character |
|
1568 data handler from the C layer if there's any callback |
|
1569 active, so Expat does a little more work than it |
|
1570 otherwise would, but that's really an odd case. A more |
|
1571 elaborate system of handlers and state could remove the |
|
1572 C handler more effectively. */ |
|
1573 if (handlernum == CharacterData && self->in_callback) |
|
1574 c_handler = noop_character_data_handler; |
|
1575 v = NULL; |
|
1576 } |
|
1577 else if (v != NULL) { |
|
1578 Py_INCREF(v); |
|
1579 c_handler = handler_info[handlernum].handler; |
|
1580 } |
|
1581 self->handlers[handlernum] = v; |
|
1582 Py_XDECREF(temp); |
|
1583 handler_info[handlernum].setter(self->itself, c_handler); |
|
1584 return 1; |
|
1585 } |
|
1586 return 0; |
|
1587 } |
|
1588 |
|
1589 static int |
|
1590 xmlparse_setattr(xmlparseobject *self, char *name, PyObject *v) |
|
1591 { |
|
1592 /* Set attribute 'name' to value 'v'. v==NULL means delete */ |
|
1593 if (v == NULL) { |
|
1594 PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute"); |
|
1595 return -1; |
|
1596 } |
|
1597 if (strcmp(name, "buffer_text") == 0) { |
|
1598 if (PyObject_IsTrue(v)) { |
|
1599 if (self->buffer == NULL) { |
|
1600 self->buffer = malloc(self->buffer_size); |
|
1601 if (self->buffer == NULL) { |
|
1602 PyErr_NoMemory(); |
|
1603 return -1; |
|
1604 } |
|
1605 self->buffer_used = 0; |
|
1606 } |
|
1607 } |
|
1608 else if (self->buffer != NULL) { |
|
1609 if (flush_character_buffer(self) < 0) |
|
1610 return -1; |
|
1611 free(self->buffer); |
|
1612 self->buffer = NULL; |
|
1613 } |
|
1614 return 0; |
|
1615 } |
|
1616 if (strcmp(name, "namespace_prefixes") == 0) { |
|
1617 if (PyObject_IsTrue(v)) |
|
1618 self->ns_prefixes = 1; |
|
1619 else |
|
1620 self->ns_prefixes = 0; |
|
1621 XML_SetReturnNSTriplet(self->itself, self->ns_prefixes); |
|
1622 return 0; |
|
1623 } |
|
1624 if (strcmp(name, "ordered_attributes") == 0) { |
|
1625 if (PyObject_IsTrue(v)) |
|
1626 self->ordered_attributes = 1; |
|
1627 else |
|
1628 self->ordered_attributes = 0; |
|
1629 return 0; |
|
1630 } |
|
1631 if (strcmp(name, "returns_unicode") == 0) { |
|
1632 if (PyObject_IsTrue(v)) { |
|
1633 #ifndef Py_USING_UNICODE |
|
1634 PyErr_SetString(PyExc_ValueError, |
|
1635 "Unicode support not available"); |
|
1636 return -1; |
|
1637 #else |
|
1638 self->returns_unicode = 1; |
|
1639 #endif |
|
1640 } |
|
1641 else |
|
1642 self->returns_unicode = 0; |
|
1643 return 0; |
|
1644 } |
|
1645 if (strcmp(name, "specified_attributes") == 0) { |
|
1646 if (PyObject_IsTrue(v)) |
|
1647 self->specified_attributes = 1; |
|
1648 else |
|
1649 self->specified_attributes = 0; |
|
1650 return 0; |
|
1651 } |
|
1652 |
|
1653 if (strcmp(name, "buffer_size") == 0) { |
|
1654 long new_buffer_size; |
|
1655 if (!PyInt_Check(v)) { |
|
1656 PyErr_SetString(PyExc_TypeError, "buffer_size must be an integer"); |
|
1657 return -1; |
|
1658 } |
|
1659 |
|
1660 new_buffer_size=PyInt_AS_LONG(v); |
|
1661 /* trivial case -- no change */ |
|
1662 if (new_buffer_size == self->buffer_size) { |
|
1663 return 0; |
|
1664 } |
|
1665 |
|
1666 if (new_buffer_size <= 0) { |
|
1667 PyErr_SetString(PyExc_ValueError, "buffer_size must be greater than zero"); |
|
1668 return -1; |
|
1669 } |
|
1670 |
|
1671 /* check maximum */ |
|
1672 if (new_buffer_size > INT_MAX) { |
|
1673 char errmsg[100]; |
|
1674 sprintf(errmsg, "buffer_size must not be greater than %i", INT_MAX); |
|
1675 PyErr_SetString(PyExc_ValueError, errmsg); |
|
1676 return -1; |
|
1677 } |
|
1678 |
|
1679 if (self->buffer != NULL) { |
|
1680 /* there is already a buffer */ |
|
1681 if (self->buffer_used != 0) { |
|
1682 flush_character_buffer(self); |
|
1683 } |
|
1684 /* free existing buffer */ |
|
1685 free(self->buffer); |
|
1686 } |
|
1687 self->buffer = malloc(new_buffer_size); |
|
1688 if (self->buffer == NULL) { |
|
1689 PyErr_NoMemory(); |
|
1690 return -1; |
|
1691 } |
|
1692 self->buffer_size = new_buffer_size; |
|
1693 return 0; |
|
1694 } |
|
1695 |
|
1696 if (strcmp(name, "CharacterDataHandler") == 0) { |
|
1697 /* If we're changing the character data handler, flush all |
|
1698 * cached data with the old handler. Not sure there's a |
|
1699 * "right" thing to do, though, but this probably won't |
|
1700 * happen. |
|
1701 */ |
|
1702 if (flush_character_buffer(self) < 0) |
|
1703 return -1; |
|
1704 } |
|
1705 if (sethandler(self, name, v)) { |
|
1706 return 0; |
|
1707 } |
|
1708 PyErr_SetString(PyExc_AttributeError, name); |
|
1709 return -1; |
|
1710 } |
|
1711 |
|
#ifdef WITH_CYCLE_GC
/* tp_traverse slot: report every installed handler object to 'visit'. */
static int
xmlparse_traverse(xmlparseobject *op, visitproc visit, void *arg)
{
    int slot = 0;

    while (handler_info[slot].name != NULL) {
        /* Py_VISIT returns from this function if 'visit' reports non-zero. */
        Py_VISIT(op->handlers[slot]);
        slot++;
    }
    return 0;
}

/* tp_clear slot: drop the handler references and the 'intern' object. */
static int
xmlparse_clear(xmlparseobject *op)
{
    clear_handlers(op, 0);
    Py_CLEAR(op->intern);
    return 0;
}
#endif
|
1730 |
|
1731 PyDoc_STRVAR(Xmlparsetype__doc__, "XML parser"); |
|
1732 |
|
1733 static PyTypeObject Xmlparsetype = { |
|
1734 PyVarObject_HEAD_INIT(NULL, 0) |
|
1735 "pyexpat.xmlparser", /*tp_name*/ |
|
1736 sizeof(xmlparseobject) + PyGC_HEAD_SIZE,/*tp_basicsize*/ |
|
1737 0, /*tp_itemsize*/ |
|
1738 /* methods */ |
|
1739 (destructor)xmlparse_dealloc, /*tp_dealloc*/ |
|
1740 (printfunc)0, /*tp_print*/ |
|
1741 (getattrfunc)xmlparse_getattr, /*tp_getattr*/ |
|
1742 (setattrfunc)xmlparse_setattr, /*tp_setattr*/ |
|
1743 (cmpfunc)0, /*tp_compare*/ |
|
1744 (reprfunc)0, /*tp_repr*/ |
|
1745 0, /*tp_as_number*/ |
|
1746 0, /*tp_as_sequence*/ |
|
1747 0, /*tp_as_mapping*/ |
|
1748 (hashfunc)0, /*tp_hash*/ |
|
1749 (ternaryfunc)0, /*tp_call*/ |
|
1750 (reprfunc)0, /*tp_str*/ |
|
1751 0, /* tp_getattro */ |
|
1752 0, /* tp_setattro */ |
|
1753 0, /* tp_as_buffer */ |
|
1754 #ifdef Py_TPFLAGS_HAVE_GC |
|
1755 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /*tp_flags*/ |
|
1756 #else |
|
1757 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_GC, /*tp_flags*/ |
|
1758 #endif |
|
1759 Xmlparsetype__doc__, /* tp_doc - Documentation string */ |
|
1760 #ifdef WITH_CYCLE_GC |
|
1761 (traverseproc)xmlparse_traverse, /* tp_traverse */ |
|
1762 (inquiry)xmlparse_clear /* tp_clear */ |
|
1763 #else |
|
1764 0, 0 |
|
1765 #endif |
|
1766 }; |
|
1767 |
|
1768 /* End of code for xmlparser objects */ |
|
1769 /* -------------------------------------------------------- */ |
|
1770 |
|
1771 PyDoc_STRVAR(pyexpat_ParserCreate__doc__, |
|
1772 "ParserCreate([encoding[, namespace_separator]]) -> parser\n\ |
|
1773 Return a new XML parser object."); |
|
1774 |
|
1775 static PyObject * |
|
1776 pyexpat_ParserCreate(PyObject *notused, PyObject *args, PyObject *kw) |
|
1777 { |
|
1778 char *encoding = NULL; |
|
1779 char *namespace_separator = NULL; |
|
1780 PyObject *intern = NULL; |
|
1781 PyObject *result; |
|
1782 int intern_decref = 0; |
|
1783 static char *kwlist[] = {"encoding", "namespace_separator", |
|
1784 "intern", NULL}; |
|
1785 |
|
1786 if (!PyArg_ParseTupleAndKeywords(args, kw, "|zzO:ParserCreate", kwlist, |
|
1787 &encoding, &namespace_separator, &intern)) |
|
1788 return NULL; |
|
1789 if (namespace_separator != NULL |
|
1790 && strlen(namespace_separator) > 1) { |
|
1791 PyErr_SetString(PyExc_ValueError, |
|
1792 "namespace_separator must be at most one" |
|
1793 " character, omitted, or None"); |
|
1794 return NULL; |
|
1795 } |
|
1796 /* Explicitly passing None means no interning is desired. |
|
1797 Not passing anything means that a new dictionary is used. */ |
|
1798 if (intern == Py_None) |
|
1799 intern = NULL; |
|
1800 else if (intern == NULL) { |
|
1801 intern = PyDict_New(); |
|
1802 if (!intern) |
|
1803 return NULL; |
|
1804 intern_decref = 1; |
|
1805 } |
|
1806 else if (!PyDict_Check(intern)) { |
|
1807 PyErr_SetString(PyExc_TypeError, "intern must be a dictionary"); |
|
1808 return NULL; |
|
1809 } |
|
1810 |
|
1811 result = newxmlparseobject(encoding, namespace_separator, intern); |
|
1812 if (intern_decref) { |
|
1813 Py_DECREF(intern); |
|
1814 } |
|
1815 return result; |
|
1816 } |
|
1817 |
|
1818 PyDoc_STRVAR(pyexpat_ErrorString__doc__, |
|
1819 "ErrorString(errno) -> string\n\ |
|
1820 Returns string error for given number."); |
|
1821 |
|
1822 static PyObject * |
|
1823 pyexpat_ErrorString(PyObject *self, PyObject *args) |
|
1824 { |
|
1825 long code = 0; |
|
1826 |
|
1827 if (!PyArg_ParseTuple(args, "l:ErrorString", &code)) |
|
1828 return NULL; |
|
1829 return Py_BuildValue("z", XML_ErrorString((int)code)); |
|
1830 } |
|
1831 |
|
1832 /* List of methods defined in the module */ |
|
1833 |
|
1834 static struct PyMethodDef pyexpat_methods[] = { |
|
1835 {"ParserCreate", (PyCFunction)pyexpat_ParserCreate, |
|
1836 METH_VARARGS|METH_KEYWORDS, pyexpat_ParserCreate__doc__}, |
|
1837 {"ErrorString", (PyCFunction)pyexpat_ErrorString, |
|
1838 METH_VARARGS, pyexpat_ErrorString__doc__}, |
|
1839 |
|
1840 {NULL, (PyCFunction)NULL, 0, NULL} /* sentinel */ |
|
1841 }; |
|
1842 |
|
1843 /* Module docstring */ |
|
1844 |
|
1845 PyDoc_STRVAR(pyexpat_module_documentation, |
|
1846 "Python wrapper for Expat parser."); |
|
1847 |
|
1848 /* Return a Python string that represents the version number without the |
|
1849 * extra cruft added by revision control, even if the right options were |
|
1850 * given to the "cvs export" command to make it not include the extra |
|
1851 * cruft. |
|
1852 */ |
|
1853 static PyObject * |
|
1854 get_version_string(void) |
|
1855 { |
|
1856 static char *rcsid = "$Revision: 64048 $"; |
|
1857 char *rev = rcsid; |
|
1858 int i = 0; |
|
1859 |
|
1860 while (!isdigit(Py_CHARMASK(*rev))) |
|
1861 ++rev; |
|
1862 while (rev[i] != ' ' && rev[i] != '\0') |
|
1863 ++i; |
|
1864 |
|
1865 return PyString_FromStringAndSize(rev, i); |
|
1866 } |
|
1867 |
|
1868 /* Initialization function for the module */ |
|
1869 |
|
1870 #ifndef MODULE_NAME |
|
1871 #define MODULE_NAME "pyexpat" |
|
1872 #endif |
|
1873 |
|
1874 #ifndef MODULE_INITFUNC |
|
1875 #define MODULE_INITFUNC initpyexpat |
|
1876 #endif |
|
1877 |
|
1878 #ifndef PyMODINIT_FUNC |
|
1879 # ifdef MS_WINDOWS |
|
1880 # define PyMODINIT_FUNC __declspec(dllexport) void |
|
1881 # else |
|
1882 # define PyMODINIT_FUNC void |
|
1883 # endif |
|
1884 #endif |
|
1885 |
|
1886 PyMODINIT_FUNC MODULE_INITFUNC(void); /* avoid compiler warnings */ |
|
1887 |
|
1888 PyMODINIT_FUNC |
|
1889 MODULE_INITFUNC(void) |
|
1890 { |
|
1891 PyObject *m, *d; |
|
1892 PyObject *errmod_name = PyString_FromString(MODULE_NAME ".errors"); |
|
1893 PyObject *errors_module; |
|
1894 PyObject *modelmod_name; |
|
1895 PyObject *model_module; |
|
1896 PyObject *sys_modules; |
|
1897 static struct PyExpat_CAPI capi; |
|
1898 PyObject* capi_object; |
|
1899 |
|
1900 if (errmod_name == NULL) |
|
1901 return; |
|
1902 modelmod_name = PyString_FromString(MODULE_NAME ".model"); |
|
1903 if (modelmod_name == NULL) |
|
1904 return; |
|
1905 |
|
1906 Py_TYPE(&Xmlparsetype) = &PyType_Type; |
|
1907 |
|
1908 /* Create the module and add the functions */ |
|
1909 m = Py_InitModule3(MODULE_NAME, pyexpat_methods, |
|
1910 pyexpat_module_documentation); |
|
1911 if (m == NULL) |
|
1912 return; |
|
1913 |
|
1914 /* Add some symbolic constants to the module */ |
|
1915 if (ErrorObject == NULL) { |
|
1916 ErrorObject = PyErr_NewException("xml.parsers.expat.ExpatError", |
|
1917 NULL, NULL); |
|
1918 if (ErrorObject == NULL) |
|
1919 return; |
|
1920 } |
|
1921 Py_INCREF(ErrorObject); |
|
1922 PyModule_AddObject(m, "error", ErrorObject); |
|
1923 Py_INCREF(ErrorObject); |
|
1924 PyModule_AddObject(m, "ExpatError", ErrorObject); |
|
1925 Py_INCREF(&Xmlparsetype); |
|
1926 PyModule_AddObject(m, "XMLParserType", (PyObject *) &Xmlparsetype); |
|
1927 |
|
1928 PyModule_AddObject(m, "__version__", get_version_string()); |
|
1929 PyModule_AddStringConstant(m, "EXPAT_VERSION", |
|
1930 (char *) XML_ExpatVersion()); |
|
1931 { |
|
1932 XML_Expat_Version info = XML_ExpatVersionInfo(); |
|
1933 PyModule_AddObject(m, "version_info", |
|
1934 Py_BuildValue("(iii)", info.major, |
|
1935 info.minor, info.micro)); |
|
1936 } |
|
1937 #ifdef Py_USING_UNICODE |
|
1938 init_template_buffer(); |
|
1939 #endif |
|
1940 /* XXX When Expat supports some way of figuring out how it was |
|
1941 compiled, this should check and set native_encoding |
|
1942 appropriately. |
|
1943 */ |
|
1944 PyModule_AddStringConstant(m, "native_encoding", "UTF-8"); |
|
1945 |
|
1946 sys_modules = PySys_GetObject("modules"); |
|
1947 d = PyModule_GetDict(m); |
|
1948 errors_module = PyDict_GetItem(d, errmod_name); |
|
1949 if (errors_module == NULL) { |
|
1950 errors_module = PyModule_New(MODULE_NAME ".errors"); |
|
1951 if (errors_module != NULL) { |
|
1952 PyDict_SetItem(sys_modules, errmod_name, errors_module); |
|
1953 /* gives away the reference to errors_module */ |
|
1954 PyModule_AddObject(m, "errors", errors_module); |
|
1955 } |
|
1956 } |
|
1957 Py_DECREF(errmod_name); |
|
1958 model_module = PyDict_GetItem(d, modelmod_name); |
|
1959 if (model_module == NULL) { |
|
1960 model_module = PyModule_New(MODULE_NAME ".model"); |
|
1961 if (model_module != NULL) { |
|
1962 PyDict_SetItem(sys_modules, modelmod_name, model_module); |
|
1963 /* gives away the reference to model_module */ |
|
1964 PyModule_AddObject(m, "model", model_module); |
|
1965 } |
|
1966 } |
|
1967 Py_DECREF(modelmod_name); |
|
1968 if (errors_module == NULL || model_module == NULL) |
|
1969 /* Don't core dump later! */ |
|
1970 return; |
|
1971 |
|
1972 #if XML_COMBINED_VERSION > 19505 |
|
1973 { |
|
1974 const XML_Feature *features = XML_GetFeatureList(); |
|
1975 PyObject *list = PyList_New(0); |
|
1976 if (list == NULL) |
|
1977 /* just ignore it */ |
|
1978 PyErr_Clear(); |
|
1979 else { |
|
1980 int i = 0; |
|
1981 for (; features[i].feature != XML_FEATURE_END; ++i) { |
|
1982 int ok; |
|
1983 PyObject *item = Py_BuildValue("si", features[i].name, |
|
1984 features[i].value); |
|
1985 if (item == NULL) { |
|
1986 Py_DECREF(list); |
|
1987 list = NULL; |
|
1988 break; |
|
1989 } |
|
1990 ok = PyList_Append(list, item); |
|
1991 Py_DECREF(item); |
|
1992 if (ok < 0) { |
|
1993 PyErr_Clear(); |
|
1994 break; |
|
1995 } |
|
1996 } |
|
1997 if (list != NULL) |
|
1998 PyModule_AddObject(m, "features", list); |
|
1999 } |
|
2000 } |
|
2001 #endif |
|
2002 |
|
2003 #define MYCONST(name) \ |
|
2004 PyModule_AddStringConstant(errors_module, #name, \ |
|
2005 (char*)XML_ErrorString(name)) |
|
2006 |
|
2007 MYCONST(XML_ERROR_NO_MEMORY); |
|
2008 MYCONST(XML_ERROR_SYNTAX); |
|
2009 MYCONST(XML_ERROR_NO_ELEMENTS); |
|
2010 MYCONST(XML_ERROR_INVALID_TOKEN); |
|
2011 MYCONST(XML_ERROR_UNCLOSED_TOKEN); |
|
2012 MYCONST(XML_ERROR_PARTIAL_CHAR); |
|
2013 MYCONST(XML_ERROR_TAG_MISMATCH); |
|
2014 MYCONST(XML_ERROR_DUPLICATE_ATTRIBUTE); |
|
2015 MYCONST(XML_ERROR_JUNK_AFTER_DOC_ELEMENT); |
|
2016 MYCONST(XML_ERROR_PARAM_ENTITY_REF); |
|
2017 MYCONST(XML_ERROR_UNDEFINED_ENTITY); |
|
2018 MYCONST(XML_ERROR_RECURSIVE_ENTITY_REF); |
|
2019 MYCONST(XML_ERROR_ASYNC_ENTITY); |
|
2020 MYCONST(XML_ERROR_BAD_CHAR_REF); |
|
2021 MYCONST(XML_ERROR_BINARY_ENTITY_REF); |
|
2022 MYCONST(XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF); |
|
2023 MYCONST(XML_ERROR_MISPLACED_XML_PI); |
|
2024 MYCONST(XML_ERROR_UNKNOWN_ENCODING); |
|
2025 MYCONST(XML_ERROR_INCORRECT_ENCODING); |
|
2026 MYCONST(XML_ERROR_UNCLOSED_CDATA_SECTION); |
|
2027 MYCONST(XML_ERROR_EXTERNAL_ENTITY_HANDLING); |
|
2028 MYCONST(XML_ERROR_NOT_STANDALONE); |
|
2029 MYCONST(XML_ERROR_UNEXPECTED_STATE); |
|
2030 MYCONST(XML_ERROR_ENTITY_DECLARED_IN_PE); |
|
2031 MYCONST(XML_ERROR_FEATURE_REQUIRES_XML_DTD); |
|
2032 MYCONST(XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING); |
|
2033 /* Added in Expat 1.95.7. */ |
|
2034 MYCONST(XML_ERROR_UNBOUND_PREFIX); |
|
2035 /* Added in Expat 1.95.8. */ |
|
2036 MYCONST(XML_ERROR_UNDECLARING_PREFIX); |
|
2037 MYCONST(XML_ERROR_INCOMPLETE_PE); |
|
2038 MYCONST(XML_ERROR_XML_DECL); |
|
2039 MYCONST(XML_ERROR_TEXT_DECL); |
|
2040 MYCONST(XML_ERROR_PUBLICID); |
|
2041 MYCONST(XML_ERROR_SUSPENDED); |
|
2042 MYCONST(XML_ERROR_NOT_SUSPENDED); |
|
2043 MYCONST(XML_ERROR_ABORTED); |
|
2044 MYCONST(XML_ERROR_FINISHED); |
|
2045 MYCONST(XML_ERROR_SUSPEND_PE); |
|
2046 |
|
2047 PyModule_AddStringConstant(errors_module, "__doc__", |
|
2048 "Constants used to describe error conditions."); |
|
2049 |
|
2050 #undef MYCONST |
|
2051 |
|
2052 #define MYCONST(c) PyModule_AddIntConstant(m, #c, c) |
|
2053 MYCONST(XML_PARAM_ENTITY_PARSING_NEVER); |
|
2054 MYCONST(XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE); |
|
2055 MYCONST(XML_PARAM_ENTITY_PARSING_ALWAYS); |
|
2056 #undef MYCONST |
|
2057 |
|
2058 #define MYCONST(c) PyModule_AddIntConstant(model_module, #c, c) |
|
2059 PyModule_AddStringConstant(model_module, "__doc__", |
|
2060 "Constants used to interpret content model information."); |
|
2061 |
|
2062 MYCONST(XML_CTYPE_EMPTY); |
|
2063 MYCONST(XML_CTYPE_ANY); |
|
2064 MYCONST(XML_CTYPE_MIXED); |
|
2065 MYCONST(XML_CTYPE_NAME); |
|
2066 MYCONST(XML_CTYPE_CHOICE); |
|
2067 MYCONST(XML_CTYPE_SEQ); |
|
2068 |
|
2069 MYCONST(XML_CQUANT_NONE); |
|
2070 MYCONST(XML_CQUANT_OPT); |
|
2071 MYCONST(XML_CQUANT_REP); |
|
2072 MYCONST(XML_CQUANT_PLUS); |
|
2073 #undef MYCONST |
|
2074 |
|
2075 /* initialize pyexpat dispatch table */ |
|
2076 capi.size = sizeof(capi); |
|
2077 capi.magic = PyExpat_CAPI_MAGIC; |
|
2078 capi.MAJOR_VERSION = XML_MAJOR_VERSION; |
|
2079 capi.MINOR_VERSION = XML_MINOR_VERSION; |
|
2080 capi.MICRO_VERSION = XML_MICRO_VERSION; |
|
2081 capi.ErrorString = XML_ErrorString; |
|
2082 capi.GetErrorCode = XML_GetErrorCode; |
|
2083 capi.GetErrorColumnNumber = XML_GetErrorColumnNumber; |
|
2084 capi.GetErrorLineNumber = XML_GetErrorLineNumber; |
|
2085 capi.Parse = XML_Parse; |
|
2086 capi.ParserCreate_MM = XML_ParserCreate_MM; |
|
2087 capi.ParserFree = XML_ParserFree; |
|
2088 capi.SetCharacterDataHandler = XML_SetCharacterDataHandler; |
|
2089 capi.SetCommentHandler = XML_SetCommentHandler; |
|
2090 capi.SetDefaultHandlerExpand = XML_SetDefaultHandlerExpand; |
|
2091 capi.SetElementHandler = XML_SetElementHandler; |
|
2092 capi.SetNamespaceDeclHandler = XML_SetNamespaceDeclHandler; |
|
2093 capi.SetProcessingInstructionHandler = XML_SetProcessingInstructionHandler; |
|
2094 capi.SetUnknownEncodingHandler = XML_SetUnknownEncodingHandler; |
|
2095 capi.SetUserData = XML_SetUserData; |
|
2096 |
|
2097 /* export as cobject */ |
|
2098 capi_object = PyCObject_FromVoidPtr(&capi, NULL); |
|
2099 if (capi_object) |
|
2100 PyModule_AddObject(m, "expat_CAPI", capi_object); |
|
2101 } |
|
2102 |
|
2103 static void |
|
2104 clear_handlers(xmlparseobject *self, int initial) |
|
2105 { |
|
2106 int i = 0; |
|
2107 PyObject *temp; |
|
2108 |
|
2109 for (; handler_info[i].name != NULL; i++) { |
|
2110 if (initial) |
|
2111 self->handlers[i] = NULL; |
|
2112 else { |
|
2113 temp = self->handlers[i]; |
|
2114 self->handlers[i] = NULL; |
|
2115 Py_XDECREF(temp); |
|
2116 handler_info[i].setter(self->itself, NULL); |
|
2117 } |
|
2118 } |
|
2119 } |
|
2120 |
|
2121 static struct HandlerInfo handler_info[] = { |
|
2122 {"StartElementHandler", |
|
2123 (xmlhandlersetter)XML_SetStartElementHandler, |
|
2124 (xmlhandler)my_StartElementHandler}, |
|
2125 {"EndElementHandler", |
|
2126 (xmlhandlersetter)XML_SetEndElementHandler, |
|
2127 (xmlhandler)my_EndElementHandler}, |
|
2128 {"ProcessingInstructionHandler", |
|
2129 (xmlhandlersetter)XML_SetProcessingInstructionHandler, |
|
2130 (xmlhandler)my_ProcessingInstructionHandler}, |
|
2131 {"CharacterDataHandler", |
|
2132 (xmlhandlersetter)XML_SetCharacterDataHandler, |
|
2133 (xmlhandler)my_CharacterDataHandler}, |
|
2134 {"UnparsedEntityDeclHandler", |
|
2135 (xmlhandlersetter)XML_SetUnparsedEntityDeclHandler, |
|
2136 (xmlhandler)my_UnparsedEntityDeclHandler}, |
|
2137 {"NotationDeclHandler", |
|
2138 (xmlhandlersetter)XML_SetNotationDeclHandler, |
|
2139 (xmlhandler)my_NotationDeclHandler}, |
|
2140 {"StartNamespaceDeclHandler", |
|
2141 (xmlhandlersetter)XML_SetStartNamespaceDeclHandler, |
|
2142 (xmlhandler)my_StartNamespaceDeclHandler}, |
|
2143 {"EndNamespaceDeclHandler", |
|
2144 (xmlhandlersetter)XML_SetEndNamespaceDeclHandler, |
|
2145 (xmlhandler)my_EndNamespaceDeclHandler}, |
|
2146 {"CommentHandler", |
|
2147 (xmlhandlersetter)XML_SetCommentHandler, |
|
2148 (xmlhandler)my_CommentHandler}, |
|
2149 {"StartCdataSectionHandler", |
|
2150 (xmlhandlersetter)XML_SetStartCdataSectionHandler, |
|
2151 (xmlhandler)my_StartCdataSectionHandler}, |
|
2152 {"EndCdataSectionHandler", |
|
2153 (xmlhandlersetter)XML_SetEndCdataSectionHandler, |
|
2154 (xmlhandler)my_EndCdataSectionHandler}, |
|
2155 {"DefaultHandler", |
|
2156 (xmlhandlersetter)XML_SetDefaultHandler, |
|
2157 (xmlhandler)my_DefaultHandler}, |
|
2158 {"DefaultHandlerExpand", |
|
2159 (xmlhandlersetter)XML_SetDefaultHandlerExpand, |
|
2160 (xmlhandler)my_DefaultHandlerExpandHandler}, |
|
2161 {"NotStandaloneHandler", |
|
2162 (xmlhandlersetter)XML_SetNotStandaloneHandler, |
|
2163 (xmlhandler)my_NotStandaloneHandler}, |
|
2164 {"ExternalEntityRefHandler", |
|
2165 (xmlhandlersetter)XML_SetExternalEntityRefHandler, |
|
2166 (xmlhandler)my_ExternalEntityRefHandler}, |
|
2167 {"StartDoctypeDeclHandler", |
|
2168 (xmlhandlersetter)XML_SetStartDoctypeDeclHandler, |
|
2169 (xmlhandler)my_StartDoctypeDeclHandler}, |
|
2170 {"EndDoctypeDeclHandler", |
|
2171 (xmlhandlersetter)XML_SetEndDoctypeDeclHandler, |
|
2172 (xmlhandler)my_EndDoctypeDeclHandler}, |
|
2173 {"EntityDeclHandler", |
|
2174 (xmlhandlersetter)XML_SetEntityDeclHandler, |
|
2175 (xmlhandler)my_EntityDeclHandler}, |
|
2176 {"XmlDeclHandler", |
|
2177 (xmlhandlersetter)XML_SetXmlDeclHandler, |
|
2178 (xmlhandler)my_XmlDeclHandler}, |
|
2179 {"ElementDeclHandler", |
|
2180 (xmlhandlersetter)XML_SetElementDeclHandler, |
|
2181 (xmlhandler)my_ElementDeclHandler}, |
|
2182 {"AttlistDeclHandler", |
|
2183 (xmlhandlersetter)XML_SetAttlistDeclHandler, |
|
2184 (xmlhandler)my_AttlistDeclHandler}, |
|
2185 #if XML_COMBINED_VERSION >= 19504 |
|
2186 {"SkippedEntityHandler", |
|
2187 (xmlhandlersetter)XML_SetSkippedEntityHandler, |
|
2188 (xmlhandler)my_SkippedEntityHandler}, |
|
2189 #endif |
|
2190 |
|
2191 {NULL, NULL, NULL} /* sentinel */ |
|
2192 }; |