|
1 /* ------------------------------------------------------------------------ |
|
2 |
|
3 Python Codec Registry and support functions |
|
4 |
|
5 Written by Marc-Andre Lemburg (mal@lemburg.com). |
|
6 |
|
7 Copyright (c) Corporation for National Research Initiatives. |
|
8 |
|
9 ------------------------------------------------------------------------ */ |
|
10 |
|
11 #include "Python.h" |
|
12 #include <ctype.h> |
|
13 |
|
14 /* --- Codec Registry ----------------------------------------------------- */ |
|
15 |
|
16 /* Import the standard encodings package which will register the first |
|
17 codec search function. |
|
18 |
|
19 This is done in a lazy way so that the Unicode implementation does |
|
20 not downgrade startup time of scripts not needing it. |
|
21 |
|
22 ImportErrors are silently ignored by this function. Only one try is |
|
23 made. |
|
24 |
|
25 */ |
|
26 |
|
27 static int _PyCodecRegistry_Init(void); /* Forward */ |
|
28 |
|
29 int PyCodec_Register(PyObject *search_function) |
|
30 { |
|
31 PyInterpreterState *interp = PyThreadState_GET()->interp; |
|
32 if (interp->codec_search_path == NULL && _PyCodecRegistry_Init()) |
|
33 goto onError; |
|
34 if (search_function == NULL) { |
|
35 PyErr_BadArgument(); |
|
36 goto onError; |
|
37 } |
|
38 if (!PyCallable_Check(search_function)) { |
|
39 PyErr_SetString(PyExc_TypeError, "argument must be callable"); |
|
40 goto onError; |
|
41 } |
|
42 return PyList_Append(interp->codec_search_path, search_function); |
|
43 |
|
44 onError: |
|
45 return -1; |
|
46 } |
|
47 |
|
48 /* Convert a string to a normalized Python string: all characters are |
|
49 converted to lower case, spaces are replaced with underscores. */ |
|
50 |
|
51 static |
|
52 PyObject *normalizestring(const char *string) |
|
53 { |
|
54 register size_t i; |
|
55 size_t len = strlen(string); |
|
56 char *p; |
|
57 PyObject *v; |
|
58 |
|
59 if (len > PY_SSIZE_T_MAX) { |
|
60 PyErr_SetString(PyExc_OverflowError, "string is too large"); |
|
61 return NULL; |
|
62 } |
|
63 |
|
64 v = PyString_FromStringAndSize(NULL, len); |
|
65 if (v == NULL) |
|
66 return NULL; |
|
67 p = PyString_AS_STRING(v); |
|
68 for (i = 0; i < len; i++) { |
|
69 register char ch = string[i]; |
|
70 if (ch == ' ') |
|
71 ch = '-'; |
|
72 else |
|
73 ch = tolower(Py_CHARMASK(ch)); |
|
74 p[i] = ch; |
|
75 } |
|
76 return v; |
|
77 } |
|
78 |
|
79 /* Lookup the given encoding and return a tuple providing the codec |
|
80 facilities. |
|
81 |
|
82 The encoding string is looked up converted to all lower-case |
|
83 characters. This makes encodings looked up through this mechanism |
|
84 effectively case-insensitive. |
|
85 |
|
86 If no codec is found, a LookupError is set and NULL returned. |
|
87 |
|
88 As side effect, this tries to load the encodings package, if not |
|
89 yet done. This is part of the lazy load strategy for the encodings |
|
90 package. |
|
91 |
|
92 */ |
|
93 |
|
94 PyObject *_PyCodec_Lookup(const char *encoding) |
|
95 { |
|
96 PyInterpreterState *interp; |
|
97 PyObject *result, *args = NULL, *v; |
|
98 Py_ssize_t i, len; |
|
99 |
|
100 if (encoding == NULL) { |
|
101 PyErr_BadArgument(); |
|
102 goto onError; |
|
103 } |
|
104 |
|
105 interp = PyThreadState_GET()->interp; |
|
106 if (interp->codec_search_path == NULL && _PyCodecRegistry_Init()) |
|
107 goto onError; |
|
108 |
|
109 /* Convert the encoding to a normalized Python string: all |
|
110 characters are converted to lower case, spaces and hyphens are |
|
111 replaced with underscores. */ |
|
112 v = normalizestring(encoding); |
|
113 if (v == NULL) |
|
114 goto onError; |
|
115 PyString_InternInPlace(&v); |
|
116 |
|
117 /* First, try to lookup the name in the registry dictionary */ |
|
118 result = PyDict_GetItem(interp->codec_search_cache, v); |
|
119 if (result != NULL) { |
|
120 Py_INCREF(result); |
|
121 Py_DECREF(v); |
|
122 return result; |
|
123 } |
|
124 |
|
125 /* Next, scan the search functions in order of registration */ |
|
126 args = PyTuple_New(1); |
|
127 if (args == NULL) |
|
128 goto onError; |
|
129 PyTuple_SET_ITEM(args,0,v); |
|
130 |
|
131 len = PyList_Size(interp->codec_search_path); |
|
132 if (len < 0) |
|
133 goto onError; |
|
134 if (len == 0) { |
|
135 PyErr_SetString(PyExc_LookupError, |
|
136 "no codec search functions registered: " |
|
137 "can't find encoding"); |
|
138 goto onError; |
|
139 } |
|
140 |
|
141 for (i = 0; i < len; i++) { |
|
142 PyObject *func; |
|
143 |
|
144 func = PyList_GetItem(interp->codec_search_path, i); |
|
145 if (func == NULL) |
|
146 goto onError; |
|
147 result = PyEval_CallObject(func, args); |
|
148 if (result == NULL) |
|
149 goto onError; |
|
150 if (result == Py_None) { |
|
151 Py_DECREF(result); |
|
152 continue; |
|
153 } |
|
154 if (!PyTuple_Check(result) || PyTuple_GET_SIZE(result) != 4) { |
|
155 PyErr_SetString(PyExc_TypeError, |
|
156 "codec search functions must return 4-tuples"); |
|
157 Py_DECREF(result); |
|
158 goto onError; |
|
159 } |
|
160 break; |
|
161 } |
|
162 if (i == len) { |
|
163 /* XXX Perhaps we should cache misses too ? */ |
|
164 PyErr_Format(PyExc_LookupError, |
|
165 "unknown encoding: %s", encoding); |
|
166 goto onError; |
|
167 } |
|
168 |
|
169 /* Cache and return the result */ |
|
170 PyDict_SetItem(interp->codec_search_cache, v, result); |
|
171 Py_DECREF(args); |
|
172 return result; |
|
173 |
|
174 onError: |
|
175 Py_XDECREF(args); |
|
176 return NULL; |
|
177 } |
|
178 |
|
179 static |
|
180 PyObject *args_tuple(PyObject *object, |
|
181 const char *errors) |
|
182 { |
|
183 PyObject *args; |
|
184 |
|
185 args = PyTuple_New(1 + (errors != NULL)); |
|
186 if (args == NULL) |
|
187 return NULL; |
|
188 Py_INCREF(object); |
|
189 PyTuple_SET_ITEM(args,0,object); |
|
190 if (errors) { |
|
191 PyObject *v; |
|
192 |
|
193 v = PyString_FromString(errors); |
|
194 if (v == NULL) { |
|
195 Py_DECREF(args); |
|
196 return NULL; |
|
197 } |
|
198 PyTuple_SET_ITEM(args, 1, v); |
|
199 } |
|
200 return args; |
|
201 } |
|
202 |
|
203 /* Helper function to get a codec item */ |
|
204 |
|
205 static |
|
206 PyObject *codec_getitem(const char *encoding, int index) |
|
207 { |
|
208 PyObject *codecs; |
|
209 PyObject *v; |
|
210 |
|
211 codecs = _PyCodec_Lookup(encoding); |
|
212 if (codecs == NULL) |
|
213 return NULL; |
|
214 v = PyTuple_GET_ITEM(codecs, index); |
|
215 Py_DECREF(codecs); |
|
216 Py_INCREF(v); |
|
217 return v; |
|
218 } |
|
219 |
|
220 /* Helper function to create an incremental codec. */ |
|
221 |
|
222 static |
|
223 PyObject *codec_getincrementalcodec(const char *encoding, |
|
224 const char *errors, |
|
225 const char *attrname) |
|
226 { |
|
227 PyObject *codecs, *ret, *inccodec; |
|
228 |
|
229 codecs = _PyCodec_Lookup(encoding); |
|
230 if (codecs == NULL) |
|
231 return NULL; |
|
232 inccodec = PyObject_GetAttrString(codecs, attrname); |
|
233 Py_DECREF(codecs); |
|
234 if (inccodec == NULL) |
|
235 return NULL; |
|
236 if (errors) |
|
237 ret = PyObject_CallFunction(inccodec, "s", errors); |
|
238 else |
|
239 ret = PyObject_CallFunction(inccodec, NULL); |
|
240 Py_DECREF(inccodec); |
|
241 return ret; |
|
242 } |
|
243 |
|
244 /* Helper function to create a stream codec. */ |
|
245 |
|
246 static |
|
247 PyObject *codec_getstreamcodec(const char *encoding, |
|
248 PyObject *stream, |
|
249 const char *errors, |
|
250 const int index) |
|
251 { |
|
252 PyObject *codecs, *streamcodec, *codeccls; |
|
253 |
|
254 codecs = _PyCodec_Lookup(encoding); |
|
255 if (codecs == NULL) |
|
256 return NULL; |
|
257 |
|
258 codeccls = PyTuple_GET_ITEM(codecs, index); |
|
259 if (errors != NULL) |
|
260 streamcodec = PyObject_CallFunction(codeccls, "Os", stream, errors); |
|
261 else |
|
262 streamcodec = PyObject_CallFunction(codeccls, "O", stream); |
|
263 Py_DECREF(codecs); |
|
264 return streamcodec; |
|
265 } |
|
266 |
|
267 /* Convenience APIs to query the Codec registry. |
|
268 |
|
269 All APIs return a codec object with incremented refcount. |
|
270 |
|
271 */ |
|
272 |
|
273 PyObject *PyCodec_Encoder(const char *encoding) |
|
274 { |
|
275 return codec_getitem(encoding, 0); |
|
276 } |
|
277 |
|
278 PyObject *PyCodec_Decoder(const char *encoding) |
|
279 { |
|
280 return codec_getitem(encoding, 1); |
|
281 } |
|
282 |
|
283 PyObject *PyCodec_IncrementalEncoder(const char *encoding, |
|
284 const char *errors) |
|
285 { |
|
286 return codec_getincrementalcodec(encoding, errors, "incrementalencoder"); |
|
287 } |
|
288 |
|
289 PyObject *PyCodec_IncrementalDecoder(const char *encoding, |
|
290 const char *errors) |
|
291 { |
|
292 return codec_getincrementalcodec(encoding, errors, "incrementaldecoder"); |
|
293 } |
|
294 |
|
295 PyObject *PyCodec_StreamReader(const char *encoding, |
|
296 PyObject *stream, |
|
297 const char *errors) |
|
298 { |
|
299 return codec_getstreamcodec(encoding, stream, errors, 2); |
|
300 } |
|
301 |
|
302 PyObject *PyCodec_StreamWriter(const char *encoding, |
|
303 PyObject *stream, |
|
304 const char *errors) |
|
305 { |
|
306 return codec_getstreamcodec(encoding, stream, errors, 3); |
|
307 } |
|
308 |
|
309 /* Encode an object (e.g. an Unicode object) using the given encoding |
|
310 and return the resulting encoded object (usually a Python string). |
|
311 |
|
312 errors is passed to the encoder factory as argument if non-NULL. */ |
|
313 |
|
314 PyObject *PyCodec_Encode(PyObject *object, |
|
315 const char *encoding, |
|
316 const char *errors) |
|
317 { |
|
318 PyObject *encoder = NULL; |
|
319 PyObject *args = NULL, *result = NULL; |
|
320 PyObject *v; |
|
321 |
|
322 encoder = PyCodec_Encoder(encoding); |
|
323 if (encoder == NULL) |
|
324 goto onError; |
|
325 |
|
326 args = args_tuple(object, errors); |
|
327 if (args == NULL) |
|
328 goto onError; |
|
329 |
|
330 result = PyEval_CallObject(encoder,args); |
|
331 if (result == NULL) |
|
332 goto onError; |
|
333 |
|
334 if (!PyTuple_Check(result) || |
|
335 PyTuple_GET_SIZE(result) != 2) { |
|
336 PyErr_SetString(PyExc_TypeError, |
|
337 "encoder must return a tuple (object,integer)"); |
|
338 goto onError; |
|
339 } |
|
340 v = PyTuple_GET_ITEM(result,0); |
|
341 Py_INCREF(v); |
|
342 /* We don't check or use the second (integer) entry. */ |
|
343 |
|
344 Py_DECREF(args); |
|
345 Py_DECREF(encoder); |
|
346 Py_DECREF(result); |
|
347 return v; |
|
348 |
|
349 onError: |
|
350 Py_XDECREF(result); |
|
351 Py_XDECREF(args); |
|
352 Py_XDECREF(encoder); |
|
353 return NULL; |
|
354 } |
|
355 |
|
356 /* Decode an object (usually a Python string) using the given encoding |
|
357 and return an equivalent object (e.g. an Unicode object). |
|
358 |
|
359 errors is passed to the decoder factory as argument if non-NULL. */ |
|
360 |
|
361 PyObject *PyCodec_Decode(PyObject *object, |
|
362 const char *encoding, |
|
363 const char *errors) |
|
364 { |
|
365 PyObject *decoder = NULL; |
|
366 PyObject *args = NULL, *result = NULL; |
|
367 PyObject *v; |
|
368 |
|
369 decoder = PyCodec_Decoder(encoding); |
|
370 if (decoder == NULL) |
|
371 goto onError; |
|
372 |
|
373 args = args_tuple(object, errors); |
|
374 if (args == NULL) |
|
375 goto onError; |
|
376 |
|
377 result = PyEval_CallObject(decoder,args); |
|
378 if (result == NULL) |
|
379 goto onError; |
|
380 if (!PyTuple_Check(result) || |
|
381 PyTuple_GET_SIZE(result) != 2) { |
|
382 PyErr_SetString(PyExc_TypeError, |
|
383 "decoder must return a tuple (object,integer)"); |
|
384 goto onError; |
|
385 } |
|
386 v = PyTuple_GET_ITEM(result,0); |
|
387 Py_INCREF(v); |
|
388 /* We don't check or use the second (integer) entry. */ |
|
389 |
|
390 Py_DECREF(args); |
|
391 Py_DECREF(decoder); |
|
392 Py_DECREF(result); |
|
393 return v; |
|
394 |
|
395 onError: |
|
396 Py_XDECREF(args); |
|
397 Py_XDECREF(decoder); |
|
398 Py_XDECREF(result); |
|
399 return NULL; |
|
400 } |
|
401 |
|
/* Register the error handling callback function error under the name
   name. This callback function will be called by a codec when it
   encounters unencodable characters/undecodable bytes and name is
   specified as the error parameter in the call to the encode/decode
   function.
   Return 0 on success, -1 on error */
|
408 int PyCodec_RegisterError(const char *name, PyObject *error) |
|
409 { |
|
410 PyInterpreterState *interp = PyThreadState_GET()->interp; |
|
411 if (interp->codec_search_path == NULL && _PyCodecRegistry_Init()) |
|
412 return -1; |
|
413 if (!PyCallable_Check(error)) { |
|
414 PyErr_SetString(PyExc_TypeError, "handler must be callable"); |
|
415 return -1; |
|
416 } |
|
417 return PyDict_SetItemString(interp->codec_error_registry, |
|
418 (char *)name, error); |
|
419 } |
|
420 |
|
421 /* Lookup the error handling callback function registered under the |
|
422 name error. As a special case NULL can be passed, in which case |
|
423 the error handling callback for strict encoding will be returned. */ |
|
424 PyObject *PyCodec_LookupError(const char *name) |
|
425 { |
|
426 PyObject *handler = NULL; |
|
427 |
|
428 PyInterpreterState *interp = PyThreadState_GET()->interp; |
|
429 if (interp->codec_search_path == NULL && _PyCodecRegistry_Init()) |
|
430 return NULL; |
|
431 |
|
432 if (name==NULL) |
|
433 name = "strict"; |
|
434 handler = PyDict_GetItemString(interp->codec_error_registry, (char *)name); |
|
435 if (!handler) |
|
436 PyErr_Format(PyExc_LookupError, "unknown error handler name '%.400s'", name); |
|
437 else |
|
438 Py_INCREF(handler); |
|
439 return handler; |
|
440 } |
|
441 |
|
442 static void wrong_exception_type(PyObject *exc) |
|
443 { |
|
444 PyObject *type = PyObject_GetAttrString(exc, "__class__"); |
|
445 if (type != NULL) { |
|
446 PyObject *name = PyObject_GetAttrString(type, "__name__"); |
|
447 Py_DECREF(type); |
|
448 if (name != NULL) { |
|
449 PyObject *string = PyObject_Str(name); |
|
450 Py_DECREF(name); |
|
451 if (string != NULL) { |
|
452 PyErr_Format(PyExc_TypeError, |
|
453 "don't know how to handle %.400s in error callback", |
|
454 PyString_AS_STRING(string)); |
|
455 Py_DECREF(string); |
|
456 } |
|
457 } |
|
458 } |
|
459 } |
|
460 |
|
461 PyObject *PyCodec_StrictErrors(PyObject *exc) |
|
462 { |
|
463 if (PyExceptionInstance_Check(exc)) |
|
464 PyErr_SetObject(PyExceptionInstance_Class(exc), exc); |
|
465 else |
|
466 PyErr_SetString(PyExc_TypeError, "codec must pass exception instance"); |
|
467 return NULL; |
|
468 } |
|
469 |
|
470 |
|
471 #ifdef Py_USING_UNICODE |
|
472 PyObject *PyCodec_IgnoreErrors(PyObject *exc) |
|
473 { |
|
474 Py_ssize_t end; |
|
475 if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) { |
|
476 if (PyUnicodeEncodeError_GetEnd(exc, &end)) |
|
477 return NULL; |
|
478 } |
|
479 else if (PyObject_IsInstance(exc, PyExc_UnicodeDecodeError)) { |
|
480 if (PyUnicodeDecodeError_GetEnd(exc, &end)) |
|
481 return NULL; |
|
482 } |
|
483 else if (PyObject_IsInstance(exc, PyExc_UnicodeTranslateError)) { |
|
484 if (PyUnicodeTranslateError_GetEnd(exc, &end)) |
|
485 return NULL; |
|
486 } |
|
487 else { |
|
488 wrong_exception_type(exc); |
|
489 return NULL; |
|
490 } |
|
491 /* ouch: passing NULL, 0, pos gives None instead of u'' */ |
|
492 return Py_BuildValue("(u#n)", &end, 0, end); |
|
493 } |
|
494 |
|
495 |
|
496 PyObject *PyCodec_ReplaceErrors(PyObject *exc) |
|
497 { |
|
498 PyObject *restuple; |
|
499 Py_ssize_t start; |
|
500 Py_ssize_t end; |
|
501 Py_ssize_t i; |
|
502 |
|
503 if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) { |
|
504 PyObject *res; |
|
505 Py_UNICODE *p; |
|
506 if (PyUnicodeEncodeError_GetStart(exc, &start)) |
|
507 return NULL; |
|
508 if (PyUnicodeEncodeError_GetEnd(exc, &end)) |
|
509 return NULL; |
|
510 res = PyUnicode_FromUnicode(NULL, end-start); |
|
511 if (res == NULL) |
|
512 return NULL; |
|
513 for (p = PyUnicode_AS_UNICODE(res), i = start; |
|
514 i<end; ++p, ++i) |
|
515 *p = '?'; |
|
516 restuple = Py_BuildValue("(On)", res, end); |
|
517 Py_DECREF(res); |
|
518 return restuple; |
|
519 } |
|
520 else if (PyObject_IsInstance(exc, PyExc_UnicodeDecodeError)) { |
|
521 Py_UNICODE res = Py_UNICODE_REPLACEMENT_CHARACTER; |
|
522 if (PyUnicodeDecodeError_GetEnd(exc, &end)) |
|
523 return NULL; |
|
524 return Py_BuildValue("(u#n)", &res, 1, end); |
|
525 } |
|
526 else if (PyObject_IsInstance(exc, PyExc_UnicodeTranslateError)) { |
|
527 PyObject *res; |
|
528 Py_UNICODE *p; |
|
529 if (PyUnicodeTranslateError_GetStart(exc, &start)) |
|
530 return NULL; |
|
531 if (PyUnicodeTranslateError_GetEnd(exc, &end)) |
|
532 return NULL; |
|
533 res = PyUnicode_FromUnicode(NULL, end-start); |
|
534 if (res == NULL) |
|
535 return NULL; |
|
536 for (p = PyUnicode_AS_UNICODE(res), i = start; |
|
537 i<end; ++p, ++i) |
|
538 *p = Py_UNICODE_REPLACEMENT_CHARACTER; |
|
539 restuple = Py_BuildValue("(On)", res, end); |
|
540 Py_DECREF(res); |
|
541 return restuple; |
|
542 } |
|
543 else { |
|
544 wrong_exception_type(exc); |
|
545 return NULL; |
|
546 } |
|
547 } |
|
548 |
|
549 PyObject *PyCodec_XMLCharRefReplaceErrors(PyObject *exc) |
|
550 { |
|
551 if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) { |
|
552 PyObject *restuple; |
|
553 PyObject *object; |
|
554 Py_ssize_t start; |
|
555 Py_ssize_t end; |
|
556 PyObject *res; |
|
557 Py_UNICODE *p; |
|
558 Py_UNICODE *startp; |
|
559 Py_UNICODE *outp; |
|
560 int ressize; |
|
561 if (PyUnicodeEncodeError_GetStart(exc, &start)) |
|
562 return NULL; |
|
563 if (PyUnicodeEncodeError_GetEnd(exc, &end)) |
|
564 return NULL; |
|
565 if (!(object = PyUnicodeEncodeError_GetObject(exc))) |
|
566 return NULL; |
|
567 startp = PyUnicode_AS_UNICODE(object); |
|
568 for (p = startp+start, ressize = 0; p < startp+end; ++p) { |
|
569 if (*p<10) |
|
570 ressize += 2+1+1; |
|
571 else if (*p<100) |
|
572 ressize += 2+2+1; |
|
573 else if (*p<1000) |
|
574 ressize += 2+3+1; |
|
575 else if (*p<10000) |
|
576 ressize += 2+4+1; |
|
577 #ifndef Py_UNICODE_WIDE |
|
578 else |
|
579 ressize += 2+5+1; |
|
580 #else |
|
581 else if (*p<100000) |
|
582 ressize += 2+5+1; |
|
583 else if (*p<1000000) |
|
584 ressize += 2+6+1; |
|
585 else |
|
586 ressize += 2+7+1; |
|
587 #endif |
|
588 } |
|
589 /* allocate replacement */ |
|
590 res = PyUnicode_FromUnicode(NULL, ressize); |
|
591 if (res == NULL) { |
|
592 Py_DECREF(object); |
|
593 return NULL; |
|
594 } |
|
595 /* generate replacement */ |
|
596 for (p = startp+start, outp = PyUnicode_AS_UNICODE(res); |
|
597 p < startp+end; ++p) { |
|
598 Py_UNICODE c = *p; |
|
599 int digits; |
|
600 int base; |
|
601 *outp++ = '&'; |
|
602 *outp++ = '#'; |
|
603 if (*p<10) { |
|
604 digits = 1; |
|
605 base = 1; |
|
606 } |
|
607 else if (*p<100) { |
|
608 digits = 2; |
|
609 base = 10; |
|
610 } |
|
611 else if (*p<1000) { |
|
612 digits = 3; |
|
613 base = 100; |
|
614 } |
|
615 else if (*p<10000) { |
|
616 digits = 4; |
|
617 base = 1000; |
|
618 } |
|
619 #ifndef Py_UNICODE_WIDE |
|
620 else { |
|
621 digits = 5; |
|
622 base = 10000; |
|
623 } |
|
624 #else |
|
625 else if (*p<100000) { |
|
626 digits = 5; |
|
627 base = 10000; |
|
628 } |
|
629 else if (*p<1000000) { |
|
630 digits = 6; |
|
631 base = 100000; |
|
632 } |
|
633 else { |
|
634 digits = 7; |
|
635 base = 1000000; |
|
636 } |
|
637 #endif |
|
638 while (digits-->0) { |
|
639 *outp++ = '0' + c/base; |
|
640 c %= base; |
|
641 base /= 10; |
|
642 } |
|
643 *outp++ = ';'; |
|
644 } |
|
645 restuple = Py_BuildValue("(On)", res, end); |
|
646 Py_DECREF(res); |
|
647 Py_DECREF(object); |
|
648 return restuple; |
|
649 } |
|
650 else { |
|
651 wrong_exception_type(exc); |
|
652 return NULL; |
|
653 } |
|
654 } |
|
655 |
|
656 static Py_UNICODE hexdigits[] = { |
|
657 '0', '1', '2', '3', '4', '5', '6', '7', |
|
658 '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' |
|
659 }; |
|
660 |
|
661 PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc) |
|
662 { |
|
663 if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) { |
|
664 PyObject *restuple; |
|
665 PyObject *object; |
|
666 Py_ssize_t start; |
|
667 Py_ssize_t end; |
|
668 PyObject *res; |
|
669 Py_UNICODE *p; |
|
670 Py_UNICODE *startp; |
|
671 Py_UNICODE *outp; |
|
672 int ressize; |
|
673 if (PyUnicodeEncodeError_GetStart(exc, &start)) |
|
674 return NULL; |
|
675 if (PyUnicodeEncodeError_GetEnd(exc, &end)) |
|
676 return NULL; |
|
677 if (!(object = PyUnicodeEncodeError_GetObject(exc))) |
|
678 return NULL; |
|
679 startp = PyUnicode_AS_UNICODE(object); |
|
680 for (p = startp+start, ressize = 0; p < startp+end; ++p) { |
|
681 #ifdef Py_UNICODE_WIDE |
|
682 if (*p >= 0x00010000) |
|
683 ressize += 1+1+8; |
|
684 else |
|
685 #endif |
|
686 if (*p >= 0x100) { |
|
687 ressize += 1+1+4; |
|
688 } |
|
689 else |
|
690 ressize += 1+1+2; |
|
691 } |
|
692 res = PyUnicode_FromUnicode(NULL, ressize); |
|
693 if (res==NULL) |
|
694 return NULL; |
|
695 for (p = startp+start, outp = PyUnicode_AS_UNICODE(res); |
|
696 p < startp+end; ++p) { |
|
697 Py_UNICODE c = *p; |
|
698 *outp++ = '\\'; |
|
699 #ifdef Py_UNICODE_WIDE |
|
700 if (c >= 0x00010000) { |
|
701 *outp++ = 'U'; |
|
702 *outp++ = hexdigits[(c>>28)&0xf]; |
|
703 *outp++ = hexdigits[(c>>24)&0xf]; |
|
704 *outp++ = hexdigits[(c>>20)&0xf]; |
|
705 *outp++ = hexdigits[(c>>16)&0xf]; |
|
706 *outp++ = hexdigits[(c>>12)&0xf]; |
|
707 *outp++ = hexdigits[(c>>8)&0xf]; |
|
708 } |
|
709 else |
|
710 #endif |
|
711 if (c >= 0x100) { |
|
712 *outp++ = 'u'; |
|
713 *outp++ = hexdigits[(c>>12)&0xf]; |
|
714 *outp++ = hexdigits[(c>>8)&0xf]; |
|
715 } |
|
716 else |
|
717 *outp++ = 'x'; |
|
718 *outp++ = hexdigits[(c>>4)&0xf]; |
|
719 *outp++ = hexdigits[c&0xf]; |
|
720 } |
|
721 |
|
722 restuple = Py_BuildValue("(On)", res, end); |
|
723 Py_DECREF(res); |
|
724 Py_DECREF(object); |
|
725 return restuple; |
|
726 } |
|
727 else { |
|
728 wrong_exception_type(exc); |
|
729 return NULL; |
|
730 } |
|
731 } |
|
732 #endif |
|
733 |
|
734 static PyObject *strict_errors(PyObject *self, PyObject *exc) |
|
735 { |
|
736 return PyCodec_StrictErrors(exc); |
|
737 } |
|
738 |
|
739 |
|
740 #ifdef Py_USING_UNICODE |
|
741 static PyObject *ignore_errors(PyObject *self, PyObject *exc) |
|
742 { |
|
743 return PyCodec_IgnoreErrors(exc); |
|
744 } |
|
745 |
|
746 |
|
747 static PyObject *replace_errors(PyObject *self, PyObject *exc) |
|
748 { |
|
749 return PyCodec_ReplaceErrors(exc); |
|
750 } |
|
751 |
|
752 |
|
753 static PyObject *xmlcharrefreplace_errors(PyObject *self, PyObject *exc) |
|
754 { |
|
755 return PyCodec_XMLCharRefReplaceErrors(exc); |
|
756 } |
|
757 |
|
758 |
|
759 static PyObject *backslashreplace_errors(PyObject *self, PyObject *exc) |
|
760 { |
|
761 return PyCodec_BackslashReplaceErrors(exc); |
|
762 } |
|
763 #endif |
|
764 |
|
765 static int _PyCodecRegistry_Init(void) |
|
766 { |
|
767 static struct { |
|
768 char *name; |
|
769 PyMethodDef def; |
|
770 } methods[] = |
|
771 { |
|
772 { |
|
773 "strict", |
|
774 { |
|
775 "strict_errors", |
|
776 strict_errors, |
|
777 METH_O |
|
778 } |
|
779 }, |
|
780 #ifdef Py_USING_UNICODE |
|
781 { |
|
782 "ignore", |
|
783 { |
|
784 "ignore_errors", |
|
785 ignore_errors, |
|
786 METH_O |
|
787 } |
|
788 }, |
|
789 { |
|
790 "replace", |
|
791 { |
|
792 "replace_errors", |
|
793 replace_errors, |
|
794 METH_O |
|
795 } |
|
796 }, |
|
797 { |
|
798 "xmlcharrefreplace", |
|
799 { |
|
800 "xmlcharrefreplace_errors", |
|
801 xmlcharrefreplace_errors, |
|
802 METH_O |
|
803 } |
|
804 }, |
|
805 { |
|
806 "backslashreplace", |
|
807 { |
|
808 "backslashreplace_errors", |
|
809 backslashreplace_errors, |
|
810 METH_O |
|
811 } |
|
812 } |
|
813 #endif |
|
814 }; |
|
815 |
|
816 PyInterpreterState *interp = PyThreadState_GET()->interp; |
|
817 PyObject *mod; |
|
818 unsigned i; |
|
819 |
|
820 if (interp->codec_search_path != NULL) |
|
821 return 0; |
|
822 |
|
823 interp->codec_search_path = PyList_New(0); |
|
824 interp->codec_search_cache = PyDict_New(); |
|
825 interp->codec_error_registry = PyDict_New(); |
|
826 |
|
827 if (interp->codec_error_registry) { |
|
828 for (i = 0; i < sizeof(methods)/sizeof(methods[0]); ++i) { |
|
829 PyObject *func = PyCFunction_New(&methods[i].def, NULL); |
|
830 int res; |
|
831 if (!func) |
|
832 Py_FatalError("can't initialize codec error registry"); |
|
833 res = PyCodec_RegisterError(methods[i].name, func); |
|
834 Py_DECREF(func); |
|
835 if (res) |
|
836 Py_FatalError("can't initialize codec error registry"); |
|
837 } |
|
838 } |
|
839 |
|
840 if (interp->codec_search_path == NULL || |
|
841 interp->codec_search_cache == NULL || |
|
842 interp->codec_error_registry == NULL) |
|
843 Py_FatalError("can't initialize codec registry"); |
|
844 |
|
845 mod = PyImport_ImportModuleLevel("encodings", NULL, NULL, NULL, 0); |
|
846 if (mod == NULL) { |
|
847 if (PyErr_ExceptionMatches(PyExc_ImportError)) { |
|
848 /* Ignore ImportErrors... this is done so that |
|
849 distributions can disable the encodings package. Note |
|
850 that other errors are not masked, e.g. SystemErrors |
|
851 raised to inform the user of an error in the Python |
|
852 configuration are still reported back to the user. */ |
|
853 PyErr_Clear(); |
|
854 return 0; |
|
855 } |
|
856 return -1; |
|
857 } |
|
858 Py_DECREF(mod); |
|
859 return 0; |
|
860 } |