|
1 /* |
|
2 * multibytecodec.c: Common Multibyte Codec Implementation |
|
3 * |
|
4 * Written by Hye-Shik Chang <perky@FreeBSD.org> |
|
5 */ |
|
6 |
|
7 #define PY_SSIZE_T_CLEAN |
|
8 #include "Python.h" |
|
9 #include "structmember.h" |
|
10 #include "multibytecodec.h" |
|
11 |
|
12 typedef struct { |
|
13 const Py_UNICODE *inbuf, *inbuf_top, *inbuf_end; |
|
14 unsigned char *outbuf, *outbuf_end; |
|
15 PyObject *excobj, *outobj; |
|
16 } MultibyteEncodeBuffer; |
|
17 |
|
18 typedef struct { |
|
19 const unsigned char *inbuf, *inbuf_top, *inbuf_end; |
|
20 Py_UNICODE *outbuf, *outbuf_end; |
|
21 PyObject *excobj, *outobj; |
|
22 } MultibyteDecodeBuffer; |
|
23 |
|
24 PyDoc_STRVAR(MultibyteCodec_Encode__doc__, |
|
25 "I.encode(unicode[, errors]) -> (string, length consumed)\n\ |
|
26 \n\ |
|
27 Return an encoded string version of `unicode'. errors may be given to\n\ |
|
28 set a different error handling scheme. Default is 'strict' meaning that\n\ |
|
29 encoding errors raise a UnicodeEncodeError. Other possible values are\n\ |
|
30 'ignore', 'replace' and 'xmlcharrefreplace' as well as any other name\n\ |
|
31 registered with codecs.register_error that can handle UnicodeEncodeErrors."); |
|
32 |
|
33 PyDoc_STRVAR(MultibyteCodec_Decode__doc__, |
|
34 "I.decode(string[, errors]) -> (unicodeobject, length consumed)\n\ |
|
35 \n\ |
|
36 Decodes `string' using I, an MultibyteCodec instance. errors may be given\n\ |
|
37 to set a different error handling scheme. Default is 'strict' meaning\n\ |
|
38 that encoding errors raise a UnicodeDecodeError. Other possible values\n\ |
|
39 are 'ignore' and 'replace' as well as any other name registered with\n\ |
|
40 codecs.register_error that is able to handle UnicodeDecodeErrors."); |
|
41 |
|
42 static char *codeckwarglist[] = {"input", "errors", NULL}; |
|
43 static char *incnewkwarglist[] = {"errors", NULL}; |
|
44 static char *incrementalkwarglist[] = {"input", "final", NULL}; |
|
45 static char *streamkwarglist[] = {"stream", "errors", NULL}; |
|
46 |
|
47 static PyObject *multibytecodec_encode(MultibyteCodec *, |
|
48 MultibyteCodec_State *, const Py_UNICODE **, Py_ssize_t, |
|
49 PyObject *, int); |
|
50 |
|
51 #define MBENC_RESET MBENC_MAX<<1 /* reset after an encoding session */ |
|
52 |
|
53 static PyObject * |
|
54 make_tuple(PyObject *object, Py_ssize_t len) |
|
55 { |
|
56 PyObject *v, *w; |
|
57 |
|
58 if (object == NULL) |
|
59 return NULL; |
|
60 |
|
61 v = PyTuple_New(2); |
|
62 if (v == NULL) { |
|
63 Py_DECREF(object); |
|
64 return NULL; |
|
65 } |
|
66 PyTuple_SET_ITEM(v, 0, object); |
|
67 |
|
68 w = PyInt_FromSsize_t(len); |
|
69 if (w == NULL) { |
|
70 Py_DECREF(v); |
|
71 return NULL; |
|
72 } |
|
73 PyTuple_SET_ITEM(v, 1, w); |
|
74 |
|
75 return v; |
|
76 } |
|
77 |
|
78 static PyObject * |
|
79 internal_error_callback(const char *errors) |
|
80 { |
|
81 if (errors == NULL || strcmp(errors, "strict") == 0) |
|
82 return ERROR_STRICT; |
|
83 else if (strcmp(errors, "ignore") == 0) |
|
84 return ERROR_IGNORE; |
|
85 else if (strcmp(errors, "replace") == 0) |
|
86 return ERROR_REPLACE; |
|
87 else |
|
88 return PyString_FromString(errors); |
|
89 } |
|
90 |
|
91 static PyObject * |
|
92 call_error_callback(PyObject *errors, PyObject *exc) |
|
93 { |
|
94 PyObject *args, *cb, *r; |
|
95 |
|
96 assert(PyString_Check(errors)); |
|
97 cb = PyCodec_LookupError(PyString_AS_STRING(errors)); |
|
98 if (cb == NULL) |
|
99 return NULL; |
|
100 |
|
101 args = PyTuple_New(1); |
|
102 if (args == NULL) { |
|
103 Py_DECREF(cb); |
|
104 return NULL; |
|
105 } |
|
106 |
|
107 PyTuple_SET_ITEM(args, 0, exc); |
|
108 Py_INCREF(exc); |
|
109 |
|
110 r = PyObject_CallObject(cb, args); |
|
111 Py_DECREF(args); |
|
112 Py_DECREF(cb); |
|
113 return r; |
|
114 } |
|
115 |
|
116 static PyObject * |
|
117 codecctx_errors_get(MultibyteStatefulCodecContext *self) |
|
118 { |
|
119 const char *errors; |
|
120 |
|
121 if (self->errors == ERROR_STRICT) |
|
122 errors = "strict"; |
|
123 else if (self->errors == ERROR_IGNORE) |
|
124 errors = "ignore"; |
|
125 else if (self->errors == ERROR_REPLACE) |
|
126 errors = "replace"; |
|
127 else { |
|
128 Py_INCREF(self->errors); |
|
129 return self->errors; |
|
130 } |
|
131 |
|
132 return PyString_FromString(errors); |
|
133 } |
|
134 |
|
135 static int |
|
136 codecctx_errors_set(MultibyteStatefulCodecContext *self, PyObject *value, |
|
137 void *closure) |
|
138 { |
|
139 PyObject *cb; |
|
140 |
|
141 if (!PyString_Check(value)) { |
|
142 PyErr_SetString(PyExc_TypeError, "errors must be a string"); |
|
143 return -1; |
|
144 } |
|
145 |
|
146 cb = internal_error_callback(PyString_AS_STRING(value)); |
|
147 if (cb == NULL) |
|
148 return -1; |
|
149 |
|
150 ERROR_DECREF(self->errors); |
|
151 self->errors = cb; |
|
152 return 0; |
|
153 } |
|
154 |
|
155 /* This getset handlers list is used by all the stateful codec objects */ |
|
156 static PyGetSetDef codecctx_getsets[] = { |
|
157 {"errors", (getter)codecctx_errors_get, |
|
158 (setter)codecctx_errors_set, |
|
159 PyDoc_STR("how to treat errors")}, |
|
160 {NULL,} |
|
161 }; |
|
162 |
|
163 static int |
|
164 expand_encodebuffer(MultibyteEncodeBuffer *buf, Py_ssize_t esize) |
|
165 { |
|
166 Py_ssize_t orgpos, orgsize, incsize; |
|
167 |
|
168 orgpos = (Py_ssize_t)((char *)buf->outbuf - |
|
169 PyString_AS_STRING(buf->outobj)); |
|
170 orgsize = PyString_GET_SIZE(buf->outobj); |
|
171 incsize = (esize < (orgsize >> 1) ? (orgsize >> 1) | 1 : esize); |
|
172 |
|
173 if (orgsize > PY_SSIZE_T_MAX - incsize) |
|
174 return -1; |
|
175 |
|
176 if (_PyString_Resize(&buf->outobj, orgsize + incsize) == -1) |
|
177 return -1; |
|
178 |
|
179 buf->outbuf = (unsigned char *)PyString_AS_STRING(buf->outobj) +orgpos; |
|
180 buf->outbuf_end = (unsigned char *)PyString_AS_STRING(buf->outobj) |
|
181 + PyString_GET_SIZE(buf->outobj); |
|
182 |
|
183 return 0; |
|
184 } |
|
185 #define REQUIRE_ENCODEBUFFER(buf, s) { \ |
|
186 if ((s) < 1 || (buf)->outbuf + (s) > (buf)->outbuf_end) \ |
|
187 if (expand_encodebuffer(buf, s) == -1) \ |
|
188 goto errorexit; \ |
|
189 } |
|
190 |
|
191 static int |
|
192 expand_decodebuffer(MultibyteDecodeBuffer *buf, Py_ssize_t esize) |
|
193 { |
|
194 Py_ssize_t orgpos, orgsize; |
|
195 |
|
196 orgpos = (Py_ssize_t)(buf->outbuf - PyUnicode_AS_UNICODE(buf->outobj)); |
|
197 orgsize = PyUnicode_GET_SIZE(buf->outobj); |
|
198 if (PyUnicode_Resize(&buf->outobj, orgsize + ( |
|
199 esize < (orgsize >> 1) ? (orgsize >> 1) | 1 : esize)) == -1) |
|
200 return -1; |
|
201 |
|
202 buf->outbuf = PyUnicode_AS_UNICODE(buf->outobj) + orgpos; |
|
203 buf->outbuf_end = PyUnicode_AS_UNICODE(buf->outobj) |
|
204 + PyUnicode_GET_SIZE(buf->outobj); |
|
205 |
|
206 return 0; |
|
207 } |
|
208 #define REQUIRE_DECODEBUFFER(buf, s) { \ |
|
209 if ((s) < 1 || (buf)->outbuf + (s) > (buf)->outbuf_end) \ |
|
210 if (expand_decodebuffer(buf, s) == -1) \ |
|
211 goto errorexit; \ |
|
212 } |
|
213 |
|
214 |
|
215 /** |
|
216 * MultibyteCodec object |
|
217 */ |
|
218 |
|
219 static int |
|
220 multibytecodec_encerror(MultibyteCodec *codec, |
|
221 MultibyteCodec_State *state, |
|
222 MultibyteEncodeBuffer *buf, |
|
223 PyObject *errors, Py_ssize_t e) |
|
224 { |
|
225 PyObject *retobj = NULL, *retstr = NULL, *tobj; |
|
226 Py_ssize_t retstrsize, newpos; |
|
227 Py_ssize_t esize, start, end; |
|
228 const char *reason; |
|
229 |
|
230 if (e > 0) { |
|
231 reason = "illegal multibyte sequence"; |
|
232 esize = e; |
|
233 } |
|
234 else { |
|
235 switch (e) { |
|
236 case MBERR_TOOSMALL: |
|
237 REQUIRE_ENCODEBUFFER(buf, -1); |
|
238 return 0; /* retry it */ |
|
239 case MBERR_TOOFEW: |
|
240 reason = "incomplete multibyte sequence"; |
|
241 esize = (Py_ssize_t)(buf->inbuf_end - buf->inbuf); |
|
242 break; |
|
243 case MBERR_INTERNAL: |
|
244 PyErr_SetString(PyExc_RuntimeError, |
|
245 "internal codec error"); |
|
246 return -1; |
|
247 default: |
|
248 PyErr_SetString(PyExc_RuntimeError, |
|
249 "unknown runtime error"); |
|
250 return -1; |
|
251 } |
|
252 } |
|
253 |
|
254 if (errors == ERROR_REPLACE) { |
|
255 const Py_UNICODE replchar = '?', *inbuf = &replchar; |
|
256 Py_ssize_t r; |
|
257 |
|
258 for (;;) { |
|
259 Py_ssize_t outleft; |
|
260 |
|
261 outleft = (Py_ssize_t)(buf->outbuf_end - buf->outbuf); |
|
262 r = codec->encode(state, codec->config, &inbuf, 1, |
|
263 &buf->outbuf, outleft, 0); |
|
264 if (r == MBERR_TOOSMALL) { |
|
265 REQUIRE_ENCODEBUFFER(buf, -1); |
|
266 continue; |
|
267 } |
|
268 else |
|
269 break; |
|
270 } |
|
271 |
|
272 if (r != 0) { |
|
273 REQUIRE_ENCODEBUFFER(buf, 1); |
|
274 *buf->outbuf++ = '?'; |
|
275 } |
|
276 } |
|
277 if (errors == ERROR_IGNORE || errors == ERROR_REPLACE) { |
|
278 buf->inbuf += esize; |
|
279 return 0; |
|
280 } |
|
281 |
|
282 start = (Py_ssize_t)(buf->inbuf - buf->inbuf_top); |
|
283 end = start + esize; |
|
284 |
|
285 /* use cached exception object if available */ |
|
286 if (buf->excobj == NULL) { |
|
287 buf->excobj = PyUnicodeEncodeError_Create(codec->encoding, |
|
288 buf->inbuf_top, |
|
289 buf->inbuf_end - buf->inbuf_top, |
|
290 start, end, reason); |
|
291 if (buf->excobj == NULL) |
|
292 goto errorexit; |
|
293 } |
|
294 else |
|
295 if (PyUnicodeEncodeError_SetStart(buf->excobj, start) != 0 || |
|
296 PyUnicodeEncodeError_SetEnd(buf->excobj, end) != 0 || |
|
297 PyUnicodeEncodeError_SetReason(buf->excobj, reason) != 0) |
|
298 goto errorexit; |
|
299 |
|
300 if (errors == ERROR_STRICT) { |
|
301 PyCodec_StrictErrors(buf->excobj); |
|
302 goto errorexit; |
|
303 } |
|
304 |
|
305 retobj = call_error_callback(errors, buf->excobj); |
|
306 if (retobj == NULL) |
|
307 goto errorexit; |
|
308 |
|
309 if (!PyTuple_Check(retobj) || PyTuple_GET_SIZE(retobj) != 2 || |
|
310 !PyUnicode_Check((tobj = PyTuple_GET_ITEM(retobj, 0))) || |
|
311 !(PyInt_Check(PyTuple_GET_ITEM(retobj, 1)) || |
|
312 PyLong_Check(PyTuple_GET_ITEM(retobj, 1)))) { |
|
313 PyErr_SetString(PyExc_TypeError, |
|
314 "encoding error handler must return " |
|
315 "(unicode, int) tuple"); |
|
316 goto errorexit; |
|
317 } |
|
318 |
|
319 { |
|
320 const Py_UNICODE *uraw = PyUnicode_AS_UNICODE(tobj); |
|
321 |
|
322 retstr = multibytecodec_encode(codec, state, &uraw, |
|
323 PyUnicode_GET_SIZE(tobj), ERROR_STRICT, |
|
324 MBENC_FLUSH); |
|
325 if (retstr == NULL) |
|
326 goto errorexit; |
|
327 } |
|
328 |
|
329 retstrsize = PyString_GET_SIZE(retstr); |
|
330 REQUIRE_ENCODEBUFFER(buf, retstrsize); |
|
331 |
|
332 memcpy(buf->outbuf, PyString_AS_STRING(retstr), retstrsize); |
|
333 buf->outbuf += retstrsize; |
|
334 |
|
335 newpos = PyInt_AsSsize_t(PyTuple_GET_ITEM(retobj, 1)); |
|
336 if (newpos < 0 && !PyErr_Occurred()) |
|
337 newpos += (Py_ssize_t)(buf->inbuf_end - buf->inbuf_top); |
|
338 if (newpos < 0 || buf->inbuf_top + newpos > buf->inbuf_end) { |
|
339 PyErr_Clear(); |
|
340 PyErr_Format(PyExc_IndexError, |
|
341 "position %zd from error handler out of bounds", |
|
342 newpos); |
|
343 goto errorexit; |
|
344 } |
|
345 buf->inbuf = buf->inbuf_top + newpos; |
|
346 |
|
347 Py_DECREF(retobj); |
|
348 Py_DECREF(retstr); |
|
349 return 0; |
|
350 |
|
351 errorexit: |
|
352 Py_XDECREF(retobj); |
|
353 Py_XDECREF(retstr); |
|
354 return -1; |
|
355 } |
|
356 |
|
357 static int |
|
358 multibytecodec_decerror(MultibyteCodec *codec, |
|
359 MultibyteCodec_State *state, |
|
360 MultibyteDecodeBuffer *buf, |
|
361 PyObject *errors, Py_ssize_t e) |
|
362 { |
|
363 PyObject *retobj = NULL, *retuni = NULL; |
|
364 Py_ssize_t retunisize, newpos; |
|
365 const char *reason; |
|
366 Py_ssize_t esize, start, end; |
|
367 |
|
368 if (e > 0) { |
|
369 reason = "illegal multibyte sequence"; |
|
370 esize = e; |
|
371 } |
|
372 else { |
|
373 switch (e) { |
|
374 case MBERR_TOOSMALL: |
|
375 REQUIRE_DECODEBUFFER(buf, -1); |
|
376 return 0; /* retry it */ |
|
377 case MBERR_TOOFEW: |
|
378 reason = "incomplete multibyte sequence"; |
|
379 esize = (Py_ssize_t)(buf->inbuf_end - buf->inbuf); |
|
380 break; |
|
381 case MBERR_INTERNAL: |
|
382 PyErr_SetString(PyExc_RuntimeError, |
|
383 "internal codec error"); |
|
384 return -1; |
|
385 default: |
|
386 PyErr_SetString(PyExc_RuntimeError, |
|
387 "unknown runtime error"); |
|
388 return -1; |
|
389 } |
|
390 } |
|
391 |
|
392 if (errors == ERROR_REPLACE) { |
|
393 REQUIRE_DECODEBUFFER(buf, 1); |
|
394 *buf->outbuf++ = Py_UNICODE_REPLACEMENT_CHARACTER; |
|
395 } |
|
396 if (errors == ERROR_IGNORE || errors == ERROR_REPLACE) { |
|
397 buf->inbuf += esize; |
|
398 return 0; |
|
399 } |
|
400 |
|
401 start = (Py_ssize_t)(buf->inbuf - buf->inbuf_top); |
|
402 end = start + esize; |
|
403 |
|
404 /* use cached exception object if available */ |
|
405 if (buf->excobj == NULL) { |
|
406 buf->excobj = PyUnicodeDecodeError_Create(codec->encoding, |
|
407 (const char *)buf->inbuf_top, |
|
408 (Py_ssize_t)(buf->inbuf_end - buf->inbuf_top), |
|
409 start, end, reason); |
|
410 if (buf->excobj == NULL) |
|
411 goto errorexit; |
|
412 } |
|
413 else |
|
414 if (PyUnicodeDecodeError_SetStart(buf->excobj, start) || |
|
415 PyUnicodeDecodeError_SetEnd(buf->excobj, end) || |
|
416 PyUnicodeDecodeError_SetReason(buf->excobj, reason)) |
|
417 goto errorexit; |
|
418 |
|
419 if (errors == ERROR_STRICT) { |
|
420 PyCodec_StrictErrors(buf->excobj); |
|
421 goto errorexit; |
|
422 } |
|
423 |
|
424 retobj = call_error_callback(errors, buf->excobj); |
|
425 if (retobj == NULL) |
|
426 goto errorexit; |
|
427 |
|
428 if (!PyTuple_Check(retobj) || PyTuple_GET_SIZE(retobj) != 2 || |
|
429 !PyUnicode_Check((retuni = PyTuple_GET_ITEM(retobj, 0))) || |
|
430 !(PyInt_Check(PyTuple_GET_ITEM(retobj, 1)) || |
|
431 PyLong_Check(PyTuple_GET_ITEM(retobj, 1)))) { |
|
432 PyErr_SetString(PyExc_TypeError, |
|
433 "decoding error handler must return " |
|
434 "(unicode, int) tuple"); |
|
435 goto errorexit; |
|
436 } |
|
437 |
|
438 retunisize = PyUnicode_GET_SIZE(retuni); |
|
439 if (retunisize > 0) { |
|
440 REQUIRE_DECODEBUFFER(buf, retunisize); |
|
441 memcpy((char *)buf->outbuf, PyUnicode_AS_DATA(retuni), |
|
442 retunisize * Py_UNICODE_SIZE); |
|
443 buf->outbuf += retunisize; |
|
444 } |
|
445 |
|
446 newpos = PyInt_AsSsize_t(PyTuple_GET_ITEM(retobj, 1)); |
|
447 if (newpos < 0 && !PyErr_Occurred()) |
|
448 newpos += (Py_ssize_t)(buf->inbuf_end - buf->inbuf_top); |
|
449 if (newpos < 0 || buf->inbuf_top + newpos > buf->inbuf_end) { |
|
450 PyErr_Clear(); |
|
451 PyErr_Format(PyExc_IndexError, |
|
452 "position %zd from error handler out of bounds", |
|
453 newpos); |
|
454 goto errorexit; |
|
455 } |
|
456 buf->inbuf = buf->inbuf_top + newpos; |
|
457 Py_DECREF(retobj); |
|
458 return 0; |
|
459 |
|
460 errorexit: |
|
461 Py_XDECREF(retobj); |
|
462 return -1; |
|
463 } |
|
464 |
|
465 static PyObject * |
|
466 multibytecodec_encode(MultibyteCodec *codec, |
|
467 MultibyteCodec_State *state, |
|
468 const Py_UNICODE **data, Py_ssize_t datalen, |
|
469 PyObject *errors, int flags) |
|
470 { |
|
471 MultibyteEncodeBuffer buf; |
|
472 Py_ssize_t finalsize, r = 0; |
|
473 |
|
474 if (datalen == 0) |
|
475 return PyString_FromString(""); |
|
476 |
|
477 buf.excobj = NULL; |
|
478 buf.inbuf = buf.inbuf_top = *data; |
|
479 buf.inbuf_end = buf.inbuf_top + datalen; |
|
480 |
|
481 if (datalen > (PY_SSIZE_T_MAX - 16) / 2) { |
|
482 PyErr_NoMemory(); |
|
483 goto errorexit; |
|
484 } |
|
485 |
|
486 buf.outobj = PyString_FromStringAndSize(NULL, datalen * 2 + 16); |
|
487 if (buf.outobj == NULL) |
|
488 goto errorexit; |
|
489 buf.outbuf = (unsigned char *)PyString_AS_STRING(buf.outobj); |
|
490 buf.outbuf_end = buf.outbuf + PyString_GET_SIZE(buf.outobj); |
|
491 |
|
492 while (buf.inbuf < buf.inbuf_end) { |
|
493 Py_ssize_t inleft, outleft; |
|
494 |
|
495 /* we don't reuse inleft and outleft here. |
|
496 * error callbacks can relocate the cursor anywhere on buffer*/ |
|
497 inleft = (Py_ssize_t)(buf.inbuf_end - buf.inbuf); |
|
498 outleft = (Py_ssize_t)(buf.outbuf_end - buf.outbuf); |
|
499 r = codec->encode(state, codec->config, &buf.inbuf, inleft, |
|
500 &buf.outbuf, outleft, flags); |
|
501 *data = buf.inbuf; |
|
502 if ((r == 0) || (r == MBERR_TOOFEW && !(flags & MBENC_FLUSH))) |
|
503 break; |
|
504 else if (multibytecodec_encerror(codec, state, &buf, errors,r)) |
|
505 goto errorexit; |
|
506 else if (r == MBERR_TOOFEW) |
|
507 break; |
|
508 } |
|
509 |
|
510 if (codec->encreset != NULL) |
|
511 for (;;) { |
|
512 Py_ssize_t outleft; |
|
513 |
|
514 outleft = (Py_ssize_t)(buf.outbuf_end - buf.outbuf); |
|
515 r = codec->encreset(state, codec->config, &buf.outbuf, |
|
516 outleft); |
|
517 if (r == 0) |
|
518 break; |
|
519 else if (multibytecodec_encerror(codec, state, |
|
520 &buf, errors, r)) |
|
521 goto errorexit; |
|
522 } |
|
523 |
|
524 finalsize = (Py_ssize_t)((char *)buf.outbuf - |
|
525 PyString_AS_STRING(buf.outobj)); |
|
526 |
|
527 if (finalsize != PyString_GET_SIZE(buf.outobj)) |
|
528 if (_PyString_Resize(&buf.outobj, finalsize) == -1) |
|
529 goto errorexit; |
|
530 |
|
531 Py_XDECREF(buf.excobj); |
|
532 return buf.outobj; |
|
533 |
|
534 errorexit: |
|
535 Py_XDECREF(buf.excobj); |
|
536 Py_XDECREF(buf.outobj); |
|
537 return NULL; |
|
538 } |
|
539 |
|
540 static PyObject * |
|
541 MultibyteCodec_Encode(MultibyteCodecObject *self, |
|
542 PyObject *args, PyObject *kwargs) |
|
543 { |
|
544 MultibyteCodec_State state; |
|
545 Py_UNICODE *data; |
|
546 PyObject *errorcb, *r, *arg, *ucvt; |
|
547 const char *errors = NULL; |
|
548 Py_ssize_t datalen; |
|
549 |
|
550 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|z:encode", |
|
551 codeckwarglist, &arg, &errors)) |
|
552 return NULL; |
|
553 |
|
554 if (PyUnicode_Check(arg)) |
|
555 ucvt = NULL; |
|
556 else { |
|
557 arg = ucvt = PyObject_Unicode(arg); |
|
558 if (arg == NULL) |
|
559 return NULL; |
|
560 else if (!PyUnicode_Check(arg)) { |
|
561 PyErr_SetString(PyExc_TypeError, |
|
562 "couldn't convert the object to unicode."); |
|
563 Py_DECREF(ucvt); |
|
564 return NULL; |
|
565 } |
|
566 } |
|
567 |
|
568 data = PyUnicode_AS_UNICODE(arg); |
|
569 datalen = PyUnicode_GET_SIZE(arg); |
|
570 |
|
571 errorcb = internal_error_callback(errors); |
|
572 if (errorcb == NULL) { |
|
573 Py_XDECREF(ucvt); |
|
574 return NULL; |
|
575 } |
|
576 |
|
577 if (self->codec->encinit != NULL && |
|
578 self->codec->encinit(&state, self->codec->config) != 0) |
|
579 goto errorexit; |
|
580 r = multibytecodec_encode(self->codec, &state, |
|
581 (const Py_UNICODE **)&data, datalen, errorcb, |
|
582 MBENC_FLUSH | MBENC_RESET); |
|
583 if (r == NULL) |
|
584 goto errorexit; |
|
585 |
|
586 ERROR_DECREF(errorcb); |
|
587 Py_XDECREF(ucvt); |
|
588 return make_tuple(r, datalen); |
|
589 |
|
590 errorexit: |
|
591 ERROR_DECREF(errorcb); |
|
592 Py_XDECREF(ucvt); |
|
593 return NULL; |
|
594 } |
|
595 |
|
596 static PyObject * |
|
597 MultibyteCodec_Decode(MultibyteCodecObject *self, |
|
598 PyObject *args, PyObject *kwargs) |
|
599 { |
|
600 MultibyteCodec_State state; |
|
601 MultibyteDecodeBuffer buf; |
|
602 PyObject *errorcb; |
|
603 Py_buffer pdata; |
|
604 const char *data, *errors = NULL; |
|
605 Py_ssize_t datalen, finalsize; |
|
606 |
|
607 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*|z:decode", |
|
608 codeckwarglist, &pdata, &errors)) |
|
609 return NULL; |
|
610 data = pdata.buf; |
|
611 datalen = pdata.len; |
|
612 |
|
613 errorcb = internal_error_callback(errors); |
|
614 if (errorcb == NULL) { |
|
615 PyBuffer_Release(&pdata); |
|
616 return NULL; |
|
617 } |
|
618 |
|
619 if (datalen == 0) { |
|
620 PyBuffer_Release(&pdata); |
|
621 ERROR_DECREF(errorcb); |
|
622 return make_tuple(PyUnicode_FromUnicode(NULL, 0), 0); |
|
623 } |
|
624 |
|
625 buf.excobj = NULL; |
|
626 buf.inbuf = buf.inbuf_top = (unsigned char *)data; |
|
627 buf.inbuf_end = buf.inbuf_top + datalen; |
|
628 buf.outobj = PyUnicode_FromUnicode(NULL, datalen); |
|
629 if (buf.outobj == NULL) |
|
630 goto errorexit; |
|
631 buf.outbuf = PyUnicode_AS_UNICODE(buf.outobj); |
|
632 buf.outbuf_end = buf.outbuf + PyUnicode_GET_SIZE(buf.outobj); |
|
633 |
|
634 if (self->codec->decinit != NULL && |
|
635 self->codec->decinit(&state, self->codec->config) != 0) |
|
636 goto errorexit; |
|
637 |
|
638 while (buf.inbuf < buf.inbuf_end) { |
|
639 Py_ssize_t inleft, outleft, r; |
|
640 |
|
641 inleft = (Py_ssize_t)(buf.inbuf_end - buf.inbuf); |
|
642 outleft = (Py_ssize_t)(buf.outbuf_end - buf.outbuf); |
|
643 |
|
644 r = self->codec->decode(&state, self->codec->config, |
|
645 &buf.inbuf, inleft, &buf.outbuf, outleft); |
|
646 if (r == 0) |
|
647 break; |
|
648 else if (multibytecodec_decerror(self->codec, &state, |
|
649 &buf, errorcb, r)) |
|
650 goto errorexit; |
|
651 } |
|
652 |
|
653 finalsize = (Py_ssize_t)(buf.outbuf - |
|
654 PyUnicode_AS_UNICODE(buf.outobj)); |
|
655 |
|
656 if (finalsize != PyUnicode_GET_SIZE(buf.outobj)) |
|
657 if (PyUnicode_Resize(&buf.outobj, finalsize) == -1) |
|
658 goto errorexit; |
|
659 |
|
660 PyBuffer_Release(&pdata); |
|
661 Py_XDECREF(buf.excobj); |
|
662 ERROR_DECREF(errorcb); |
|
663 return make_tuple(buf.outobj, datalen); |
|
664 |
|
665 errorexit: |
|
666 PyBuffer_Release(&pdata); |
|
667 ERROR_DECREF(errorcb); |
|
668 Py_XDECREF(buf.excobj); |
|
669 Py_XDECREF(buf.outobj); |
|
670 |
|
671 return NULL; |
|
672 } |
|
673 |
|
674 static struct PyMethodDef multibytecodec_methods[] = { |
|
675 {"encode", (PyCFunction)MultibyteCodec_Encode, |
|
676 METH_VARARGS | METH_KEYWORDS, |
|
677 MultibyteCodec_Encode__doc__}, |
|
678 {"decode", (PyCFunction)MultibyteCodec_Decode, |
|
679 METH_VARARGS | METH_KEYWORDS, |
|
680 MultibyteCodec_Decode__doc__}, |
|
681 {NULL, NULL}, |
|
682 }; |
|
683 |
|
684 static void |
|
685 multibytecodec_dealloc(MultibyteCodecObject *self) |
|
686 { |
|
687 PyObject_Del(self); |
|
688 } |
|
689 |
|
690 static PyTypeObject MultibyteCodec_Type = { |
|
691 PyVarObject_HEAD_INIT(NULL, 0) |
|
692 "MultibyteCodec", /* tp_name */ |
|
693 sizeof(MultibyteCodecObject), /* tp_basicsize */ |
|
694 0, /* tp_itemsize */ |
|
695 /* methods */ |
|
696 (destructor)multibytecodec_dealloc, /* tp_dealloc */ |
|
697 0, /* tp_print */ |
|
698 0, /* tp_getattr */ |
|
699 0, /* tp_setattr */ |
|
700 0, /* tp_compare */ |
|
701 0, /* tp_repr */ |
|
702 0, /* tp_as_number */ |
|
703 0, /* tp_as_sequence */ |
|
704 0, /* tp_as_mapping */ |
|
705 0, /* tp_hash */ |
|
706 0, /* tp_call */ |
|
707 0, /* tp_str */ |
|
708 PyObject_GenericGetAttr, /* tp_getattro */ |
|
709 0, /* tp_setattro */ |
|
710 0, /* tp_as_buffer */ |
|
711 Py_TPFLAGS_DEFAULT, /* tp_flags */ |
|
712 0, /* tp_doc */ |
|
713 0, /* tp_traverse */ |
|
714 0, /* tp_clear */ |
|
715 0, /* tp_richcompare */ |
|
716 0, /* tp_weaklistoffset */ |
|
717 0, /* tp_iter */ |
|
718 0, /* tp_iterext */ |
|
719 multibytecodec_methods, /* tp_methods */ |
|
720 }; |
|
721 |
|
722 |
|
723 /** |
|
724 * Utility functions for stateful codec mechanism |
|
725 */ |
|
726 |
|
/* View any stateful decoder / encoder object through its common context
 * structure. */
#define STATEFUL_DCTX(o)        ((MultibyteStatefulDecoderContext *)(o))
#define STATEFUL_ECTX(o)        ((MultibyteStatefulEncoderContext *)(o))
|
729 |
|
730 static PyObject * |
|
731 encoder_encode_stateful(MultibyteStatefulEncoderContext *ctx, |
|
732 PyObject *unistr, int final) |
|
733 { |
|
734 PyObject *ucvt, *r = NULL; |
|
735 Py_UNICODE *inbuf, *inbuf_end, *inbuf_tmp = NULL; |
|
736 Py_ssize_t datalen, origpending; |
|
737 |
|
738 if (PyUnicode_Check(unistr)) |
|
739 ucvt = NULL; |
|
740 else { |
|
741 unistr = ucvt = PyObject_Unicode(unistr); |
|
742 if (unistr == NULL) |
|
743 return NULL; |
|
744 else if (!PyUnicode_Check(unistr)) { |
|
745 PyErr_SetString(PyExc_TypeError, |
|
746 "couldn't convert the object to unicode."); |
|
747 Py_DECREF(ucvt); |
|
748 return NULL; |
|
749 } |
|
750 } |
|
751 |
|
752 datalen = PyUnicode_GET_SIZE(unistr); |
|
753 origpending = ctx->pendingsize; |
|
754 |
|
755 if (origpending > 0) { |
|
756 if (datalen > PY_SSIZE_T_MAX - ctx->pendingsize) { |
|
757 PyErr_NoMemory(); |
|
758 /* inbuf_tmp == NULL */ |
|
759 goto errorexit; |
|
760 } |
|
761 inbuf_tmp = PyMem_New(Py_UNICODE, datalen + ctx->pendingsize); |
|
762 if (inbuf_tmp == NULL) |
|
763 goto errorexit; |
|
764 memcpy(inbuf_tmp, ctx->pending, |
|
765 Py_UNICODE_SIZE * ctx->pendingsize); |
|
766 memcpy(inbuf_tmp + ctx->pendingsize, |
|
767 PyUnicode_AS_UNICODE(unistr), |
|
768 Py_UNICODE_SIZE * datalen); |
|
769 datalen += ctx->pendingsize; |
|
770 ctx->pendingsize = 0; |
|
771 inbuf = inbuf_tmp; |
|
772 } |
|
773 else |
|
774 inbuf = (Py_UNICODE *)PyUnicode_AS_UNICODE(unistr); |
|
775 |
|
776 inbuf_end = inbuf + datalen; |
|
777 |
|
778 r = multibytecodec_encode(ctx->codec, &ctx->state, |
|
779 (const Py_UNICODE **)&inbuf, |
|
780 datalen, ctx->errors, final ? MBENC_FLUSH : 0); |
|
781 if (r == NULL) { |
|
782 /* recover the original pending buffer */ |
|
783 if (origpending > 0) |
|
784 memcpy(ctx->pending, inbuf_tmp, |
|
785 Py_UNICODE_SIZE * origpending); |
|
786 ctx->pendingsize = origpending; |
|
787 goto errorexit; |
|
788 } |
|
789 |
|
790 if (inbuf < inbuf_end) { |
|
791 ctx->pendingsize = (Py_ssize_t)(inbuf_end - inbuf); |
|
792 if (ctx->pendingsize > MAXENCPENDING) { |
|
793 /* normal codecs can't reach here */ |
|
794 ctx->pendingsize = 0; |
|
795 PyErr_SetString(PyExc_UnicodeError, |
|
796 "pending buffer overflow"); |
|
797 goto errorexit; |
|
798 } |
|
799 memcpy(ctx->pending, inbuf, |
|
800 ctx->pendingsize * Py_UNICODE_SIZE); |
|
801 } |
|
802 |
|
803 if (inbuf_tmp != NULL) |
|
804 PyMem_Del(inbuf_tmp); |
|
805 Py_XDECREF(ucvt); |
|
806 return r; |
|
807 |
|
808 errorexit: |
|
809 if (inbuf_tmp != NULL) |
|
810 PyMem_Del(inbuf_tmp); |
|
811 Py_XDECREF(r); |
|
812 Py_XDECREF(ucvt); |
|
813 return NULL; |
|
814 } |
|
815 |
|
816 static int |
|
817 decoder_append_pending(MultibyteStatefulDecoderContext *ctx, |
|
818 MultibyteDecodeBuffer *buf) |
|
819 { |
|
820 Py_ssize_t npendings; |
|
821 |
|
822 npendings = (Py_ssize_t)(buf->inbuf_end - buf->inbuf); |
|
823 if (npendings + ctx->pendingsize > MAXDECPENDING || |
|
824 npendings > PY_SSIZE_T_MAX - ctx->pendingsize) { |
|
825 PyErr_SetString(PyExc_UnicodeError, "pending buffer overflow"); |
|
826 return -1; |
|
827 } |
|
828 memcpy(ctx->pending + ctx->pendingsize, buf->inbuf, npendings); |
|
829 ctx->pendingsize += npendings; |
|
830 return 0; |
|
831 } |
|
832 |
|
833 static int |
|
834 decoder_prepare_buffer(MultibyteDecodeBuffer *buf, const char *data, |
|
835 Py_ssize_t size) |
|
836 { |
|
837 buf->inbuf = buf->inbuf_top = (const unsigned char *)data; |
|
838 buf->inbuf_end = buf->inbuf_top + size; |
|
839 if (buf->outobj == NULL) { /* only if outobj is not allocated yet */ |
|
840 buf->outobj = PyUnicode_FromUnicode(NULL, size); |
|
841 if (buf->outobj == NULL) |
|
842 return -1; |
|
843 buf->outbuf = PyUnicode_AS_UNICODE(buf->outobj); |
|
844 buf->outbuf_end = buf->outbuf + |
|
845 PyUnicode_GET_SIZE(buf->outobj); |
|
846 } |
|
847 |
|
848 return 0; |
|
849 } |
|
850 |
|
851 static int |
|
852 decoder_feed_buffer(MultibyteStatefulDecoderContext *ctx, |
|
853 MultibyteDecodeBuffer *buf) |
|
854 { |
|
855 while (buf->inbuf < buf->inbuf_end) { |
|
856 Py_ssize_t inleft, outleft; |
|
857 Py_ssize_t r; |
|
858 |
|
859 inleft = (Py_ssize_t)(buf->inbuf_end - buf->inbuf); |
|
860 outleft = (Py_ssize_t)(buf->outbuf_end - buf->outbuf); |
|
861 |
|
862 r = ctx->codec->decode(&ctx->state, ctx->codec->config, |
|
863 &buf->inbuf, inleft, &buf->outbuf, outleft); |
|
864 if (r == 0 || r == MBERR_TOOFEW) |
|
865 break; |
|
866 else if (multibytecodec_decerror(ctx->codec, &ctx->state, |
|
867 buf, ctx->errors, r)) |
|
868 return -1; |
|
869 } |
|
870 return 0; |
|
871 } |
|
872 |
|
873 |
|
874 /** |
|
875 * MultibyteIncrementalEncoder object |
|
876 */ |
|
877 |
|
878 static PyObject * |
|
879 mbiencoder_encode(MultibyteIncrementalEncoderObject *self, |
|
880 PyObject *args, PyObject *kwargs) |
|
881 { |
|
882 PyObject *data; |
|
883 int final = 0; |
|
884 |
|
885 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|i:encode", |
|
886 incrementalkwarglist, &data, &final)) |
|
887 return NULL; |
|
888 |
|
889 return encoder_encode_stateful(STATEFUL_ECTX(self), data, final); |
|
890 } |
|
891 |
|
892 static PyObject * |
|
893 mbiencoder_reset(MultibyteIncrementalEncoderObject *self) |
|
894 { |
|
895 if (self->codec->decreset != NULL && |
|
896 self->codec->decreset(&self->state, self->codec->config) != 0) |
|
897 return NULL; |
|
898 self->pendingsize = 0; |
|
899 |
|
900 Py_RETURN_NONE; |
|
901 } |
|
902 |
|
903 static struct PyMethodDef mbiencoder_methods[] = { |
|
904 {"encode", (PyCFunction)mbiencoder_encode, |
|
905 METH_VARARGS | METH_KEYWORDS, NULL}, |
|
906 {"reset", (PyCFunction)mbiencoder_reset, |
|
907 METH_NOARGS, NULL}, |
|
908 {NULL, NULL}, |
|
909 }; |
|
910 |
|
911 static PyObject * |
|
912 mbiencoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds) |
|
913 { |
|
914 MultibyteIncrementalEncoderObject *self; |
|
915 PyObject *codec = NULL; |
|
916 char *errors = NULL; |
|
917 |
|
918 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|s:IncrementalEncoder", |
|
919 incnewkwarglist, &errors)) |
|
920 return NULL; |
|
921 |
|
922 self = (MultibyteIncrementalEncoderObject *)type->tp_alloc(type, 0); |
|
923 if (self == NULL) |
|
924 return NULL; |
|
925 |
|
926 codec = PyObject_GetAttrString((PyObject *)type, "codec"); |
|
927 if (codec == NULL) |
|
928 goto errorexit; |
|
929 if (!MultibyteCodec_Check(codec)) { |
|
930 PyErr_SetString(PyExc_TypeError, "codec is unexpected type"); |
|
931 goto errorexit; |
|
932 } |
|
933 |
|
934 self->codec = ((MultibyteCodecObject *)codec)->codec; |
|
935 self->pendingsize = 0; |
|
936 self->errors = internal_error_callback(errors); |
|
937 if (self->errors == NULL) |
|
938 goto errorexit; |
|
939 if (self->codec->encinit != NULL && |
|
940 self->codec->encinit(&self->state, self->codec->config) != 0) |
|
941 goto errorexit; |
|
942 |
|
943 Py_DECREF(codec); |
|
944 return (PyObject *)self; |
|
945 |
|
946 errorexit: |
|
947 Py_XDECREF(self); |
|
948 Py_XDECREF(codec); |
|
949 return NULL; |
|
950 } |
|
951 |
|
952 static int |
|
953 mbiencoder_init(PyObject *self, PyObject *args, PyObject *kwds) |
|
954 { |
|
955 return 0; |
|
956 } |
|
957 |
|
958 static int |
|
959 mbiencoder_traverse(MultibyteIncrementalEncoderObject *self, |
|
960 visitproc visit, void *arg) |
|
961 { |
|
962 if (ERROR_ISCUSTOM(self->errors)) |
|
963 Py_VISIT(self->errors); |
|
964 return 0; |
|
965 } |
|
966 |
|
967 static void |
|
968 mbiencoder_dealloc(MultibyteIncrementalEncoderObject *self) |
|
969 { |
|
970 PyObject_GC_UnTrack(self); |
|
971 ERROR_DECREF(self->errors); |
|
972 Py_TYPE(self)->tp_free(self); |
|
973 } |
|
974 |
|
975 static PyTypeObject MultibyteIncrementalEncoder_Type = { |
|
976 PyVarObject_HEAD_INIT(NULL, 0) |
|
977 "MultibyteIncrementalEncoder", /* tp_name */ |
|
978 sizeof(MultibyteIncrementalEncoderObject), /* tp_basicsize */ |
|
979 0, /* tp_itemsize */ |
|
980 /* methods */ |
|
981 (destructor)mbiencoder_dealloc, /* tp_dealloc */ |
|
982 0, /* tp_print */ |
|
983 0, /* tp_getattr */ |
|
984 0, /* tp_setattr */ |
|
985 0, /* tp_compare */ |
|
986 0, /* tp_repr */ |
|
987 0, /* tp_as_number */ |
|
988 0, /* tp_as_sequence */ |
|
989 0, /* tp_as_mapping */ |
|
990 0, /* tp_hash */ |
|
991 0, /* tp_call */ |
|
992 0, /* tp_str */ |
|
993 PyObject_GenericGetAttr, /* tp_getattro */ |
|
994 0, /* tp_setattro */ |
|
995 0, /* tp_as_buffer */ |
|
996 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
|
997 | Py_TPFLAGS_BASETYPE, /* tp_flags */ |
|
998 0, /* tp_doc */ |
|
999 (traverseproc)mbiencoder_traverse, /* tp_traverse */ |
|
1000 0, /* tp_clear */ |
|
1001 0, /* tp_richcompare */ |
|
1002 0, /* tp_weaklistoffset */ |
|
1003 0, /* tp_iter */ |
|
1004 0, /* tp_iterext */ |
|
1005 mbiencoder_methods, /* tp_methods */ |
|
1006 0, /* tp_members */ |
|
1007 codecctx_getsets, /* tp_getset */ |
|
1008 0, /* tp_base */ |
|
1009 0, /* tp_dict */ |
|
1010 0, /* tp_descr_get */ |
|
1011 0, /* tp_descr_set */ |
|
1012 0, /* tp_dictoffset */ |
|
1013 mbiencoder_init, /* tp_init */ |
|
1014 0, /* tp_alloc */ |
|
1015 mbiencoder_new, /* tp_new */ |
|
1016 }; |
|
1017 |
|
1018 |
|
1019 /** |
|
1020 * MultibyteIncrementalDecoder object |
|
1021 */ |
|
1022 |
|
1023 static PyObject * |
|
1024 mbidecoder_decode(MultibyteIncrementalDecoderObject *self, |
|
1025 PyObject *args, PyObject *kwargs) |
|
1026 { |
|
1027 MultibyteDecodeBuffer buf; |
|
1028 char *data, *wdata = NULL; |
|
1029 Py_buffer pdata; |
|
1030 Py_ssize_t wsize, finalsize = 0, size, origpending; |
|
1031 int final = 0; |
|
1032 |
|
1033 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*|i:decode", |
|
1034 incrementalkwarglist, &pdata, &final)) |
|
1035 return NULL; |
|
1036 data = pdata.buf; |
|
1037 size = pdata.len; |
|
1038 |
|
1039 buf.outobj = buf.excobj = NULL; |
|
1040 origpending = self->pendingsize; |
|
1041 |
|
1042 if (self->pendingsize == 0) { |
|
1043 wsize = size; |
|
1044 wdata = data; |
|
1045 } |
|
1046 else { |
|
1047 if (size > PY_SSIZE_T_MAX - self->pendingsize) { |
|
1048 PyErr_NoMemory(); |
|
1049 goto errorexit; |
|
1050 } |
|
1051 wsize = size + self->pendingsize; |
|
1052 wdata = PyMem_Malloc(wsize); |
|
1053 if (wdata == NULL) |
|
1054 goto errorexit; |
|
1055 memcpy(wdata, self->pending, self->pendingsize); |
|
1056 memcpy(wdata + self->pendingsize, data, size); |
|
1057 self->pendingsize = 0; |
|
1058 } |
|
1059 |
|
1060 if (decoder_prepare_buffer(&buf, wdata, wsize) != 0) |
|
1061 goto errorexit; |
|
1062 |
|
1063 if (decoder_feed_buffer(STATEFUL_DCTX(self), &buf)) |
|
1064 goto errorexit; |
|
1065 |
|
1066 if (final && buf.inbuf < buf.inbuf_end) { |
|
1067 if (multibytecodec_decerror(self->codec, &self->state, |
|
1068 &buf, self->errors, MBERR_TOOFEW)) { |
|
1069 /* recover the original pending buffer */ |
|
1070 memcpy(self->pending, wdata, origpending); |
|
1071 self->pendingsize = origpending; |
|
1072 goto errorexit; |
|
1073 } |
|
1074 } |
|
1075 |
|
1076 if (buf.inbuf < buf.inbuf_end) { /* pending sequence still exists */ |
|
1077 if (decoder_append_pending(STATEFUL_DCTX(self), &buf) != 0) |
|
1078 goto errorexit; |
|
1079 } |
|
1080 |
|
1081 finalsize = (Py_ssize_t)(buf.outbuf - PyUnicode_AS_UNICODE(buf.outobj)); |
|
1082 if (finalsize != PyUnicode_GET_SIZE(buf.outobj)) |
|
1083 if (PyUnicode_Resize(&buf.outobj, finalsize) == -1) |
|
1084 goto errorexit; |
|
1085 |
|
1086 PyBuffer_Release(&pdata); |
|
1087 if (wdata != data) |
|
1088 PyMem_Del(wdata); |
|
1089 Py_XDECREF(buf.excobj); |
|
1090 return buf.outobj; |
|
1091 |
|
1092 errorexit: |
|
1093 PyBuffer_Release(&pdata); |
|
1094 if (wdata != NULL && wdata != data) |
|
1095 PyMem_Del(wdata); |
|
1096 Py_XDECREF(buf.excobj); |
|
1097 Py_XDECREF(buf.outobj); |
|
1098 return NULL; |
|
1099 } |
|
1100 |
|
1101 static PyObject * |
|
1102 mbidecoder_reset(MultibyteIncrementalDecoderObject *self) |
|
1103 { |
|
1104 if (self->codec->decreset != NULL && |
|
1105 self->codec->decreset(&self->state, self->codec->config) != 0) |
|
1106 return NULL; |
|
1107 self->pendingsize = 0; |
|
1108 |
|
1109 Py_RETURN_NONE; |
|
1110 } |
|
1111 |
|
1112 static struct PyMethodDef mbidecoder_methods[] = { |
|
1113 {"decode", (PyCFunction)mbidecoder_decode, |
|
1114 METH_VARARGS | METH_KEYWORDS, NULL}, |
|
1115 {"reset", (PyCFunction)mbidecoder_reset, |
|
1116 METH_NOARGS, NULL}, |
|
1117 {NULL, NULL}, |
|
1118 }; |
|
1119 |
|
1120 static PyObject * |
|
1121 mbidecoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds) |
|
1122 { |
|
1123 MultibyteIncrementalDecoderObject *self; |
|
1124 PyObject *codec = NULL; |
|
1125 char *errors = NULL; |
|
1126 |
|
1127 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|s:IncrementalDecoder", |
|
1128 incnewkwarglist, &errors)) |
|
1129 return NULL; |
|
1130 |
|
1131 self = (MultibyteIncrementalDecoderObject *)type->tp_alloc(type, 0); |
|
1132 if (self == NULL) |
|
1133 return NULL; |
|
1134 |
|
1135 codec = PyObject_GetAttrString((PyObject *)type, "codec"); |
|
1136 if (codec == NULL) |
|
1137 goto errorexit; |
|
1138 if (!MultibyteCodec_Check(codec)) { |
|
1139 PyErr_SetString(PyExc_TypeError, "codec is unexpected type"); |
|
1140 goto errorexit; |
|
1141 } |
|
1142 |
|
1143 self->codec = ((MultibyteCodecObject *)codec)->codec; |
|
1144 self->pendingsize = 0; |
|
1145 self->errors = internal_error_callback(errors); |
|
1146 if (self->errors == NULL) |
|
1147 goto errorexit; |
|
1148 if (self->codec->decinit != NULL && |
|
1149 self->codec->decinit(&self->state, self->codec->config) != 0) |
|
1150 goto errorexit; |
|
1151 |
|
1152 Py_DECREF(codec); |
|
1153 return (PyObject *)self; |
|
1154 |
|
1155 errorexit: |
|
1156 Py_XDECREF(self); |
|
1157 Py_XDECREF(codec); |
|
1158 return NULL; |
|
1159 } |
|
1160 |
|
1161 static int |
|
1162 mbidecoder_init(PyObject *self, PyObject *args, PyObject *kwds) |
|
1163 { |
|
1164 return 0; |
|
1165 } |
|
1166 |
|
1167 static int |
|
1168 mbidecoder_traverse(MultibyteIncrementalDecoderObject *self, |
|
1169 visitproc visit, void *arg) |
|
1170 { |
|
1171 if (ERROR_ISCUSTOM(self->errors)) |
|
1172 Py_VISIT(self->errors); |
|
1173 return 0; |
|
1174 } |
|
1175 |
|
1176 static void |
|
1177 mbidecoder_dealloc(MultibyteIncrementalDecoderObject *self) |
|
1178 { |
|
1179 PyObject_GC_UnTrack(self); |
|
1180 ERROR_DECREF(self->errors); |
|
1181 Py_TYPE(self)->tp_free(self); |
|
1182 } |
|
1183 |
|
1184 static PyTypeObject MultibyteIncrementalDecoder_Type = { |
|
1185 PyVarObject_HEAD_INIT(NULL, 0) |
|
1186 "MultibyteIncrementalDecoder", /* tp_name */ |
|
1187 sizeof(MultibyteIncrementalDecoderObject), /* tp_basicsize */ |
|
1188 0, /* tp_itemsize */ |
|
1189 /* methods */ |
|
1190 (destructor)mbidecoder_dealloc, /* tp_dealloc */ |
|
1191 0, /* tp_print */ |
|
1192 0, /* tp_getattr */ |
|
1193 0, /* tp_setattr */ |
|
1194 0, /* tp_compare */ |
|
1195 0, /* tp_repr */ |
|
1196 0, /* tp_as_number */ |
|
1197 0, /* tp_as_sequence */ |
|
1198 0, /* tp_as_mapping */ |
|
1199 0, /* tp_hash */ |
|
1200 0, /* tp_call */ |
|
1201 0, /* tp_str */ |
|
1202 PyObject_GenericGetAttr, /* tp_getattro */ |
|
1203 0, /* tp_setattro */ |
|
1204 0, /* tp_as_buffer */ |
|
1205 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
|
1206 | Py_TPFLAGS_BASETYPE, /* tp_flags */ |
|
1207 0, /* tp_doc */ |
|
1208 (traverseproc)mbidecoder_traverse, /* tp_traverse */ |
|
1209 0, /* tp_clear */ |
|
1210 0, /* tp_richcompare */ |
|
1211 0, /* tp_weaklistoffset */ |
|
1212 0, /* tp_iter */ |
|
1213 0, /* tp_iterext */ |
|
1214 mbidecoder_methods, /* tp_methods */ |
|
1215 0, /* tp_members */ |
|
1216 codecctx_getsets, /* tp_getset */ |
|
1217 0, /* tp_base */ |
|
1218 0, /* tp_dict */ |
|
1219 0, /* tp_descr_get */ |
|
1220 0, /* tp_descr_set */ |
|
1221 0, /* tp_dictoffset */ |
|
1222 mbidecoder_init, /* tp_init */ |
|
1223 0, /* tp_alloc */ |
|
1224 mbidecoder_new, /* tp_new */ |
|
1225 }; |
|
1226 |
|
1227 |
|
1228 /** |
|
1229 * MultibyteStreamReader object |
|
1230 */ |
|
1231 |
|
1232 static PyObject * |
|
1233 mbstreamreader_iread(MultibyteStreamReaderObject *self, |
|
1234 const char *method, Py_ssize_t sizehint) |
|
1235 { |
|
1236 MultibyteDecodeBuffer buf; |
|
1237 PyObject *cres; |
|
1238 Py_ssize_t rsize, finalsize = 0; |
|
1239 |
|
1240 if (sizehint == 0) |
|
1241 return PyUnicode_FromUnicode(NULL, 0); |
|
1242 |
|
1243 buf.outobj = buf.excobj = NULL; |
|
1244 cres = NULL; |
|
1245 |
|
1246 for (;;) { |
|
1247 int endoffile; |
|
1248 |
|
1249 if (sizehint < 0) |
|
1250 cres = PyObject_CallMethod(self->stream, |
|
1251 (char *)method, NULL); |
|
1252 else |
|
1253 cres = PyObject_CallMethod(self->stream, |
|
1254 (char *)method, "i", sizehint); |
|
1255 if (cres == NULL) |
|
1256 goto errorexit; |
|
1257 |
|
1258 if (!PyString_Check(cres)) { |
|
1259 PyErr_SetString(PyExc_TypeError, |
|
1260 "stream function returned a " |
|
1261 "non-string object"); |
|
1262 goto errorexit; |
|
1263 } |
|
1264 |
|
1265 endoffile = (PyString_GET_SIZE(cres) == 0); |
|
1266 |
|
1267 if (self->pendingsize > 0) { |
|
1268 PyObject *ctr; |
|
1269 char *ctrdata; |
|
1270 |
|
1271 if (PyString_GET_SIZE(cres) > PY_SSIZE_T_MAX - self->pendingsize) { |
|
1272 PyErr_NoMemory(); |
|
1273 goto errorexit; |
|
1274 } |
|
1275 rsize = PyString_GET_SIZE(cres) + self->pendingsize; |
|
1276 ctr = PyString_FromStringAndSize(NULL, rsize); |
|
1277 if (ctr == NULL) |
|
1278 goto errorexit; |
|
1279 ctrdata = PyString_AS_STRING(ctr); |
|
1280 memcpy(ctrdata, self->pending, self->pendingsize); |
|
1281 memcpy(ctrdata + self->pendingsize, |
|
1282 PyString_AS_STRING(cres), |
|
1283 PyString_GET_SIZE(cres)); |
|
1284 Py_DECREF(cres); |
|
1285 cres = ctr; |
|
1286 self->pendingsize = 0; |
|
1287 } |
|
1288 |
|
1289 rsize = PyString_GET_SIZE(cres); |
|
1290 if (decoder_prepare_buffer(&buf, PyString_AS_STRING(cres), |
|
1291 rsize) != 0) |
|
1292 goto errorexit; |
|
1293 |
|
1294 if (rsize > 0 && decoder_feed_buffer( |
|
1295 (MultibyteStatefulDecoderContext *)self, &buf)) |
|
1296 goto errorexit; |
|
1297 |
|
1298 if (endoffile || sizehint < 0) { |
|
1299 if (buf.inbuf < buf.inbuf_end && |
|
1300 multibytecodec_decerror(self->codec, &self->state, |
|
1301 &buf, self->errors, MBERR_TOOFEW)) |
|
1302 goto errorexit; |
|
1303 } |
|
1304 |
|
1305 if (buf.inbuf < buf.inbuf_end) { /* pending sequence exists */ |
|
1306 if (decoder_append_pending(STATEFUL_DCTX(self), |
|
1307 &buf) != 0) |
|
1308 goto errorexit; |
|
1309 } |
|
1310 |
|
1311 finalsize = (Py_ssize_t)(buf.outbuf - |
|
1312 PyUnicode_AS_UNICODE(buf.outobj)); |
|
1313 Py_DECREF(cres); |
|
1314 cres = NULL; |
|
1315 |
|
1316 if (sizehint < 0 || finalsize != 0 || rsize == 0) |
|
1317 break; |
|
1318 |
|
1319 sizehint = 1; /* read 1 more byte and retry */ |
|
1320 } |
|
1321 |
|
1322 if (finalsize != PyUnicode_GET_SIZE(buf.outobj)) |
|
1323 if (PyUnicode_Resize(&buf.outobj, finalsize) == -1) |
|
1324 goto errorexit; |
|
1325 |
|
1326 Py_XDECREF(cres); |
|
1327 Py_XDECREF(buf.excobj); |
|
1328 return buf.outobj; |
|
1329 |
|
1330 errorexit: |
|
1331 Py_XDECREF(cres); |
|
1332 Py_XDECREF(buf.excobj); |
|
1333 Py_XDECREF(buf.outobj); |
|
1334 return NULL; |
|
1335 } |
|
1336 |
|
1337 static PyObject * |
|
1338 mbstreamreader_read(MultibyteStreamReaderObject *self, PyObject *args) |
|
1339 { |
|
1340 PyObject *sizeobj = NULL; |
|
1341 Py_ssize_t size; |
|
1342 |
|
1343 if (!PyArg_UnpackTuple(args, "read", 0, 1, &sizeobj)) |
|
1344 return NULL; |
|
1345 |
|
1346 if (sizeobj == Py_None || sizeobj == NULL) |
|
1347 size = -1; |
|
1348 else if (PyInt_Check(sizeobj)) |
|
1349 size = PyInt_AsSsize_t(sizeobj); |
|
1350 else { |
|
1351 PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer"); |
|
1352 return NULL; |
|
1353 } |
|
1354 |
|
1355 return mbstreamreader_iread(self, "read", size); |
|
1356 } |
|
1357 |
|
1358 static PyObject * |
|
1359 mbstreamreader_readline(MultibyteStreamReaderObject *self, PyObject *args) |
|
1360 { |
|
1361 PyObject *sizeobj = NULL; |
|
1362 Py_ssize_t size; |
|
1363 |
|
1364 if (!PyArg_UnpackTuple(args, "readline", 0, 1, &sizeobj)) |
|
1365 return NULL; |
|
1366 |
|
1367 if (sizeobj == Py_None || sizeobj == NULL) |
|
1368 size = -1; |
|
1369 else if (PyInt_Check(sizeobj)) |
|
1370 size = PyInt_AsSsize_t(sizeobj); |
|
1371 else { |
|
1372 PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer"); |
|
1373 return NULL; |
|
1374 } |
|
1375 |
|
1376 return mbstreamreader_iread(self, "readline", size); |
|
1377 } |
|
1378 |
|
1379 static PyObject * |
|
1380 mbstreamreader_readlines(MultibyteStreamReaderObject *self, PyObject *args) |
|
1381 { |
|
1382 PyObject *sizehintobj = NULL, *r, *sr; |
|
1383 Py_ssize_t sizehint; |
|
1384 |
|
1385 if (!PyArg_UnpackTuple(args, "readlines", 0, 1, &sizehintobj)) |
|
1386 return NULL; |
|
1387 |
|
1388 if (sizehintobj == Py_None || sizehintobj == NULL) |
|
1389 sizehint = -1; |
|
1390 else if (PyInt_Check(sizehintobj)) |
|
1391 sizehint = PyInt_AsSsize_t(sizehintobj); |
|
1392 else { |
|
1393 PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer"); |
|
1394 return NULL; |
|
1395 } |
|
1396 |
|
1397 r = mbstreamreader_iread(self, "read", sizehint); |
|
1398 if (r == NULL) |
|
1399 return NULL; |
|
1400 |
|
1401 sr = PyUnicode_Splitlines(r, 1); |
|
1402 Py_DECREF(r); |
|
1403 return sr; |
|
1404 } |
|
1405 |
|
1406 static PyObject * |
|
1407 mbstreamreader_reset(MultibyteStreamReaderObject *self) |
|
1408 { |
|
1409 if (self->codec->decreset != NULL && |
|
1410 self->codec->decreset(&self->state, self->codec->config) != 0) |
|
1411 return NULL; |
|
1412 self->pendingsize = 0; |
|
1413 |
|
1414 Py_RETURN_NONE; |
|
1415 } |
|
1416 |
|
1417 static struct PyMethodDef mbstreamreader_methods[] = { |
|
1418 {"read", (PyCFunction)mbstreamreader_read, |
|
1419 METH_VARARGS, NULL}, |
|
1420 {"readline", (PyCFunction)mbstreamreader_readline, |
|
1421 METH_VARARGS, NULL}, |
|
1422 {"readlines", (PyCFunction)mbstreamreader_readlines, |
|
1423 METH_VARARGS, NULL}, |
|
1424 {"reset", (PyCFunction)mbstreamreader_reset, |
|
1425 METH_NOARGS, NULL}, |
|
1426 {NULL, NULL}, |
|
1427 }; |
|
1428 |
|
1429 static PyMemberDef mbstreamreader_members[] = { |
|
1430 {"stream", T_OBJECT, |
|
1431 offsetof(MultibyteStreamReaderObject, stream), |
|
1432 READONLY, NULL}, |
|
1433 {NULL,} |
|
1434 }; |
|
1435 |
|
1436 static PyObject * |
|
1437 mbstreamreader_new(PyTypeObject *type, PyObject *args, PyObject *kwds) |
|
1438 { |
|
1439 MultibyteStreamReaderObject *self; |
|
1440 PyObject *stream, *codec = NULL; |
|
1441 char *errors = NULL; |
|
1442 |
|
1443 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|s:StreamReader", |
|
1444 streamkwarglist, &stream, &errors)) |
|
1445 return NULL; |
|
1446 |
|
1447 self = (MultibyteStreamReaderObject *)type->tp_alloc(type, 0); |
|
1448 if (self == NULL) |
|
1449 return NULL; |
|
1450 |
|
1451 codec = PyObject_GetAttrString((PyObject *)type, "codec"); |
|
1452 if (codec == NULL) |
|
1453 goto errorexit; |
|
1454 if (!MultibyteCodec_Check(codec)) { |
|
1455 PyErr_SetString(PyExc_TypeError, "codec is unexpected type"); |
|
1456 goto errorexit; |
|
1457 } |
|
1458 |
|
1459 self->codec = ((MultibyteCodecObject *)codec)->codec; |
|
1460 self->stream = stream; |
|
1461 Py_INCREF(stream); |
|
1462 self->pendingsize = 0; |
|
1463 self->errors = internal_error_callback(errors); |
|
1464 if (self->errors == NULL) |
|
1465 goto errorexit; |
|
1466 if (self->codec->decinit != NULL && |
|
1467 self->codec->decinit(&self->state, self->codec->config) != 0) |
|
1468 goto errorexit; |
|
1469 |
|
1470 Py_DECREF(codec); |
|
1471 return (PyObject *)self; |
|
1472 |
|
1473 errorexit: |
|
1474 Py_XDECREF(self); |
|
1475 Py_XDECREF(codec); |
|
1476 return NULL; |
|
1477 } |
|
1478 |
|
1479 static int |
|
1480 mbstreamreader_init(PyObject *self, PyObject *args, PyObject *kwds) |
|
1481 { |
|
1482 return 0; |
|
1483 } |
|
1484 |
|
1485 static int |
|
1486 mbstreamreader_traverse(MultibyteStreamReaderObject *self, |
|
1487 visitproc visit, void *arg) |
|
1488 { |
|
1489 if (ERROR_ISCUSTOM(self->errors)) |
|
1490 Py_VISIT(self->errors); |
|
1491 Py_VISIT(self->stream); |
|
1492 return 0; |
|
1493 } |
|
1494 |
|
1495 static void |
|
1496 mbstreamreader_dealloc(MultibyteStreamReaderObject *self) |
|
1497 { |
|
1498 PyObject_GC_UnTrack(self); |
|
1499 ERROR_DECREF(self->errors); |
|
1500 Py_XDECREF(self->stream); |
|
1501 Py_TYPE(self)->tp_free(self); |
|
1502 } |
|
1503 |
|
1504 static PyTypeObject MultibyteStreamReader_Type = { |
|
1505 PyVarObject_HEAD_INIT(NULL, 0) |
|
1506 "MultibyteStreamReader", /* tp_name */ |
|
1507 sizeof(MultibyteStreamReaderObject), /* tp_basicsize */ |
|
1508 0, /* tp_itemsize */ |
|
1509 /* methods */ |
|
1510 (destructor)mbstreamreader_dealloc, /* tp_dealloc */ |
|
1511 0, /* tp_print */ |
|
1512 0, /* tp_getattr */ |
|
1513 0, /* tp_setattr */ |
|
1514 0, /* tp_compare */ |
|
1515 0, /* tp_repr */ |
|
1516 0, /* tp_as_number */ |
|
1517 0, /* tp_as_sequence */ |
|
1518 0, /* tp_as_mapping */ |
|
1519 0, /* tp_hash */ |
|
1520 0, /* tp_call */ |
|
1521 0, /* tp_str */ |
|
1522 PyObject_GenericGetAttr, /* tp_getattro */ |
|
1523 0, /* tp_setattro */ |
|
1524 0, /* tp_as_buffer */ |
|
1525 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
|
1526 | Py_TPFLAGS_BASETYPE, /* tp_flags */ |
|
1527 0, /* tp_doc */ |
|
1528 (traverseproc)mbstreamreader_traverse, /* tp_traverse */ |
|
1529 0, /* tp_clear */ |
|
1530 0, /* tp_richcompare */ |
|
1531 0, /* tp_weaklistoffset */ |
|
1532 0, /* tp_iter */ |
|
1533 0, /* tp_iterext */ |
|
1534 mbstreamreader_methods, /* tp_methods */ |
|
1535 mbstreamreader_members, /* tp_members */ |
|
1536 codecctx_getsets, /* tp_getset */ |
|
1537 0, /* tp_base */ |
|
1538 0, /* tp_dict */ |
|
1539 0, /* tp_descr_get */ |
|
1540 0, /* tp_descr_set */ |
|
1541 0, /* tp_dictoffset */ |
|
1542 mbstreamreader_init, /* tp_init */ |
|
1543 0, /* tp_alloc */ |
|
1544 mbstreamreader_new, /* tp_new */ |
|
1545 }; |
|
1546 |
|
1547 |
|
1548 /** |
|
1549 * MultibyteStreamWriter object |
|
1550 */ |
|
1551 |
|
1552 static int |
|
1553 mbstreamwriter_iwrite(MultibyteStreamWriterObject *self, |
|
1554 PyObject *unistr) |
|
1555 { |
|
1556 PyObject *str, *wr; |
|
1557 |
|
1558 str = encoder_encode_stateful(STATEFUL_ECTX(self), unistr, 0); |
|
1559 if (str == NULL) |
|
1560 return -1; |
|
1561 |
|
1562 wr = PyObject_CallMethod(self->stream, "write", "O", str); |
|
1563 Py_DECREF(str); |
|
1564 if (wr == NULL) |
|
1565 return -1; |
|
1566 |
|
1567 Py_DECREF(wr); |
|
1568 return 0; |
|
1569 } |
|
1570 |
|
1571 static PyObject * |
|
1572 mbstreamwriter_write(MultibyteStreamWriterObject *self, PyObject *strobj) |
|
1573 { |
|
1574 if (mbstreamwriter_iwrite(self, strobj)) |
|
1575 return NULL; |
|
1576 else |
|
1577 Py_RETURN_NONE; |
|
1578 } |
|
1579 |
|
1580 static PyObject * |
|
1581 mbstreamwriter_writelines(MultibyteStreamWriterObject *self, PyObject *lines) |
|
1582 { |
|
1583 PyObject *strobj; |
|
1584 int i, r; |
|
1585 |
|
1586 if (!PySequence_Check(lines)) { |
|
1587 PyErr_SetString(PyExc_TypeError, |
|
1588 "arg must be a sequence object"); |
|
1589 return NULL; |
|
1590 } |
|
1591 |
|
1592 for (i = 0; i < PySequence_Length(lines); i++) { |
|
1593 /* length can be changed even within this loop */ |
|
1594 strobj = PySequence_GetItem(lines, i); |
|
1595 if (strobj == NULL) |
|
1596 return NULL; |
|
1597 |
|
1598 r = mbstreamwriter_iwrite(self, strobj); |
|
1599 Py_DECREF(strobj); |
|
1600 if (r == -1) |
|
1601 return NULL; |
|
1602 } |
|
1603 |
|
1604 Py_RETURN_NONE; |
|
1605 } |
|
1606 |
|
1607 static PyObject * |
|
1608 mbstreamwriter_reset(MultibyteStreamWriterObject *self) |
|
1609 { |
|
1610 const Py_UNICODE *pending; |
|
1611 PyObject *pwrt; |
|
1612 |
|
1613 pending = self->pending; |
|
1614 pwrt = multibytecodec_encode(self->codec, &self->state, |
|
1615 &pending, self->pendingsize, self->errors, |
|
1616 MBENC_FLUSH | MBENC_RESET); |
|
1617 /* some pending buffer can be truncated when UnicodeEncodeError is |
|
1618 * raised on 'strict' mode. but, 'reset' method is designed to |
|
1619 * reset the pending buffer or states so failed string sequence |
|
1620 * ought to be missed */ |
|
1621 self->pendingsize = 0; |
|
1622 if (pwrt == NULL) |
|
1623 return NULL; |
|
1624 |
|
1625 if (PyString_Size(pwrt) > 0) { |
|
1626 PyObject *wr; |
|
1627 wr = PyObject_CallMethod(self->stream, "write", "O", pwrt); |
|
1628 if (wr == NULL) { |
|
1629 Py_DECREF(pwrt); |
|
1630 return NULL; |
|
1631 } |
|
1632 } |
|
1633 Py_DECREF(pwrt); |
|
1634 |
|
1635 Py_RETURN_NONE; |
|
1636 } |
|
1637 |
|
1638 static PyObject * |
|
1639 mbstreamwriter_new(PyTypeObject *type, PyObject *args, PyObject *kwds) |
|
1640 { |
|
1641 MultibyteStreamWriterObject *self; |
|
1642 PyObject *stream, *codec = NULL; |
|
1643 char *errors = NULL; |
|
1644 |
|
1645 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|s:StreamWriter", |
|
1646 streamkwarglist, &stream, &errors)) |
|
1647 return NULL; |
|
1648 |
|
1649 self = (MultibyteStreamWriterObject *)type->tp_alloc(type, 0); |
|
1650 if (self == NULL) |
|
1651 return NULL; |
|
1652 |
|
1653 codec = PyObject_GetAttrString((PyObject *)type, "codec"); |
|
1654 if (codec == NULL) |
|
1655 goto errorexit; |
|
1656 if (!MultibyteCodec_Check(codec)) { |
|
1657 PyErr_SetString(PyExc_TypeError, "codec is unexpected type"); |
|
1658 goto errorexit; |
|
1659 } |
|
1660 |
|
1661 self->codec = ((MultibyteCodecObject *)codec)->codec; |
|
1662 self->stream = stream; |
|
1663 Py_INCREF(stream); |
|
1664 self->pendingsize = 0; |
|
1665 self->errors = internal_error_callback(errors); |
|
1666 if (self->errors == NULL) |
|
1667 goto errorexit; |
|
1668 if (self->codec->encinit != NULL && |
|
1669 self->codec->encinit(&self->state, self->codec->config) != 0) |
|
1670 goto errorexit; |
|
1671 |
|
1672 Py_DECREF(codec); |
|
1673 return (PyObject *)self; |
|
1674 |
|
1675 errorexit: |
|
1676 Py_XDECREF(self); |
|
1677 Py_XDECREF(codec); |
|
1678 return NULL; |
|
1679 } |
|
1680 |
|
1681 static int |
|
1682 mbstreamwriter_init(PyObject *self, PyObject *args, PyObject *kwds) |
|
1683 { |
|
1684 return 0; |
|
1685 } |
|
1686 |
|
1687 static int |
|
1688 mbstreamwriter_traverse(MultibyteStreamWriterObject *self, |
|
1689 visitproc visit, void *arg) |
|
1690 { |
|
1691 if (ERROR_ISCUSTOM(self->errors)) |
|
1692 Py_VISIT(self->errors); |
|
1693 Py_VISIT(self->stream); |
|
1694 return 0; |
|
1695 } |
|
1696 |
|
1697 static void |
|
1698 mbstreamwriter_dealloc(MultibyteStreamWriterObject *self) |
|
1699 { |
|
1700 PyObject_GC_UnTrack(self); |
|
1701 ERROR_DECREF(self->errors); |
|
1702 Py_XDECREF(self->stream); |
|
1703 Py_TYPE(self)->tp_free(self); |
|
1704 } |
|
1705 |
|
1706 static struct PyMethodDef mbstreamwriter_methods[] = { |
|
1707 {"write", (PyCFunction)mbstreamwriter_write, |
|
1708 METH_O, NULL}, |
|
1709 {"writelines", (PyCFunction)mbstreamwriter_writelines, |
|
1710 METH_O, NULL}, |
|
1711 {"reset", (PyCFunction)mbstreamwriter_reset, |
|
1712 METH_NOARGS, NULL}, |
|
1713 {NULL, NULL}, |
|
1714 }; |
|
1715 |
|
1716 static PyMemberDef mbstreamwriter_members[] = { |
|
1717 {"stream", T_OBJECT, |
|
1718 offsetof(MultibyteStreamWriterObject, stream), |
|
1719 READONLY, NULL}, |
|
1720 {NULL,} |
|
1721 }; |
|
1722 |
|
1723 static PyTypeObject MultibyteStreamWriter_Type = { |
|
1724 PyVarObject_HEAD_INIT(NULL, 0) |
|
1725 "MultibyteStreamWriter", /* tp_name */ |
|
1726 sizeof(MultibyteStreamWriterObject), /* tp_basicsize */ |
|
1727 0, /* tp_itemsize */ |
|
1728 /* methods */ |
|
1729 (destructor)mbstreamwriter_dealloc, /* tp_dealloc */ |
|
1730 0, /* tp_print */ |
|
1731 0, /* tp_getattr */ |
|
1732 0, /* tp_setattr */ |
|
1733 0, /* tp_compare */ |
|
1734 0, /* tp_repr */ |
|
1735 0, /* tp_as_number */ |
|
1736 0, /* tp_as_sequence */ |
|
1737 0, /* tp_as_mapping */ |
|
1738 0, /* tp_hash */ |
|
1739 0, /* tp_call */ |
|
1740 0, /* tp_str */ |
|
1741 PyObject_GenericGetAttr, /* tp_getattro */ |
|
1742 0, /* tp_setattro */ |
|
1743 0, /* tp_as_buffer */ |
|
1744 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
|
1745 | Py_TPFLAGS_BASETYPE, /* tp_flags */ |
|
1746 0, /* tp_doc */ |
|
1747 (traverseproc)mbstreamwriter_traverse, /* tp_traverse */ |
|
1748 0, /* tp_clear */ |
|
1749 0, /* tp_richcompare */ |
|
1750 0, /* tp_weaklistoffset */ |
|
1751 0, /* tp_iter */ |
|
1752 0, /* tp_iterext */ |
|
1753 mbstreamwriter_methods, /* tp_methods */ |
|
1754 mbstreamwriter_members, /* tp_members */ |
|
1755 codecctx_getsets, /* tp_getset */ |
|
1756 0, /* tp_base */ |
|
1757 0, /* tp_dict */ |
|
1758 0, /* tp_descr_get */ |
|
1759 0, /* tp_descr_set */ |
|
1760 0, /* tp_dictoffset */ |
|
1761 mbstreamwriter_init, /* tp_init */ |
|
1762 0, /* tp_alloc */ |
|
1763 mbstreamwriter_new, /* tp_new */ |
|
1764 }; |
|
1765 |
|
1766 |
|
1767 /** |
|
1768 * Exposed factory function |
|
1769 */ |
|
1770 |
|
1771 static PyObject * |
|
1772 __create_codec(PyObject *ignore, PyObject *arg) |
|
1773 { |
|
1774 MultibyteCodecObject *self; |
|
1775 MultibyteCodec *codec; |
|
1776 |
|
1777 if (!PyCObject_Check(arg)) { |
|
1778 PyErr_SetString(PyExc_ValueError, "argument type invalid"); |
|
1779 return NULL; |
|
1780 } |
|
1781 |
|
1782 codec = PyCObject_AsVoidPtr(arg); |
|
1783 if (codec->codecinit != NULL && codec->codecinit(codec->config) != 0) |
|
1784 return NULL; |
|
1785 |
|
1786 self = PyObject_New(MultibyteCodecObject, &MultibyteCodec_Type); |
|
1787 if (self == NULL) |
|
1788 return NULL; |
|
1789 self->codec = codec; |
|
1790 |
|
1791 return (PyObject *)self; |
|
1792 } |
|
1793 |
|
1794 static struct PyMethodDef __methods[] = { |
|
1795 {"__create_codec", (PyCFunction)__create_codec, METH_O}, |
|
1796 {NULL, NULL}, |
|
1797 }; |
|
1798 |
|
1799 PyMODINIT_FUNC |
|
1800 init_multibytecodec(void) |
|
1801 { |
|
1802 int i; |
|
1803 PyObject *m; |
|
1804 PyTypeObject *typelist[] = { |
|
1805 &MultibyteIncrementalEncoder_Type, |
|
1806 &MultibyteIncrementalDecoder_Type, |
|
1807 &MultibyteStreamReader_Type, |
|
1808 &MultibyteStreamWriter_Type, |
|
1809 NULL |
|
1810 }; |
|
1811 |
|
1812 if (PyType_Ready(&MultibyteCodec_Type) < 0) |
|
1813 return; |
|
1814 |
|
1815 m = Py_InitModule("_multibytecodec", __methods); |
|
1816 if (m == NULL) |
|
1817 return; |
|
1818 |
|
1819 for (i = 0; typelist[i] != NULL; i++) { |
|
1820 if (PyType_Ready(typelist[i]) < 0) |
|
1821 return; |
|
1822 Py_INCREF(typelist[i]); |
|
1823 PyModule_AddObject(m, typelist[i]->tp_name, |
|
1824 (PyObject *)typelist[i]); |
|
1825 } |
|
1826 |
|
1827 if (PyErr_Occurred()) |
|
1828 Py_FatalError("can't initialize the _multibytecodec module"); |
|
1829 } |