symbian-qemu-0.9.1-12/python-2.6.1/Modules/_codecsmodule.c
changeset 1 2fb8b9db1c86
equal deleted inserted replaced
0:ffa851df0825 1:2fb8b9db1c86
       
     1 /* ------------------------------------------------------------------------
       
     2 
       
     3    _codecs -- Provides access to the codec registry and the builtin
       
     4               codecs.
       
     5 
       
     6    This module should never be imported directly. The standard library
       
     7    module "codecs" wraps this builtin module for use within Python.
       
     8 
       
     9    The codec registry is accessible via:
       
    10 
       
    11      register(search_function) -> None
       
    12 
       
    13      lookup(encoding) -> CodecInfo object
       
    14 
       
    15    The builtin Unicode codecs use the following interface:
       
    16 
       
    17      <encoding>_encode(Unicode_object[,errors='strict']) ->
       
    18      	(string object, bytes consumed)
       
    19 
       
    20      <encoding>_decode(char_buffer_obj[,errors='strict']) ->
       
    21         (Unicode object, bytes consumed)
       
    22 
       
     23    <encoding>_encode() interfaces also accept non-Unicode objects as
       
    24    input. The objects are then converted to Unicode using
       
    25    PyUnicode_FromObject() prior to applying the conversion.
       
    26 
       
    27    These <encoding>s are available: utf_8, unicode_escape,
       
    28    raw_unicode_escape, unicode_internal, latin_1, ascii (7-bit),
       
    29    mbcs (on win32).
       
    30 
       
    31 
       
    32 Written by Marc-Andre Lemburg (mal@lemburg.com).
       
    33 
       
    34 Copyright (c) Corporation for National Research Initiatives.
       
    35 
       
    36    ------------------------------------------------------------------------ */
       
    37 
       
    38 #define PY_SSIZE_T_CLEAN
       
    39 #include "Python.h"
       
    40 
       
    41 /* --- Registry ----------------------------------------------------------- */
       
    42 
       
    43 PyDoc_STRVAR(register__doc__,
       
    44 "register(search_function)\n\
       
    45 \n\
       
    46 Register a codec search function. Search functions are expected to take\n\
       
    47 one argument, the encoding name in all lower case letters, and return\n\
       
    48 a tuple of functions (encoder, decoder, stream_reader, stream_writer)\n\
       
    49 (or a CodecInfo object).");
       
    50 
       
    51 static
       
    52 PyObject *codec_register(PyObject *self, PyObject *search_function)
       
    53 {
       
    54     if (PyCodec_Register(search_function))
       
    55         return NULL;
       
    56 
       
    57     Py_RETURN_NONE;
       
    58 }
       
    59 
       
    60 PyDoc_STRVAR(lookup__doc__,
       
    61 "lookup(encoding) -> CodecInfo\n\
       
    62 \n\
       
    63 Looks up a codec tuple in the Python codec registry and returns\n\
       
    64 a tuple of function (or a CodecInfo object).");
       
    65 
       
    66 static
       
    67 PyObject *codec_lookup(PyObject *self, PyObject *args)
       
    68 {
       
    69     char *encoding;
       
    70 
       
    71     if (!PyArg_ParseTuple(args, "s:lookup", &encoding))
       
    72         return NULL;
       
    73 
       
    74     return _PyCodec_Lookup(encoding);
       
    75 }
       
    76 
       
    77 PyDoc_STRVAR(encode__doc__,
       
    78 "encode(obj, [encoding[,errors]]) -> object\n\
       
    79 \n\
       
    80 Encodes obj using the codec registered for encoding. encoding defaults\n\
       
    81 to the default encoding. errors may be given to set a different error\n\
       
    82 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
       
    83 a ValueError. Other possible values are 'ignore', 'replace' and\n\
       
    84 'xmlcharrefreplace' as well as any other name registered with\n\
       
    85 codecs.register_error that can handle ValueErrors.");
       
    86 
       
    87 static PyObject *
       
    88 codec_encode(PyObject *self, PyObject *args)
       
    89 {
       
    90     const char *encoding = NULL;
       
    91     const char *errors = NULL;
       
    92     PyObject *v;
       
    93 
       
    94     if (!PyArg_ParseTuple(args, "O|ss:encode", &v, &encoding, &errors))
       
    95         return NULL;
       
    96 
       
    97 #ifdef Py_USING_UNICODE
       
    98     if (encoding == NULL)
       
    99 	encoding = PyUnicode_GetDefaultEncoding();
       
   100 #else
       
   101     if (encoding == NULL) {
       
   102 	PyErr_SetString(PyExc_ValueError, "no encoding specified");
       
   103 	return NULL;
       
   104     }
       
   105 #endif
       
   106 
       
   107     /* Encode via the codec registry */
       
   108     return PyCodec_Encode(v, encoding, errors);
       
   109 }
       
   110 
       
   111 PyDoc_STRVAR(decode__doc__,
       
   112 "decode(obj, [encoding[,errors]]) -> object\n\
       
   113 \n\
       
   114 Decodes obj using the codec registered for encoding. encoding defaults\n\
       
   115 to the default encoding. errors may be given to set a different error\n\
       
   116 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
       
   117 a ValueError. Other possible values are 'ignore' and 'replace'\n\
       
   118 as well as any other name registered with codecs.register_error that is\n\
       
   119 able to handle ValueErrors.");
       
   120 
       
   121 static PyObject *
       
   122 codec_decode(PyObject *self, PyObject *args)
       
   123 {
       
   124     const char *encoding = NULL;
       
   125     const char *errors = NULL;
       
   126     PyObject *v;
       
   127 
       
   128     if (!PyArg_ParseTuple(args, "O|ss:decode", &v, &encoding, &errors))
       
   129         return NULL;
       
   130 
       
   131 #ifdef Py_USING_UNICODE
       
   132     if (encoding == NULL)
       
   133 	encoding = PyUnicode_GetDefaultEncoding();
       
   134 #else
       
   135     if (encoding == NULL) {
       
   136 	PyErr_SetString(PyExc_ValueError, "no encoding specified");
       
   137 	return NULL;
       
   138     }
       
   139 #endif
       
   140 
       
   141     /* Decode via the codec registry */
       
   142     return PyCodec_Decode(v, encoding, errors);
       
   143 }
       
   144 
       
   145 /* --- Helpers ------------------------------------------------------------ */
       
   146 
       
   147 static
       
   148 PyObject *codec_tuple(PyObject *unicode,
       
   149 		      Py_ssize_t len)
       
   150 {
       
   151     PyObject *v;
       
   152     if (unicode == NULL)
       
   153         return NULL;
       
   154     v = Py_BuildValue("On", unicode, len);
       
   155     Py_DECREF(unicode);
       
   156     return v;
       
   157 }
       
   158 
       
   159 /* --- String codecs ------------------------------------------------------ */
       
   160 static PyObject *
       
   161 escape_decode(PyObject *self,
       
   162 	      PyObject *args)
       
   163 {
       
   164     const char *errors = NULL;
       
   165     const char *data;
       
   166     Py_ssize_t size;
       
   167 
       
   168     if (!PyArg_ParseTuple(args, "s#|z:escape_decode",
       
   169 			  &data, &size, &errors))
       
   170 	return NULL;
       
   171     return codec_tuple(PyString_DecodeEscape(data, size, errors, 0, NULL),
       
   172 		       size);
       
   173 }
       
   174 
       
   175 static PyObject *
       
   176 escape_encode(PyObject *self,
       
   177 	      PyObject *args)
       
   178 {
       
   179 	PyObject *str;
       
   180 	const char *errors = NULL;
       
   181 	char *buf;
       
   182 	Py_ssize_t len;
       
   183 
       
   184 	if (!PyArg_ParseTuple(args, "O!|z:escape_encode",
       
   185 			      &PyString_Type, &str, &errors))
       
   186 		return NULL;
       
   187 
       
   188 	str = PyString_Repr(str, 0);
       
   189 	if (!str)
       
   190 		return NULL;
       
   191 
       
   192 	/* The string will be quoted. Unquote, similar to unicode-escape. */
       
   193 	buf = PyString_AS_STRING (str);
       
   194 	len = PyString_GET_SIZE (str);
       
   195 	memmove(buf, buf+1, len-2);
       
   196 	if (_PyString_Resize(&str, len-2) < 0)
       
   197 		return NULL;
       
   198 	
       
   199 	return codec_tuple(str, PyString_Size(str));
       
   200 }
       
   201 
       
   202 #ifdef Py_USING_UNICODE
       
   203 /* --- Decoder ------------------------------------------------------------ */
       
   204 
       
   205 static PyObject *
       
   206 unicode_internal_decode(PyObject *self,
       
   207 			PyObject *args)
       
   208 {
       
   209     PyObject *obj;
       
   210     const char *errors = NULL;
       
   211     const char *data;
       
   212     Py_ssize_t size;
       
   213 
       
   214     if (!PyArg_ParseTuple(args, "O|z:unicode_internal_decode",
       
   215 			  &obj, &errors))
       
   216 	return NULL;
       
   217 
       
   218     if (PyUnicode_Check(obj)) {
       
   219 	Py_INCREF(obj);
       
   220 	return codec_tuple(obj, PyUnicode_GET_SIZE(obj));
       
   221     }
       
   222     else {
       
   223 	if (PyObject_AsReadBuffer(obj, (const void **)&data, &size))
       
   224 	    return NULL;
       
   225 
       
   226 	return codec_tuple(_PyUnicode_DecodeUnicodeInternal(data, size, errors),
       
   227 			   size);
       
   228     }
       
   229 }
       
   230 
       
   231 static PyObject *
       
   232 utf_7_decode(PyObject *self,
       
   233              PyObject *args)
       
   234 {
       
   235 	Py_buffer pbuf;
       
   236     const char *errors = NULL;
       
   237     int final = 0;
       
   238     Py_ssize_t consumed;
       
   239     PyObject *decoded = NULL;
       
   240 
       
   241     if (!PyArg_ParseTuple(args, "s*|zi:utf_7_decode",
       
   242 			  &pbuf, &errors, &final))
       
   243 	return NULL;
       
   244     consumed = pbuf.len;
       
   245 
       
   246     decoded = PyUnicode_DecodeUTF7Stateful(pbuf.buf, pbuf.len, errors,
       
   247 					   final ? NULL : &consumed);
       
   248 	PyBuffer_Release(&pbuf);
       
   249     if (decoded == NULL)
       
   250         return NULL;
       
   251     return codec_tuple(decoded, consumed);
       
   252 }
       
   253 
       
   254 static PyObject *
       
   255 utf_8_decode(PyObject *self,
       
   256 	    PyObject *args)
       
   257 {
       
   258 	Py_buffer pbuf;
       
   259     const char *errors = NULL;
       
   260     int final = 0;
       
   261     Py_ssize_t consumed;
       
   262     PyObject *decoded = NULL;
       
   263 
       
   264     if (!PyArg_ParseTuple(args, "s*|zi:utf_8_decode",
       
   265 			  &pbuf, &errors, &final))
       
   266 	return NULL;
       
   267     consumed = pbuf.len;
       
   268 
       
   269     decoded = PyUnicode_DecodeUTF8Stateful(pbuf.buf, pbuf.len, errors,
       
   270 					   final ? NULL : &consumed);
       
   271 	PyBuffer_Release(&pbuf);
       
   272     if (decoded == NULL)
       
   273 	return NULL;
       
   274     return codec_tuple(decoded, consumed);
       
   275 }
       
   276 
       
   277 static PyObject *
       
   278 utf_16_decode(PyObject *self,
       
   279 	    PyObject *args)
       
   280 {
       
   281 	Py_buffer pbuf;
       
   282     const char *errors = NULL;
       
   283     int byteorder = 0;
       
   284     int final = 0;
       
   285     Py_ssize_t consumed;
       
   286     PyObject *decoded;
       
   287 
       
   288     if (!PyArg_ParseTuple(args, "s*|zi:utf_16_decode",
       
   289 			  &pbuf, &errors, &final))
       
   290 	return NULL;
       
   291     consumed = pbuf.len; /* This is overwritten unless final is true. */
       
   292     decoded = PyUnicode_DecodeUTF16Stateful(pbuf.buf, pbuf.len, errors,
       
   293 					&byteorder, final ? NULL : &consumed);
       
   294 	PyBuffer_Release(&pbuf);
       
   295     if (decoded == NULL)
       
   296 	return NULL;
       
   297     return codec_tuple(decoded, consumed);
       
   298 }
       
   299 
       
   300 static PyObject *
       
   301 utf_16_le_decode(PyObject *self,
       
   302 		 PyObject *args)
       
   303 {
       
   304 	Py_buffer pbuf;
       
   305     const char *errors = NULL;
       
   306     int byteorder = -1;
       
   307     int final = 0;
       
   308     Py_ssize_t consumed;
       
   309     PyObject *decoded = NULL;
       
   310 
       
   311     if (!PyArg_ParseTuple(args, "s*|zi:utf_16_le_decode",
       
   312 			  &pbuf, &errors, &final))
       
   313 	return NULL;
       
   314 
       
   315     consumed = pbuf.len; /* This is overwritten unless final is true. */
       
   316     decoded = PyUnicode_DecodeUTF16Stateful(pbuf.buf, pbuf.len, errors,
       
   317 	&byteorder, final ? NULL : &consumed);
       
   318 	PyBuffer_Release(&pbuf);
       
   319     if (decoded == NULL)
       
   320 	return NULL;
       
   321     return codec_tuple(decoded, consumed);
       
   322 }
       
   323 
       
   324 static PyObject *
       
   325 utf_16_be_decode(PyObject *self,
       
   326 		 PyObject *args)
       
   327 {
       
   328 	Py_buffer pbuf;
       
   329     const char *errors = NULL;
       
   330     int byteorder = 1;
       
   331     int final = 0;
       
   332     Py_ssize_t consumed;
       
   333     PyObject *decoded = NULL;
       
   334 
       
   335     if (!PyArg_ParseTuple(args, "s*|zi:utf_16_be_decode",
       
   336 			  &pbuf, &errors, &final))
       
   337 	return NULL;
       
   338 
       
   339     consumed = pbuf.len; /* This is overwritten unless final is true. */
       
   340     decoded = PyUnicode_DecodeUTF16Stateful(pbuf.buf, pbuf.len, errors,
       
   341 	&byteorder, final ? NULL : &consumed);
       
   342 	PyBuffer_Release(&pbuf);
       
   343     if (decoded == NULL)
       
   344 	return NULL;
       
   345     return codec_tuple(decoded, consumed);
       
   346 }
       
   347 
       
   348 /* This non-standard version also provides access to the byteorder
       
   349    parameter of the builtin UTF-16 codec.
       
   350 
       
   351    It returns a tuple (unicode, bytesread, byteorder) with byteorder
       
   352    being the value in effect at the end of data.
       
   353 
       
   354 */
       
   355 
       
   356 static PyObject *
       
   357 utf_16_ex_decode(PyObject *self,
       
   358 		 PyObject *args)
       
   359 {
       
   360 	Py_buffer pbuf;
       
   361     const char *errors = NULL;
       
   362     int byteorder = 0;
       
   363     PyObject *unicode, *tuple;
       
   364     int final = 0;
       
   365     Py_ssize_t consumed;
       
   366 
       
   367     if (!PyArg_ParseTuple(args, "s*|zii:utf_16_ex_decode",
       
   368 			  &pbuf, &errors, &byteorder, &final))
       
   369 	return NULL;
       
   370     consumed = pbuf.len; /* This is overwritten unless final is true. */
       
   371     unicode = PyUnicode_DecodeUTF16Stateful(pbuf.buf, pbuf.len, errors,
       
   372 					&byteorder, final ? NULL : &consumed);
       
   373 	PyBuffer_Release(&pbuf);
       
   374     if (unicode == NULL)
       
   375 	return NULL;
       
   376     tuple = Py_BuildValue("Oni", unicode, consumed, byteorder);
       
   377     Py_DECREF(unicode);
       
   378     return tuple;
       
   379 }
       
   380 
       
   381 static PyObject *
       
   382 utf_32_decode(PyObject *self,
       
   383 	    PyObject *args)
       
   384 {
       
   385 	Py_buffer pbuf;
       
   386     const char *errors = NULL;
       
   387     int byteorder = 0;
       
   388     int final = 0;
       
   389     Py_ssize_t consumed;
       
   390     PyObject *decoded;
       
   391 
       
   392     if (!PyArg_ParseTuple(args, "s*|zi:utf_32_decode",
       
   393 			  &pbuf, &errors, &final))
       
   394 	return NULL;
       
   395     consumed = pbuf.len; /* This is overwritten unless final is true. */
       
   396     decoded = PyUnicode_DecodeUTF32Stateful(pbuf.buf, pbuf.len, errors,
       
   397 					&byteorder, final ? NULL : &consumed);
       
   398 	PyBuffer_Release(&pbuf);
       
   399     if (decoded == NULL)
       
   400 	return NULL;
       
   401     return codec_tuple(decoded, consumed);
       
   402 }
       
   403 
       
   404 static PyObject *
       
   405 utf_32_le_decode(PyObject *self,
       
   406 		 PyObject *args)
       
   407 {
       
   408 	Py_buffer pbuf;
       
   409     const char *errors = NULL;
       
   410     int byteorder = -1;
       
   411     int final = 0;
       
   412     Py_ssize_t consumed;
       
   413     PyObject *decoded;
       
   414 
       
   415     if (!PyArg_ParseTuple(args, "s*|zi:utf_32_le_decode",
       
   416 			  &pbuf, &errors, &final))
       
   417 	return NULL;
       
   418     consumed = pbuf.len; /* This is overwritten unless final is true. */
       
   419     decoded = PyUnicode_DecodeUTF32Stateful(pbuf.buf, pbuf.len, errors,
       
   420 					&byteorder, final ? NULL : &consumed);
       
   421 	PyBuffer_Release(&pbuf);
       
   422     if (decoded == NULL)
       
   423 	return NULL;
       
   424     return codec_tuple(decoded, consumed);
       
   425 }
       
   426 
       
   427 static PyObject *
       
   428 utf_32_be_decode(PyObject *self,
       
   429 		 PyObject *args)
       
   430 {
       
   431 	Py_buffer pbuf;
       
   432     const char *errors = NULL;
       
   433     int byteorder = 1;
       
   434     int final = 0;
       
   435     Py_ssize_t consumed;
       
   436     PyObject *decoded;
       
   437 
       
   438     if (!PyArg_ParseTuple(args, "s*|zi:utf_32_be_decode",
       
   439 			  &pbuf, &errors, &final))
       
   440 	return NULL;
       
   441     consumed = pbuf.len; /* This is overwritten unless final is true. */
       
   442     decoded = PyUnicode_DecodeUTF32Stateful(pbuf.buf, pbuf.len, errors,
       
   443 					&byteorder, final ? NULL : &consumed);
       
   444 	PyBuffer_Release(&pbuf);
       
   445     if (decoded == NULL)
       
   446 	return NULL;
       
   447     return codec_tuple(decoded, consumed);
       
   448 }
       
   449 
       
   450 /* This non-standard version also provides access to the byteorder
       
   451    parameter of the builtin UTF-32 codec.
       
   452 
       
   453    It returns a tuple (unicode, bytesread, byteorder) with byteorder
       
   454    being the value in effect at the end of data.
       
   455 
       
   456 */
       
   457 
       
   458 static PyObject *
       
   459 utf_32_ex_decode(PyObject *self,
       
   460 		 PyObject *args)
       
   461 {
       
   462 	Py_buffer pbuf;
       
   463     const char *errors = NULL;
       
   464     int byteorder = 0;
       
   465     PyObject *unicode, *tuple;
       
   466     int final = 0;
       
   467     Py_ssize_t consumed;
       
   468 
       
   469     if (!PyArg_ParseTuple(args, "s*|zii:utf_32_ex_decode",
       
   470 			  &pbuf, &errors, &byteorder, &final))
       
   471 	return NULL;
       
   472     consumed = pbuf.len; /* This is overwritten unless final is true. */
       
   473     unicode = PyUnicode_DecodeUTF32Stateful(pbuf.buf, pbuf.len, errors,
       
   474 					&byteorder, final ? NULL : &consumed);
       
   475 	PyBuffer_Release(&pbuf);
       
   476     if (unicode == NULL)
       
   477 	return NULL;
       
   478     tuple = Py_BuildValue("Oni", unicode, consumed, byteorder);
       
   479     Py_DECREF(unicode);
       
   480     return tuple;
       
   481 }
       
   482 
       
   483 static PyObject *
       
   484 unicode_escape_decode(PyObject *self,
       
   485 		     PyObject *args)
       
   486 {
       
   487 	Py_buffer pbuf;
       
   488     const char *errors = NULL;
       
   489 	PyObject *unicode;
       
   490 
       
   491     if (!PyArg_ParseTuple(args, "s*|z:unicode_escape_decode",
       
   492 			  &pbuf, &errors))
       
   493 	return NULL;
       
   494 
       
   495 	unicode = PyUnicode_DecodeUnicodeEscape(pbuf.buf, pbuf.len, errors);
       
   496 	PyBuffer_Release(&pbuf);
       
   497 	return codec_tuple(unicode, pbuf.len);
       
   498 }
       
   499 
       
   500 static PyObject *
       
   501 raw_unicode_escape_decode(PyObject *self,
       
   502 			PyObject *args)
       
   503 {
       
   504 	Py_buffer pbuf;
       
   505     const char *errors = NULL;
       
   506 	PyObject *unicode;
       
   507 
       
   508     if (!PyArg_ParseTuple(args, "s*|z:raw_unicode_escape_decode",
       
   509 			  &pbuf, &errors))
       
   510 	return NULL;
       
   511 
       
   512 	unicode = PyUnicode_DecodeRawUnicodeEscape(pbuf.buf, pbuf.len, errors);
       
   513 	PyBuffer_Release(&pbuf);
       
   514 	return codec_tuple(unicode, pbuf.len);
       
   515 }
       
   516 
       
   517 static PyObject *
       
   518 latin_1_decode(PyObject *self,
       
   519 	       PyObject *args)
       
   520 {
       
   521 	Py_buffer pbuf;
       
   522 	PyObject *unicode;
       
   523     const char *errors = NULL;
       
   524 
       
   525     if (!PyArg_ParseTuple(args, "s*|z:latin_1_decode",
       
   526 			  &pbuf, &errors))
       
   527 	return NULL;
       
   528 
       
   529 	unicode = PyUnicode_DecodeLatin1(pbuf.buf, pbuf.len, errors);
       
   530 	PyBuffer_Release(&pbuf);
       
   531 	return codec_tuple(unicode, pbuf.len);
       
   532 }
       
   533 
       
   534 static PyObject *
       
   535 ascii_decode(PyObject *self,
       
   536 	     PyObject *args)
       
   537 {
       
   538 	Py_buffer pbuf;
       
   539 	PyObject *unicode;
       
   540     const char *errors = NULL;
       
   541 
       
   542     if (!PyArg_ParseTuple(args, "s*|z:ascii_decode",
       
   543 			  &pbuf, &errors))
       
   544 	return NULL;
       
   545 
       
   546 	unicode = PyUnicode_DecodeASCII(pbuf.buf, pbuf.len, errors);
       
   547 	PyBuffer_Release(&pbuf);
       
   548 	return codec_tuple(unicode, pbuf.len);
       
   549 }
       
   550 
       
   551 static PyObject *
       
   552 charmap_decode(PyObject *self,
       
   553 	       PyObject *args)
       
   554 {
       
   555 	Py_buffer pbuf;
       
   556 	PyObject *unicode;
       
   557     const char *errors = NULL;
       
   558     PyObject *mapping = NULL;
       
   559 
       
   560     if (!PyArg_ParseTuple(args, "s*|zO:charmap_decode",
       
   561 			  &pbuf, &errors, &mapping))
       
   562 	return NULL;
       
   563     if (mapping == Py_None)
       
   564 	mapping = NULL;
       
   565 
       
   566 	unicode = PyUnicode_DecodeCharmap(pbuf.buf, pbuf.len, mapping, errors);
       
   567 	PyBuffer_Release(&pbuf);
       
   568 	return codec_tuple(unicode, pbuf.len);
       
   569 }
       
   570 
       
   571 #if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
       
   572 
       
   573 static PyObject *
       
   574 mbcs_decode(PyObject *self,
       
   575 	    PyObject *args)
       
   576 {
       
   577 	Py_buffer pbuf;
       
   578     const char *errors = NULL;
       
   579     int final = 0;
       
   580     Py_ssize_t consumed;
       
   581     PyObject *decoded = NULL;
       
   582 
       
   583     if (!PyArg_ParseTuple(args, "s*|zi:mbcs_decode",
       
   584 			  &pbuf, &errors, &final))
       
   585 	return NULL;
       
   586     consumed = pbuf.len;
       
   587 
       
   588     decoded = PyUnicode_DecodeMBCSStateful(pbuf.buf, pbuf.len, errors,
       
   589 					   final ? NULL : &consumed);
       
   590 	PyBuffer_Release(&pbuf);
       
   591     if (decoded == NULL)
       
   592 	return NULL;
       
   593     return codec_tuple(decoded, consumed);
       
   594 }
       
   595 
       
   596 #endif /* MS_WINDOWS */
       
   597 
       
   598 /* --- Encoder ------------------------------------------------------------ */
       
   599 
       
   600 static PyObject *
       
   601 readbuffer_encode(PyObject *self,
       
   602 		  PyObject *args)
       
   603 {
       
   604     const char *data;
       
   605     Py_ssize_t size;
       
   606     const char *errors = NULL;
       
   607 
       
   608     if (!PyArg_ParseTuple(args, "s#|z:readbuffer_encode",
       
   609 			  &data, &size, &errors))
       
   610 	return NULL;
       
   611 
       
   612     return codec_tuple(PyString_FromStringAndSize(data, size),
       
   613 		       size);
       
   614 }
       
   615 
       
   616 static PyObject *
       
   617 charbuffer_encode(PyObject *self,
       
   618 		  PyObject *args)
       
   619 {
       
   620     const char *data;
       
   621     Py_ssize_t size;
       
   622     const char *errors = NULL;
       
   623 
       
   624     if (!PyArg_ParseTuple(args, "t#|z:charbuffer_encode",
       
   625 			  &data, &size, &errors))
       
   626 	return NULL;
       
   627 
       
   628     return codec_tuple(PyString_FromStringAndSize(data, size),
       
   629 		       size);
       
   630 }
       
   631 
       
   632 static PyObject *
       
   633 unicode_internal_encode(PyObject *self,
       
   634 			PyObject *args)
       
   635 {
       
   636     PyObject *obj;
       
   637     const char *errors = NULL;
       
   638     const char *data;
       
   639     Py_ssize_t size;
       
   640 
       
   641     if (!PyArg_ParseTuple(args, "O|z:unicode_internal_encode",
       
   642 			  &obj, &errors))
       
   643 	return NULL;
       
   644 
       
   645     if (PyUnicode_Check(obj)) {
       
   646 	data = PyUnicode_AS_DATA(obj);
       
   647 	size = PyUnicode_GET_DATA_SIZE(obj);
       
   648 	return codec_tuple(PyString_FromStringAndSize(data, size),
       
   649 			   size);
       
   650     }
       
   651     else {
       
   652 	if (PyObject_AsReadBuffer(obj, (const void **)&data, &size))
       
   653 	    return NULL;
       
   654 	return codec_tuple(PyString_FromStringAndSize(data, size),
       
   655 			   size);
       
   656     }
       
   657 }
       
   658 
       
   659 static PyObject *
       
   660 utf_7_encode(PyObject *self,
       
   661 	    PyObject *args)
       
   662 {
       
   663     PyObject *str, *v;
       
   664     const char *errors = NULL;
       
   665 
       
   666     if (!PyArg_ParseTuple(args, "O|z:utf_7_encode",
       
   667 			  &str, &errors))
       
   668 	return NULL;
       
   669 
       
   670     str = PyUnicode_FromObject(str);
       
   671     if (str == NULL)
       
   672 	return NULL;
       
   673     v = codec_tuple(PyUnicode_EncodeUTF7(PyUnicode_AS_UNICODE(str),
       
   674 					 PyUnicode_GET_SIZE(str),
       
   675 					 0,
       
   676 					 0,
       
   677 					 errors),
       
   678 		    PyUnicode_GET_SIZE(str));
       
   679     Py_DECREF(str);
       
   680     return v;
       
   681 }
       
   682 
       
   683 static PyObject *
       
   684 utf_8_encode(PyObject *self,
       
   685 	    PyObject *args)
       
   686 {
       
   687     PyObject *str, *v;
       
   688     const char *errors = NULL;
       
   689 
       
   690     if (!PyArg_ParseTuple(args, "O|z:utf_8_encode",
       
   691 			  &str, &errors))
       
   692 	return NULL;
       
   693 
       
   694     str = PyUnicode_FromObject(str);
       
   695     if (str == NULL)
       
   696 	return NULL;
       
   697     v = codec_tuple(PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(str),
       
   698 					 PyUnicode_GET_SIZE(str),
       
   699 					 errors),
       
   700 		    PyUnicode_GET_SIZE(str));
       
   701     Py_DECREF(str);
       
   702     return v;
       
   703 }
       
   704 
       
   705 /* This version provides access to the byteorder parameter of the
       
   706    builtin UTF-16 codecs as optional third argument. It defaults to 0
       
   707    which means: use the native byte order and prepend the data with a
       
   708    BOM mark.
       
   709 
       
   710 */
       
   711 
       
   712 static PyObject *
       
   713 utf_16_encode(PyObject *self,
       
   714 	    PyObject *args)
       
   715 {
       
   716     PyObject *str, *v;
       
   717     const char *errors = NULL;
       
   718     int byteorder = 0;
       
   719 
       
   720     if (!PyArg_ParseTuple(args, "O|zi:utf_16_encode",
       
   721 			  &str, &errors, &byteorder))
       
   722 	return NULL;
       
   723 
       
   724     str = PyUnicode_FromObject(str);
       
   725     if (str == NULL)
       
   726 	return NULL;
       
   727     v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
       
   728 					  PyUnicode_GET_SIZE(str),
       
   729 					  errors,
       
   730 					  byteorder),
       
   731 		    PyUnicode_GET_SIZE(str));
       
   732     Py_DECREF(str);
       
   733     return v;
       
   734 }
       
   735 
       
   736 static PyObject *
       
   737 utf_16_le_encode(PyObject *self,
       
   738 		 PyObject *args)
       
   739 {
       
   740     PyObject *str, *v;
       
   741     const char *errors = NULL;
       
   742 
       
   743     if (!PyArg_ParseTuple(args, "O|z:utf_16_le_encode",
       
   744 			  &str, &errors))
       
   745 	return NULL;
       
   746 
       
   747     str = PyUnicode_FromObject(str);
       
   748     if (str == NULL)
       
   749 	return NULL;
       
   750     v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
       
   751 					     PyUnicode_GET_SIZE(str),
       
   752 					     errors,
       
   753 					     -1),
       
   754 		       PyUnicode_GET_SIZE(str));
       
   755     Py_DECREF(str);
       
   756     return v;
       
   757 }
       
   758 
       
   759 static PyObject *
       
   760 utf_16_be_encode(PyObject *self,
       
   761 		 PyObject *args)
       
   762 {
       
   763     PyObject *str, *v;
       
   764     const char *errors = NULL;
       
   765 
       
   766     if (!PyArg_ParseTuple(args, "O|z:utf_16_be_encode",
       
   767 			  &str, &errors))
       
   768 	return NULL;
       
   769 
       
   770     str = PyUnicode_FromObject(str);
       
   771     if (str == NULL)
       
   772 	return NULL;
       
   773     v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
       
   774 					  PyUnicode_GET_SIZE(str),
       
   775 					  errors,
       
   776 					  +1),
       
   777 		    PyUnicode_GET_SIZE(str));
       
   778     Py_DECREF(str);
       
   779     return v;
       
   780 }
       
   781 
       
   782 /* This version provides access to the byteorder parameter of the
       
   783    builtin UTF-32 codecs as optional third argument. It defaults to 0
       
   784    which means: use the native byte order and prepend the data with a
       
   785    BOM mark.
       
   786 
       
   787 */
       
   788 
       
   789 static PyObject *
       
   790 utf_32_encode(PyObject *self,
       
   791 	    PyObject *args)
       
   792 {
       
   793     PyObject *str, *v;
       
   794     const char *errors = NULL;
       
   795     int byteorder = 0;
       
   796 
       
   797     if (!PyArg_ParseTuple(args, "O|zi:utf_32_encode",
       
   798 			  &str, &errors, &byteorder))
       
   799 	return NULL;
       
   800 
       
   801     str = PyUnicode_FromObject(str);
       
   802     if (str == NULL)
       
   803 	return NULL;
       
   804     v = codec_tuple(PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(str),
       
   805 					  PyUnicode_GET_SIZE(str),
       
   806 					  errors,
       
   807 					  byteorder),
       
   808 		    PyUnicode_GET_SIZE(str));
       
   809     Py_DECREF(str);
       
   810     return v;
       
   811 }
       
   812 
       
   813 static PyObject *
       
   814 utf_32_le_encode(PyObject *self,
       
   815 		 PyObject *args)
       
   816 {
       
   817     PyObject *str, *v;
       
   818     const char *errors = NULL;
       
   819 
       
   820     if (!PyArg_ParseTuple(args, "O|z:utf_32_le_encode",
       
   821 			  &str, &errors))
       
   822 	return NULL;
       
   823 
       
   824     str = PyUnicode_FromObject(str);
       
   825     if (str == NULL)
       
   826 	return NULL;
       
   827     v = codec_tuple(PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(str),
       
   828 					     PyUnicode_GET_SIZE(str),
       
   829 					     errors,
       
   830 					     -1),
       
   831 		       PyUnicode_GET_SIZE(str));
       
   832     Py_DECREF(str);
       
   833     return v;
       
   834 }
       
   835 
       
   836 static PyObject *
       
   837 utf_32_be_encode(PyObject *self,
       
   838 		 PyObject *args)
       
   839 {
       
   840     PyObject *str, *v;
       
   841     const char *errors = NULL;
       
   842 
       
   843     if (!PyArg_ParseTuple(args, "O|z:utf_32_be_encode",
       
   844 			  &str, &errors))
       
   845 	return NULL;
       
   846 
       
   847     str = PyUnicode_FromObject(str);
       
   848     if (str == NULL)
       
   849 	return NULL;
       
   850     v = codec_tuple(PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(str),
       
   851 					  PyUnicode_GET_SIZE(str),
       
   852 					  errors,
       
   853 					  +1),
       
   854 		    PyUnicode_GET_SIZE(str));
       
   855     Py_DECREF(str);
       
   856     return v;
       
   857 }
       
   858 
       
   859 static PyObject *
       
   860 unicode_escape_encode(PyObject *self,
       
   861 		     PyObject *args)
       
   862 {
       
   863     PyObject *str, *v;
       
   864     const char *errors = NULL;
       
   865 
       
   866     if (!PyArg_ParseTuple(args, "O|z:unicode_escape_encode",
       
   867 			  &str, &errors))
       
   868 	return NULL;
       
   869 
       
   870     str = PyUnicode_FromObject(str);
       
   871     if (str == NULL)
       
   872 	return NULL;
       
   873     v = codec_tuple(PyUnicode_EncodeUnicodeEscape(PyUnicode_AS_UNICODE(str),
       
   874 						  PyUnicode_GET_SIZE(str)),
       
   875 		    PyUnicode_GET_SIZE(str));
       
   876     Py_DECREF(str);
       
   877     return v;
       
   878 }
       
   879 
       
   880 static PyObject *
       
   881 raw_unicode_escape_encode(PyObject *self,
       
   882 			PyObject *args)
       
   883 {
       
   884     PyObject *str, *v;
       
   885     const char *errors = NULL;
       
   886 
       
   887     if (!PyArg_ParseTuple(args, "O|z:raw_unicode_escape_encode",
       
   888 			  &str, &errors))
       
   889 	return NULL;
       
   890 
       
   891     str = PyUnicode_FromObject(str);
       
   892     if (str == NULL)
       
   893 	return NULL;
       
   894     v = codec_tuple(PyUnicode_EncodeRawUnicodeEscape(
       
   895 			       PyUnicode_AS_UNICODE(str),
       
   896 			       PyUnicode_GET_SIZE(str)),
       
   897 		    PyUnicode_GET_SIZE(str));
       
   898     Py_DECREF(str);
       
   899     return v;
       
   900 }
       
   901 
       
   902 static PyObject *
       
   903 latin_1_encode(PyObject *self,
       
   904 	       PyObject *args)
       
   905 {
       
   906     PyObject *str, *v;
       
   907     const char *errors = NULL;
       
   908 
       
   909     if (!PyArg_ParseTuple(args, "O|z:latin_1_encode",
       
   910 			  &str, &errors))
       
   911 	return NULL;
       
   912 
       
   913     str = PyUnicode_FromObject(str);
       
   914     if (str == NULL)
       
   915 	return NULL;
       
   916     v = codec_tuple(PyUnicode_EncodeLatin1(
       
   917 			       PyUnicode_AS_UNICODE(str),
       
   918 			       PyUnicode_GET_SIZE(str),
       
   919 			       errors),
       
   920 		    PyUnicode_GET_SIZE(str));
       
   921     Py_DECREF(str);
       
   922     return v;
       
   923 }
       
   924 
       
   925 static PyObject *
       
   926 ascii_encode(PyObject *self,
       
   927 	     PyObject *args)
       
   928 {
       
   929     PyObject *str, *v;
       
   930     const char *errors = NULL;
       
   931 
       
   932     if (!PyArg_ParseTuple(args, "O|z:ascii_encode",
       
   933 			  &str, &errors))
       
   934 	return NULL;
       
   935 
       
   936     str = PyUnicode_FromObject(str);
       
   937     if (str == NULL)
       
   938 	return NULL;
       
   939     v = codec_tuple(PyUnicode_EncodeASCII(
       
   940 			       PyUnicode_AS_UNICODE(str),
       
   941 			       PyUnicode_GET_SIZE(str),
       
   942 			       errors),
       
   943 		    PyUnicode_GET_SIZE(str));
       
   944     Py_DECREF(str);
       
   945     return v;
       
   946 }
       
   947 
       
   948 static PyObject *
       
   949 charmap_encode(PyObject *self,
       
   950 	     PyObject *args)
       
   951 {
       
   952     PyObject *str, *v;
       
   953     const char *errors = NULL;
       
   954     PyObject *mapping = NULL;
       
   955 
       
   956     if (!PyArg_ParseTuple(args, "O|zO:charmap_encode",
       
   957 			  &str, &errors, &mapping))
       
   958 	return NULL;
       
   959     if (mapping == Py_None)
       
   960 	mapping = NULL;
       
   961 
       
   962     str = PyUnicode_FromObject(str);
       
   963     if (str == NULL)
       
   964 	return NULL;
       
   965     v = codec_tuple(PyUnicode_EncodeCharmap(
       
   966 			       PyUnicode_AS_UNICODE(str),
       
   967 			       PyUnicode_GET_SIZE(str),
       
   968 			       mapping,
       
   969 			       errors),
       
   970 		    PyUnicode_GET_SIZE(str));
       
   971     Py_DECREF(str);
       
   972     return v;
       
   973 }
       
   974 
       
   975 static PyObject*
       
   976 charmap_build(PyObject *self, PyObject *args)
       
   977 {
       
   978     PyObject *map;
       
   979     if (!PyArg_ParseTuple(args, "U:charmap_build", &map))
       
   980         return NULL;
       
   981     return PyUnicode_BuildEncodingMap(map);
       
   982 }
       
   983 
       
   984 #if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
       
   985 
       
   986 static PyObject *
       
   987 mbcs_encode(PyObject *self,
       
   988 	    PyObject *args)
       
   989 {
       
   990     PyObject *str, *v;
       
   991     const char *errors = NULL;
       
   992 
       
   993     if (!PyArg_ParseTuple(args, "O|z:mbcs_encode",
       
   994 			  &str, &errors))
       
   995 	return NULL;
       
   996 
       
   997     str = PyUnicode_FromObject(str);
       
   998     if (str == NULL)
       
   999 	return NULL;
       
  1000     v = codec_tuple(PyUnicode_EncodeMBCS(
       
  1001 			       PyUnicode_AS_UNICODE(str),
       
  1002 			       PyUnicode_GET_SIZE(str),
       
  1003 			       errors),
       
  1004 		    PyUnicode_GET_SIZE(str));
       
  1005     Py_DECREF(str);
       
  1006     return v;
       
  1007 }
       
  1008 
       
  1009 #endif /* MS_WINDOWS */
       
  1010 #endif /* Py_USING_UNICODE */
       
  1011 
       
  1012 /* --- Error handler registry --------------------------------------------- */
       
  1013 
       
  1014 PyDoc_STRVAR(register_error__doc__,
       
  1015 "register_error(errors, handler)\n\
       
  1016 \n\
       
  1017 Register the specified error handler under the name\n\
       
  1018 errors. handler must be a callable object, that\n\
       
  1019 will be called with an exception instance containing\n\
       
  1020 information about the location of the encoding/decoding\n\
       
  1021 error and must return a (replacement, new position) tuple.");
       
  1022 
       
  1023 static PyObject *register_error(PyObject *self, PyObject *args)
       
  1024 {
       
  1025     const char *name;
       
  1026     PyObject *handler;
       
  1027 
       
  1028     if (!PyArg_ParseTuple(args, "sO:register_error",
       
  1029 			  &name, &handler))
       
  1030 	return NULL;
       
  1031     if (PyCodec_RegisterError(name, handler))
       
  1032         return NULL;
       
  1033     Py_RETURN_NONE;
       
  1034 }
       
  1035 
       
  1036 PyDoc_STRVAR(lookup_error__doc__,
       
  1037 "lookup_error(errors) -> handler\n\
       
  1038 \n\
       
  1039 Return the error handler for the specified error handling name\n\
       
  1040 or raise a LookupError, if no handler exists under this name.");
       
  1041 
       
  1042 static PyObject *lookup_error(PyObject *self, PyObject *args)
       
  1043 {
       
  1044     const char *name;
       
  1045 
       
  1046     if (!PyArg_ParseTuple(args, "s:lookup_error",
       
  1047 			  &name))
       
  1048 	return NULL;
       
  1049     return PyCodec_LookupError(name);
       
  1050 }
       
  1051 
       
  1052 /* --- Module API --------------------------------------------------------- */
       
  1053 
       
  1054 static PyMethodDef _codecs_functions[] = {
       
  1055     {"register",		codec_register,			METH_O,
       
  1056         register__doc__},
       
  1057     {"lookup",			codec_lookup, 			METH_VARARGS,
       
  1058         lookup__doc__},
       
  1059     {"encode",			codec_encode,			METH_VARARGS,
       
  1060 	encode__doc__},
       
  1061     {"decode",			codec_decode,			METH_VARARGS,
       
  1062 	decode__doc__},
       
  1063     {"escape_encode",		escape_encode,			METH_VARARGS},
       
  1064     {"escape_decode",		escape_decode,			METH_VARARGS},
       
  1065 #ifdef Py_USING_UNICODE
       
  1066     {"utf_8_encode",		utf_8_encode,			METH_VARARGS},
       
  1067     {"utf_8_decode",		utf_8_decode,			METH_VARARGS},
       
  1068     {"utf_7_encode",		utf_7_encode,			METH_VARARGS},
       
  1069     {"utf_7_decode",		utf_7_decode,			METH_VARARGS},
       
  1070     {"utf_16_encode",		utf_16_encode,			METH_VARARGS},
       
  1071     {"utf_16_le_encode",	utf_16_le_encode,		METH_VARARGS},
       
  1072     {"utf_16_be_encode",	utf_16_be_encode,		METH_VARARGS},
       
  1073     {"utf_16_decode",		utf_16_decode,			METH_VARARGS},
       
  1074     {"utf_16_le_decode",	utf_16_le_decode,		METH_VARARGS},
       
  1075     {"utf_16_be_decode",	utf_16_be_decode,		METH_VARARGS},
       
  1076     {"utf_16_ex_decode",	utf_16_ex_decode,		METH_VARARGS},
       
  1077     {"utf_32_encode",		utf_32_encode,			METH_VARARGS},
       
  1078     {"utf_32_le_encode",	utf_32_le_encode,		METH_VARARGS},
       
  1079     {"utf_32_be_encode",	utf_32_be_encode,		METH_VARARGS},
       
  1080     {"utf_32_decode",		utf_32_decode,			METH_VARARGS},
       
  1081     {"utf_32_le_decode",	utf_32_le_decode,		METH_VARARGS},
       
  1082     {"utf_32_be_decode",	utf_32_be_decode,		METH_VARARGS},
       
  1083     {"utf_32_ex_decode",	utf_32_ex_decode,		METH_VARARGS},
       
  1084     {"unicode_escape_encode",	unicode_escape_encode,		METH_VARARGS},
       
  1085     {"unicode_escape_decode",	unicode_escape_decode,		METH_VARARGS},
       
  1086     {"unicode_internal_encode",	unicode_internal_encode,	METH_VARARGS},
       
  1087     {"unicode_internal_decode",	unicode_internal_decode,	METH_VARARGS},
       
  1088     {"raw_unicode_escape_encode", raw_unicode_escape_encode,	METH_VARARGS},
       
  1089     {"raw_unicode_escape_decode", raw_unicode_escape_decode,	METH_VARARGS},
       
  1090     {"latin_1_encode", 		latin_1_encode,			METH_VARARGS},
       
  1091     {"latin_1_decode", 		latin_1_decode,			METH_VARARGS},
       
  1092     {"ascii_encode", 		ascii_encode,			METH_VARARGS},
       
  1093     {"ascii_decode", 		ascii_decode,			METH_VARARGS},
       
  1094     {"charmap_encode", 		charmap_encode,			METH_VARARGS},
       
  1095     {"charmap_decode", 		charmap_decode,			METH_VARARGS},
       
  1096     {"charmap_build", 		charmap_build,			METH_VARARGS},
       
  1097     {"readbuffer_encode",	readbuffer_encode,		METH_VARARGS},
       
  1098     {"charbuffer_encode",	charbuffer_encode,		METH_VARARGS},
       
  1099 #if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
       
  1100     {"mbcs_encode", 		mbcs_encode,			METH_VARARGS},
       
  1101     {"mbcs_decode", 		mbcs_decode,			METH_VARARGS},
       
  1102 #endif
       
  1103 #endif /* Py_USING_UNICODE */
       
  1104     {"register_error", 		register_error,			METH_VARARGS,
       
  1105         register_error__doc__},
       
  1106     {"lookup_error", 		lookup_error,			METH_VARARGS,
       
  1107         lookup_error__doc__},
       
  1108     {NULL, NULL}		/* sentinel */
       
  1109 };
       
  1110 
       
  1111 PyMODINIT_FUNC
       
  1112 init_codecs(void)
       
  1113 {
       
  1114     Py_InitModule("_codecs", _codecs_functions);
       
  1115 }