symbian-qemu-0.9.1-12/python-2.6.1/Python/codecs.c
changeset 1 2fb8b9db1c86
equal deleted inserted replaced
0:ffa851df0825 1:2fb8b9db1c86
       
     1 /* ------------------------------------------------------------------------
       
     2 
       
     3    Python Codec Registry and support functions
       
     4 
       
     5 Written by Marc-Andre Lemburg (mal@lemburg.com).
       
     6 
       
     7 Copyright (c) Corporation for National Research Initiatives.
       
     8 
       
     9    ------------------------------------------------------------------------ */
       
    10 
       
    11 #include "Python.h"
       
    12 #include <ctype.h>
       
    13 
       
    14 /* --- Codec Registry ----------------------------------------------------- */
       
    15 
       
    16 /* Import the standard encodings package which will register the first
       
    17    codec search function. 
       
    18 
       
    19    This is done in a lazy way so that the Unicode implementation does
       
    20    not downgrade startup time of scripts not needing it.
       
    21 
       
    22    ImportErrors are silently ignored by this function. Only one try is
       
    23    made.
       
    24 
       
    25 */
       
    26 
       
    27 static int _PyCodecRegistry_Init(void); /* Forward */
       
    28 
       
    29 int PyCodec_Register(PyObject *search_function)
       
    30 {
       
    31     PyInterpreterState *interp = PyThreadState_GET()->interp;
       
    32     if (interp->codec_search_path == NULL && _PyCodecRegistry_Init())
       
    33 	goto onError;
       
    34     if (search_function == NULL) {
       
    35 	PyErr_BadArgument();
       
    36 	goto onError;
       
    37     }
       
    38     if (!PyCallable_Check(search_function)) {
       
    39 	PyErr_SetString(PyExc_TypeError, "argument must be callable");
       
    40 	goto onError;
       
    41     }
       
    42     return PyList_Append(interp->codec_search_path, search_function);
       
    43 
       
    44  onError:
       
    45     return -1;
       
    46 }
       
    47 
       
     48 /* Convert a string to a normalized Python string: all characters are
       
     49    converted to lower case, spaces are replaced with hyphens. */
       
    50 
       
    51 static
       
    52 PyObject *normalizestring(const char *string)
       
    53 {
       
    54     register size_t i;
       
    55     size_t len = strlen(string);
       
    56     char *p;
       
    57     PyObject *v;
       
    58     
       
    59     if (len > PY_SSIZE_T_MAX) {
       
    60 	PyErr_SetString(PyExc_OverflowError, "string is too large");
       
    61 	return NULL;
       
    62     }
       
    63 	
       
    64     v = PyString_FromStringAndSize(NULL, len);
       
    65     if (v == NULL)
       
    66 	return NULL;
       
    67     p = PyString_AS_STRING(v);
       
    68     for (i = 0; i < len; i++) {
       
    69         register char ch = string[i];
       
    70         if (ch == ' ')
       
    71             ch = '-';
       
    72         else
       
    73             ch = tolower(Py_CHARMASK(ch));
       
    74 	p[i] = ch;
       
    75     }
       
    76     return v;
       
    77 }
       
    78 
       
    79 /* Lookup the given encoding and return a tuple providing the codec
       
    80    facilities.
       
    81 
       
    82    The encoding string is looked up converted to all lower-case
       
    83    characters. This makes encodings looked up through this mechanism
       
    84    effectively case-insensitive.
       
    85 
       
    86    If no codec is found, a LookupError is set and NULL returned. 
       
    87 
       
    88    As side effect, this tries to load the encodings package, if not
       
    89    yet done. This is part of the lazy load strategy for the encodings
       
    90    package.
       
    91 
       
    92 */
       
    93 
       
    94 PyObject *_PyCodec_Lookup(const char *encoding)
       
    95 {
       
    96     PyInterpreterState *interp;
       
    97     PyObject *result, *args = NULL, *v;
       
    98     Py_ssize_t i, len;
       
    99 
       
   100     if (encoding == NULL) {
       
   101 	PyErr_BadArgument();
       
   102 	goto onError;
       
   103     }
       
   104 
       
   105     interp = PyThreadState_GET()->interp;
       
   106     if (interp->codec_search_path == NULL && _PyCodecRegistry_Init())
       
   107 	goto onError;
       
   108 
       
   109     /* Convert the encoding to a normalized Python string: all
       
   110        characters are converted to lower case, spaces and hyphens are
       
   111        replaced with underscores. */
       
   112     v = normalizestring(encoding);
       
   113     if (v == NULL)
       
   114 	goto onError;
       
   115     PyString_InternInPlace(&v);
       
   116 
       
   117     /* First, try to lookup the name in the registry dictionary */
       
   118     result = PyDict_GetItem(interp->codec_search_cache, v);
       
   119     if (result != NULL) {
       
   120 	Py_INCREF(result);
       
   121 	Py_DECREF(v);
       
   122 	return result;
       
   123     }
       
   124     
       
   125     /* Next, scan the search functions in order of registration */
       
   126     args = PyTuple_New(1);
       
   127     if (args == NULL)
       
   128 	goto onError;
       
   129     PyTuple_SET_ITEM(args,0,v);
       
   130 
       
   131     len = PyList_Size(interp->codec_search_path);
       
   132     if (len < 0)
       
   133 	goto onError;
       
   134     if (len == 0) {
       
   135 	PyErr_SetString(PyExc_LookupError,
       
   136 			"no codec search functions registered: "
       
   137 			"can't find encoding");
       
   138 	goto onError;
       
   139     }
       
   140 
       
   141     for (i = 0; i < len; i++) {
       
   142 	PyObject *func;
       
   143 	
       
   144 	func = PyList_GetItem(interp->codec_search_path, i);
       
   145 	if (func == NULL)
       
   146 	    goto onError;
       
   147 	result = PyEval_CallObject(func, args);
       
   148 	if (result == NULL)
       
   149 	    goto onError;
       
   150 	if (result == Py_None) {
       
   151 	    Py_DECREF(result);
       
   152 	    continue;
       
   153 	}
       
   154 	if (!PyTuple_Check(result) || PyTuple_GET_SIZE(result) != 4) {
       
   155 	    PyErr_SetString(PyExc_TypeError,
       
   156 			    "codec search functions must return 4-tuples");
       
   157 	    Py_DECREF(result);
       
   158 	    goto onError;
       
   159 	}
       
   160 	break;
       
   161     }
       
   162     if (i == len) {
       
   163 	/* XXX Perhaps we should cache misses too ? */
       
   164 	PyErr_Format(PyExc_LookupError,
       
   165                      "unknown encoding: %s", encoding);
       
   166 	goto onError;
       
   167     }
       
   168 
       
   169     /* Cache and return the result */
       
   170     PyDict_SetItem(interp->codec_search_cache, v, result);
       
   171     Py_DECREF(args);
       
   172     return result;
       
   173 
       
   174  onError:
       
   175     Py_XDECREF(args);
       
   176     return NULL;
       
   177 }
       
   178 
       
   179 static
       
   180 PyObject *args_tuple(PyObject *object,
       
   181 		     const char *errors)
       
   182 {
       
   183     PyObject *args;
       
   184     
       
   185     args = PyTuple_New(1 + (errors != NULL));
       
   186     if (args == NULL)
       
   187 	return NULL;
       
   188     Py_INCREF(object);
       
   189     PyTuple_SET_ITEM(args,0,object);
       
   190     if (errors) {
       
   191 	PyObject *v;
       
   192 	
       
   193 	v = PyString_FromString(errors);
       
   194 	if (v == NULL) {
       
   195 	    Py_DECREF(args);
       
   196 	    return NULL;
       
   197 	}
       
   198 	PyTuple_SET_ITEM(args, 1, v);
       
   199     }
       
   200     return args;
       
   201 }
       
   202 
       
   203 /* Helper function to get a codec item */
       
   204 
       
   205 static
       
   206 PyObject *codec_getitem(const char *encoding, int index)
       
   207 {
       
   208     PyObject *codecs;
       
   209     PyObject *v;
       
   210 
       
   211     codecs = _PyCodec_Lookup(encoding);
       
   212     if (codecs == NULL)
       
   213 	return NULL;
       
   214     v = PyTuple_GET_ITEM(codecs, index);
       
   215     Py_DECREF(codecs);
       
   216     Py_INCREF(v);
       
   217     return v;
       
   218 }
       
   219 
       
   220 /* Helper function to create an incremental codec. */
       
   221 
       
   222 static
       
   223 PyObject *codec_getincrementalcodec(const char *encoding,
       
   224 				    const char *errors,
       
   225 				    const char *attrname)
       
   226 {
       
   227     PyObject *codecs, *ret, *inccodec;
       
   228 
       
   229     codecs = _PyCodec_Lookup(encoding);
       
   230     if (codecs == NULL)
       
   231 	return NULL;
       
   232     inccodec = PyObject_GetAttrString(codecs, attrname);
       
   233     Py_DECREF(codecs);
       
   234     if (inccodec == NULL)
       
   235 	return NULL;
       
   236     if (errors)
       
   237 	ret = PyObject_CallFunction(inccodec, "s", errors);
       
   238     else
       
   239 	ret = PyObject_CallFunction(inccodec, NULL);
       
   240     Py_DECREF(inccodec);
       
   241     return ret;
       
   242 }
       
   243 
       
   244 /* Helper function to create a stream codec. */
       
   245 
       
   246 static
       
   247 PyObject *codec_getstreamcodec(const char *encoding,
       
   248 			       PyObject *stream,
       
   249 			       const char *errors,
       
   250 			       const int index)
       
   251 {
       
   252     PyObject *codecs, *streamcodec, *codeccls;
       
   253 
       
   254     codecs = _PyCodec_Lookup(encoding);
       
   255     if (codecs == NULL)
       
   256 	return NULL;
       
   257 
       
   258     codeccls = PyTuple_GET_ITEM(codecs, index);
       
   259     if (errors != NULL)
       
   260 	streamcodec = PyObject_CallFunction(codeccls, "Os", stream, errors);
       
   261     else
       
   262 	streamcodec = PyObject_CallFunction(codeccls, "O", stream);
       
   263     Py_DECREF(codecs);
       
   264     return streamcodec;
       
   265 }
       
   266 
       
   267 /* Convenience APIs to query the Codec registry. 
       
   268    
       
   269    All APIs return a codec object with incremented refcount.
       
   270    
       
   271  */
       
   272 
       
   273 PyObject *PyCodec_Encoder(const char *encoding)
       
   274 {
       
   275     return codec_getitem(encoding, 0);
       
   276 }
       
   277 
       
   278 PyObject *PyCodec_Decoder(const char *encoding)
       
   279 {
       
   280     return codec_getitem(encoding, 1);
       
   281 }
       
   282 
       
   283 PyObject *PyCodec_IncrementalEncoder(const char *encoding,
       
   284 				     const char *errors)
       
   285 {
       
   286     return codec_getincrementalcodec(encoding, errors, "incrementalencoder");
       
   287 }
       
   288 
       
   289 PyObject *PyCodec_IncrementalDecoder(const char *encoding,
       
   290 				     const char *errors)
       
   291 {
       
   292     return codec_getincrementalcodec(encoding, errors, "incrementaldecoder");
       
   293 }
       
   294 
       
   295 PyObject *PyCodec_StreamReader(const char *encoding,
       
   296 			       PyObject *stream,
       
   297 			       const char *errors)
       
   298 {
       
   299     return codec_getstreamcodec(encoding, stream, errors, 2);
       
   300 }
       
   301 
       
   302 PyObject *PyCodec_StreamWriter(const char *encoding,
       
   303 			       PyObject *stream,
       
   304 			       const char *errors)
       
   305 {
       
   306     return codec_getstreamcodec(encoding, stream, errors, 3);
       
   307 }
       
   308 
       
   309 /* Encode an object (e.g. an Unicode object) using the given encoding
       
   310    and return the resulting encoded object (usually a Python string).
       
   311 
       
   312    errors is passed to the encoder factory as argument if non-NULL. */
       
   313 
       
   314 PyObject *PyCodec_Encode(PyObject *object,
       
   315 			 const char *encoding,
       
   316 			 const char *errors)
       
   317 {
       
   318     PyObject *encoder = NULL;
       
   319     PyObject *args = NULL, *result = NULL;
       
   320     PyObject *v;
       
   321 
       
   322     encoder = PyCodec_Encoder(encoding);
       
   323     if (encoder == NULL)
       
   324 	goto onError;
       
   325 
       
   326     args = args_tuple(object, errors);
       
   327     if (args == NULL)
       
   328 	goto onError;
       
   329     
       
   330     result = PyEval_CallObject(encoder,args);
       
   331     if (result == NULL)
       
   332 	goto onError;
       
   333 
       
   334     if (!PyTuple_Check(result) || 
       
   335 	PyTuple_GET_SIZE(result) != 2) {
       
   336 	PyErr_SetString(PyExc_TypeError,
       
   337 			"encoder must return a tuple (object,integer)");
       
   338 	goto onError;
       
   339     }
       
   340     v = PyTuple_GET_ITEM(result,0);
       
   341     Py_INCREF(v);
       
   342     /* We don't check or use the second (integer) entry. */
       
   343 
       
   344     Py_DECREF(args);
       
   345     Py_DECREF(encoder);
       
   346     Py_DECREF(result);
       
   347     return v;
       
   348 	
       
   349  onError:
       
   350     Py_XDECREF(result);
       
   351     Py_XDECREF(args);
       
   352     Py_XDECREF(encoder);
       
   353     return NULL;
       
   354 }
       
   355 
       
   356 /* Decode an object (usually a Python string) using the given encoding
       
   357    and return an equivalent object (e.g. an Unicode object).
       
   358 
       
   359    errors is passed to the decoder factory as argument if non-NULL. */
       
   360 
       
   361 PyObject *PyCodec_Decode(PyObject *object,
       
   362 			 const char *encoding,
       
   363 			 const char *errors)
       
   364 {
       
   365     PyObject *decoder = NULL;
       
   366     PyObject *args = NULL, *result = NULL;
       
   367     PyObject *v;
       
   368 
       
   369     decoder = PyCodec_Decoder(encoding);
       
   370     if (decoder == NULL)
       
   371 	goto onError;
       
   372 
       
   373     args = args_tuple(object, errors);
       
   374     if (args == NULL)
       
   375 	goto onError;
       
   376     
       
   377     result = PyEval_CallObject(decoder,args);
       
   378     if (result == NULL)
       
   379 	goto onError;
       
   380     if (!PyTuple_Check(result) || 
       
   381 	PyTuple_GET_SIZE(result) != 2) {
       
   382 	PyErr_SetString(PyExc_TypeError,
       
   383 			"decoder must return a tuple (object,integer)");
       
   384 	goto onError;
       
   385     }
       
   386     v = PyTuple_GET_ITEM(result,0);
       
   387     Py_INCREF(v);
       
   388     /* We don't check or use the second (integer) entry. */
       
   389 
       
   390     Py_DECREF(args);
       
   391     Py_DECREF(decoder);
       
   392     Py_DECREF(result);
       
   393     return v;
       
   394 	
       
   395  onError:
       
   396     Py_XDECREF(args);
       
   397     Py_XDECREF(decoder);
       
   398     Py_XDECREF(result);
       
   399     return NULL;
       
   400 }
       
   401 
       
   402 /* Register the error handling callback function error under the name
       
   403    name. This function will be called by the codec when it encounters
       
   404    an unencodable characters/undecodable bytes and doesn't know the
       
   405    callback name, when name is specified as the error parameter
       
   406    in the call to the encode/decode function.
       
   407    Return 0 on success, -1 on error */
       
   408 int PyCodec_RegisterError(const char *name, PyObject *error)
       
   409 {
       
   410     PyInterpreterState *interp = PyThreadState_GET()->interp;
       
   411     if (interp->codec_search_path == NULL && _PyCodecRegistry_Init())
       
   412 	return -1;
       
   413     if (!PyCallable_Check(error)) {
       
   414 	PyErr_SetString(PyExc_TypeError, "handler must be callable");
       
   415 	return -1;
       
   416     }
       
   417     return PyDict_SetItemString(interp->codec_error_registry,
       
   418 	    			(char *)name, error);
       
   419 }
       
   420 
       
   421 /* Lookup the error handling callback function registered under the
       
   422    name error. As a special case NULL can be passed, in which case
       
   423    the error handling callback for strict encoding will be returned. */
       
   424 PyObject *PyCodec_LookupError(const char *name)
       
   425 {
       
   426     PyObject *handler = NULL;
       
   427 
       
   428     PyInterpreterState *interp = PyThreadState_GET()->interp;
       
   429     if (interp->codec_search_path == NULL && _PyCodecRegistry_Init())
       
   430 	return NULL;
       
   431 
       
   432     if (name==NULL)
       
   433 	name = "strict";
       
   434     handler = PyDict_GetItemString(interp->codec_error_registry, (char *)name);
       
   435     if (!handler)
       
   436 	PyErr_Format(PyExc_LookupError, "unknown error handler name '%.400s'", name);
       
   437     else
       
   438 	Py_INCREF(handler);
       
   439     return handler;
       
   440 }
       
   441 
       
   442 static void wrong_exception_type(PyObject *exc)
       
   443 {
       
   444     PyObject *type = PyObject_GetAttrString(exc, "__class__");
       
   445     if (type != NULL) {
       
   446 	PyObject *name = PyObject_GetAttrString(type, "__name__");
       
   447 	Py_DECREF(type);
       
   448 	if (name != NULL) {
       
   449 	    PyObject *string = PyObject_Str(name);
       
   450 	    Py_DECREF(name);
       
   451 	    if (string != NULL) {
       
   452 	        PyErr_Format(PyExc_TypeError,
       
   453 		    "don't know how to handle %.400s in error callback",
       
   454 		    PyString_AS_STRING(string));
       
   455 	        Py_DECREF(string);
       
   456 	    }
       
   457 	}
       
   458     }
       
   459 }
       
   460 
       
   461 PyObject *PyCodec_StrictErrors(PyObject *exc)
       
   462 {
       
   463     if (PyExceptionInstance_Check(exc))
       
   464         PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
       
   465     else
       
   466 	PyErr_SetString(PyExc_TypeError, "codec must pass exception instance");
       
   467     return NULL;
       
   468 }
       
   469 
       
   470 
       
   471 #ifdef Py_USING_UNICODE
       
   472 PyObject *PyCodec_IgnoreErrors(PyObject *exc)
       
   473 {
       
   474     Py_ssize_t end;
       
   475     if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
       
   476 	if (PyUnicodeEncodeError_GetEnd(exc, &end))
       
   477 	    return NULL;
       
   478     }
       
   479     else if (PyObject_IsInstance(exc, PyExc_UnicodeDecodeError)) {
       
   480 	if (PyUnicodeDecodeError_GetEnd(exc, &end))
       
   481 	    return NULL;
       
   482     }
       
   483     else if (PyObject_IsInstance(exc, PyExc_UnicodeTranslateError)) {
       
   484 	if (PyUnicodeTranslateError_GetEnd(exc, &end))
       
   485 	    return NULL;
       
   486     }
       
   487     else {
       
   488 	wrong_exception_type(exc);
       
   489 	return NULL;
       
   490     }
       
   491     /* ouch: passing NULL, 0, pos gives None instead of u'' */
       
   492     return Py_BuildValue("(u#n)", &end, 0, end);
       
   493 }
       
   494 
       
   495 
       
   496 PyObject *PyCodec_ReplaceErrors(PyObject *exc)
       
   497 {
       
   498     PyObject *restuple;
       
   499     Py_ssize_t start;
       
   500     Py_ssize_t end;
       
   501     Py_ssize_t i;
       
   502 
       
   503     if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
       
   504 	PyObject *res;
       
   505 	Py_UNICODE *p;
       
   506 	if (PyUnicodeEncodeError_GetStart(exc, &start))
       
   507 	    return NULL;
       
   508 	if (PyUnicodeEncodeError_GetEnd(exc, &end))
       
   509 	    return NULL;
       
   510 	res = PyUnicode_FromUnicode(NULL, end-start);
       
   511 	if (res == NULL)
       
   512 	    return NULL;
       
   513 	for (p = PyUnicode_AS_UNICODE(res), i = start;
       
   514 	    i<end; ++p, ++i)
       
   515 	    *p = '?';
       
   516 	restuple = Py_BuildValue("(On)", res, end);
       
   517 	Py_DECREF(res);
       
   518 	return restuple;
       
   519     }
       
   520     else if (PyObject_IsInstance(exc, PyExc_UnicodeDecodeError)) {
       
   521 	Py_UNICODE res = Py_UNICODE_REPLACEMENT_CHARACTER;
       
   522 	if (PyUnicodeDecodeError_GetEnd(exc, &end))
       
   523 	    return NULL;
       
   524 	return Py_BuildValue("(u#n)", &res, 1, end);
       
   525     }
       
   526     else if (PyObject_IsInstance(exc, PyExc_UnicodeTranslateError)) {
       
   527 	PyObject *res;
       
   528 	Py_UNICODE *p;
       
   529 	if (PyUnicodeTranslateError_GetStart(exc, &start))
       
   530 	    return NULL;
       
   531 	if (PyUnicodeTranslateError_GetEnd(exc, &end))
       
   532 	    return NULL;
       
   533 	res = PyUnicode_FromUnicode(NULL, end-start);
       
   534 	if (res == NULL)
       
   535 	    return NULL;
       
   536 	for (p = PyUnicode_AS_UNICODE(res), i = start;
       
   537 	    i<end; ++p, ++i)
       
   538 	    *p = Py_UNICODE_REPLACEMENT_CHARACTER;
       
   539 	restuple = Py_BuildValue("(On)", res, end);
       
   540 	Py_DECREF(res);
       
   541 	return restuple;
       
   542     }
       
   543     else {
       
   544 	wrong_exception_type(exc);
       
   545 	return NULL;
       
   546     }
       
   547 }
       
   548 
       
   549 PyObject *PyCodec_XMLCharRefReplaceErrors(PyObject *exc)
       
   550 {
       
   551     if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
       
   552 	PyObject *restuple;
       
   553 	PyObject *object;
       
   554 	Py_ssize_t start;
       
   555 	Py_ssize_t end;
       
   556 	PyObject *res;
       
   557 	Py_UNICODE *p;
       
   558 	Py_UNICODE *startp;
       
   559 	Py_UNICODE *outp;
       
   560 	int ressize;
       
   561 	if (PyUnicodeEncodeError_GetStart(exc, &start))
       
   562 	    return NULL;
       
   563 	if (PyUnicodeEncodeError_GetEnd(exc, &end))
       
   564 	    return NULL;
       
   565 	if (!(object = PyUnicodeEncodeError_GetObject(exc)))
       
   566 	    return NULL;
       
   567 	startp = PyUnicode_AS_UNICODE(object);
       
   568 	for (p = startp+start, ressize = 0; p < startp+end; ++p) {
       
   569 	    if (*p<10)
       
   570 		ressize += 2+1+1;
       
   571 	    else if (*p<100)
       
   572 		ressize += 2+2+1;
       
   573 	    else if (*p<1000)
       
   574 		ressize += 2+3+1;
       
   575 	    else if (*p<10000)
       
   576 		ressize += 2+4+1;
       
   577 #ifndef Py_UNICODE_WIDE
       
   578 	    else
       
   579 		ressize += 2+5+1;
       
   580 #else
       
   581 	    else if (*p<100000)
       
   582 		ressize += 2+5+1;
       
   583 	    else if (*p<1000000)
       
   584 		ressize += 2+6+1;
       
   585 	    else
       
   586 		ressize += 2+7+1;
       
   587 #endif
       
   588 	}
       
   589 	/* allocate replacement */
       
   590 	res = PyUnicode_FromUnicode(NULL, ressize);
       
   591 	if (res == NULL) {
       
   592 	    Py_DECREF(object);
       
   593 	    return NULL;
       
   594 	}
       
   595 	/* generate replacement */
       
   596 	for (p = startp+start, outp = PyUnicode_AS_UNICODE(res);
       
   597 	    p < startp+end; ++p) {
       
   598 	    Py_UNICODE c = *p;
       
   599 	    int digits;
       
   600 	    int base;
       
   601 	    *outp++ = '&';
       
   602 	    *outp++ = '#';
       
   603 	    if (*p<10) {
       
   604 		digits = 1;
       
   605 		base = 1;
       
   606 	    }
       
   607 	    else if (*p<100) {
       
   608 		digits = 2;
       
   609 		base = 10;
       
   610 	    }
       
   611 	    else if (*p<1000) {
       
   612 		digits = 3;
       
   613 		base = 100;
       
   614 	    }
       
   615 	    else if (*p<10000) {
       
   616 		digits = 4;
       
   617 		base = 1000;
       
   618 	    }
       
   619 #ifndef Py_UNICODE_WIDE
       
   620 	    else {
       
   621 		digits = 5;
       
   622 		base = 10000;
       
   623 	    }
       
   624 #else
       
   625 	    else if (*p<100000) {
       
   626 		digits = 5;
       
   627 		base = 10000;
       
   628 	    }
       
   629 	    else if (*p<1000000) {
       
   630 		digits = 6;
       
   631 		base = 100000;
       
   632 	    }
       
   633 	    else {
       
   634 		digits = 7;
       
   635 		base = 1000000;
       
   636 	    }
       
   637 #endif
       
   638 	    while (digits-->0) {
       
   639 		*outp++ = '0' + c/base;
       
   640 		c %= base;
       
   641 		base /= 10;
       
   642 	    }
       
   643 	    *outp++ = ';';
       
   644 	}
       
   645 	restuple = Py_BuildValue("(On)", res, end);
       
   646 	Py_DECREF(res);
       
   647 	Py_DECREF(object);
       
   648 	return restuple;
       
   649     }
       
   650     else {
       
   651 	wrong_exception_type(exc);
       
   652 	return NULL;
       
   653     }
       
   654 }
       
   655 
       
   656 static Py_UNICODE hexdigits[] = {
       
   657     '0', '1', '2', '3', '4', '5', '6', '7',
       
   658     '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'
       
   659 };
       
   660 
       
   661 PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc)
       
   662 {
       
   663     if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
       
   664 	PyObject *restuple;
       
   665 	PyObject *object;
       
   666 	Py_ssize_t start;
       
   667 	Py_ssize_t end;
       
   668 	PyObject *res;
       
   669 	Py_UNICODE *p;
       
   670 	Py_UNICODE *startp;
       
   671 	Py_UNICODE *outp;
       
   672 	int ressize;
       
   673 	if (PyUnicodeEncodeError_GetStart(exc, &start))
       
   674 	    return NULL;
       
   675 	if (PyUnicodeEncodeError_GetEnd(exc, &end))
       
   676 	    return NULL;
       
   677 	if (!(object = PyUnicodeEncodeError_GetObject(exc)))
       
   678 	    return NULL;
       
   679 	startp = PyUnicode_AS_UNICODE(object);
       
   680 	for (p = startp+start, ressize = 0; p < startp+end; ++p) {
       
   681 #ifdef Py_UNICODE_WIDE
       
   682 	    if (*p >= 0x00010000)
       
   683 		ressize += 1+1+8;
       
   684 	    else
       
   685 #endif
       
   686 	    if (*p >= 0x100) {
       
   687 		ressize += 1+1+4;
       
   688 	    }
       
   689 	    else
       
   690 		ressize += 1+1+2;
       
   691 	}
       
   692 	res = PyUnicode_FromUnicode(NULL, ressize);
       
   693 	if (res==NULL)
       
   694 	    return NULL;
       
   695 	for (p = startp+start, outp = PyUnicode_AS_UNICODE(res);
       
   696 	    p < startp+end; ++p) {
       
   697 	    Py_UNICODE c = *p;
       
   698 	    *outp++ = '\\';
       
   699 #ifdef Py_UNICODE_WIDE
       
   700 	    if (c >= 0x00010000) {
       
   701 		*outp++ = 'U';
       
   702 		*outp++ = hexdigits[(c>>28)&0xf];
       
   703 		*outp++ = hexdigits[(c>>24)&0xf];
       
   704 		*outp++ = hexdigits[(c>>20)&0xf];
       
   705 		*outp++ = hexdigits[(c>>16)&0xf];
       
   706 		*outp++ = hexdigits[(c>>12)&0xf];
       
   707 		*outp++ = hexdigits[(c>>8)&0xf];
       
   708 	    }
       
   709 	    else
       
   710 #endif
       
   711 	    if (c >= 0x100) {
       
   712 		*outp++ = 'u';
       
   713 		*outp++ = hexdigits[(c>>12)&0xf];
       
   714 		*outp++ = hexdigits[(c>>8)&0xf];
       
   715 	    }
       
   716 	    else
       
   717 		*outp++ = 'x';
       
   718 	    *outp++ = hexdigits[(c>>4)&0xf];
       
   719 	    *outp++ = hexdigits[c&0xf];
       
   720 	}
       
   721 
       
   722 	restuple = Py_BuildValue("(On)", res, end);
       
   723 	Py_DECREF(res);
       
   724 	Py_DECREF(object);
       
   725 	return restuple;
       
   726     }
       
   727     else {
       
   728 	wrong_exception_type(exc);
       
   729 	return NULL;
       
   730     }
       
   731 }
       
   732 #endif
       
   733 
       
   734 static PyObject *strict_errors(PyObject *self, PyObject *exc)
       
   735 {
       
   736     return PyCodec_StrictErrors(exc);
       
   737 }
       
   738 
       
   739 
       
   740 #ifdef Py_USING_UNICODE
       
   741 static PyObject *ignore_errors(PyObject *self, PyObject *exc)
       
   742 {
       
   743     return PyCodec_IgnoreErrors(exc);
       
   744 }
       
   745 
       
   746 
       
   747 static PyObject *replace_errors(PyObject *self, PyObject *exc)
       
   748 {
       
   749     return PyCodec_ReplaceErrors(exc);
       
   750 }
       
   751 
       
   752 
       
   753 static PyObject *xmlcharrefreplace_errors(PyObject *self, PyObject *exc)
       
   754 {
       
   755     return PyCodec_XMLCharRefReplaceErrors(exc);
       
   756 }
       
   757 
       
   758 
       
   759 static PyObject *backslashreplace_errors(PyObject *self, PyObject *exc)
       
   760 {
       
   761     return PyCodec_BackslashReplaceErrors(exc);
       
   762 }
       
   763 #endif
       
   764 
       
   765 static int _PyCodecRegistry_Init(void)
       
   766 {
       
   767     static struct {
       
   768 	char *name;
       
   769 	PyMethodDef def;
       
   770     } methods[] =
       
   771     {
       
   772 	{
       
   773 	    "strict",
       
   774 	    {
       
   775 		"strict_errors",
       
   776 		strict_errors,
       
   777 		METH_O
       
   778 	    }
       
   779 	},
       
   780 #ifdef Py_USING_UNICODE
       
   781 	{
       
   782 	    "ignore",
       
   783 	    {
       
   784 		"ignore_errors",
       
   785 		ignore_errors,
       
   786 		METH_O
       
   787 	    }
       
   788 	},
       
   789 	{
       
   790 	    "replace",
       
   791 	    {
       
   792 		"replace_errors",
       
   793 		replace_errors,
       
   794 		METH_O
       
   795 	    }
       
   796 	},
       
   797 	{
       
   798 	    "xmlcharrefreplace",
       
   799 	    {
       
   800 		"xmlcharrefreplace_errors",
       
   801 		xmlcharrefreplace_errors,
       
   802 		METH_O
       
   803 	    }
       
   804 	},
       
   805 	{
       
   806 	    "backslashreplace",
       
   807 	    {
       
   808 		"backslashreplace_errors",
       
   809 		backslashreplace_errors,
       
   810 		METH_O
       
   811 	    }
       
   812 	}
       
   813 #endif
       
   814     };
       
   815 
       
   816     PyInterpreterState *interp = PyThreadState_GET()->interp;
       
   817     PyObject *mod;
       
   818     unsigned i;
       
   819 
       
   820     if (interp->codec_search_path != NULL)
       
   821 	return 0;
       
   822 
       
   823     interp->codec_search_path = PyList_New(0);
       
   824     interp->codec_search_cache = PyDict_New();
       
   825     interp->codec_error_registry = PyDict_New();
       
   826 
       
   827     if (interp->codec_error_registry) {
       
   828 	for (i = 0; i < sizeof(methods)/sizeof(methods[0]); ++i) {
       
   829 	    PyObject *func = PyCFunction_New(&methods[i].def, NULL);
       
   830 	    int res;
       
   831 	    if (!func)
       
   832 		Py_FatalError("can't initialize codec error registry");
       
   833 	    res = PyCodec_RegisterError(methods[i].name, func);
       
   834 	    Py_DECREF(func);
       
   835 	    if (res)
       
   836 		Py_FatalError("can't initialize codec error registry");
       
   837 	}
       
   838     }
       
   839 
       
   840     if (interp->codec_search_path == NULL ||
       
   841 	interp->codec_search_cache == NULL ||
       
   842 	interp->codec_error_registry == NULL)
       
   843 	Py_FatalError("can't initialize codec registry");
       
   844 
       
   845     mod = PyImport_ImportModuleLevel("encodings", NULL, NULL, NULL, 0);
       
   846     if (mod == NULL) {
       
   847 	if (PyErr_ExceptionMatches(PyExc_ImportError)) {
       
   848 	    /* Ignore ImportErrors... this is done so that
       
   849 	       distributions can disable the encodings package. Note
       
   850 	       that other errors are not masked, e.g. SystemErrors
       
   851 	       raised to inform the user of an error in the Python
       
   852 	       configuration are still reported back to the user. */
       
   853 	    PyErr_Clear();
       
   854 	    return 0;
       
   855 	}
       
   856 	return -1;
       
   857     }
       
   858     Py_DECREF(mod);
       
   859     return 0;
       
   860 }