symbian-qemu-0.9.1-12/python-2.6.1/Modules/cjkcodecs/cjkcodecs.h
changeset 1 2fb8b9db1c86
equal deleted inserted replaced
0:ffa851df0825 1:2fb8b9db1c86
       
     1 /*
       
     2  * cjkcodecs.h: common header for cjkcodecs
       
     3  *
       
     4  * Written by Hye-Shik Chang <perky@FreeBSD.org>
       
     5  */
       
     6 
       
     7 #ifndef _CJKCODECS_H_
       
     8 #define _CJKCODECS_H_
       
     9 
       
    10 #define PY_SSIZE_T_CLEAN
       
    11 #include "Python.h"
       
    12 #include "multibytecodec.h"
       
    13 
       
    14 
       
    15 /* a unicode "undefined" codepoint */
       
    16 #define UNIINV	0xFFFE
       
    17 
       
    18 /* internal-use DBCS codepoints which aren't used by any charsets */
       
    19 #define NOCHAR	0xFFFF
       
    20 #define MULTIC	0xFFFE
       
    21 #define DBCINV	0xFFFD
       
    22 
       
    23 /* shorter macros to save source size of mapping tables */
       
    24 #define U UNIINV
       
    25 #define N NOCHAR
       
    26 #define M MULTIC
       
    27 #define D DBCINV
       
    28 
       
    29 struct dbcs_index {
       
    30 	const ucs2_t *map;
       
    31 	unsigned char bottom, top;
       
    32 };
       
    33 typedef struct dbcs_index decode_map;
       
    34 
       
    35 struct widedbcs_index {
       
    36 	const ucs4_t *map;
       
    37 	unsigned char bottom, top;
       
    38 };
       
    39 typedef struct widedbcs_index widedecode_map;
       
    40 
       
    41 struct unim_index {
       
    42 	const DBCHAR *map;
       
    43 	unsigned char bottom, top;
       
    44 };
       
    45 typedef struct unim_index encode_map;
       
    46 
       
    47 struct unim_index_bytebased {
       
    48 	const unsigned char *map;
       
    49 	unsigned char bottom, top;
       
    50 };
       
    51 
       
    52 struct dbcs_map {
       
    53 	const char *charset;
       
    54 	const struct unim_index *encmap;
       
    55 	const struct dbcs_index *decmap;
       
    56 };
       
    57 
       
    58 struct pair_encodemap {
       
    59 	ucs4_t uniseq;
       
    60 	DBCHAR code;
       
    61 };
       
    62 
       
    63 static const MultibyteCodec *codec_list;
       
    64 static const struct dbcs_map *mapping_list;
       
    65 
       
    66 #define CODEC_INIT(encoding)						\
       
    67 	static int encoding##_codec_init(const void *config)
       
    68 
       
    69 #define ENCODER_INIT(encoding)						\
       
    70 	static int encoding##_encode_init(				\
       
    71 		MultibyteCodec_State *state, const void *config)
       
    72 #define ENCODER(encoding)						\
       
    73 	static Py_ssize_t encoding##_encode(				\
       
    74 		MultibyteCodec_State *state, const void *config,	\
       
    75 		const Py_UNICODE **inbuf, Py_ssize_t inleft,		\
       
    76 		unsigned char **outbuf, Py_ssize_t outleft, int flags)
       
    77 #define ENCODER_RESET(encoding)						\
       
    78 	static Py_ssize_t encoding##_encode_reset(			\
       
    79 		MultibyteCodec_State *state, const void *config,	\
       
    80 		unsigned char **outbuf, Py_ssize_t outleft)
       
    81 
       
    82 #define DECODER_INIT(encoding)						\
       
    83 	static int encoding##_decode_init(				\
       
    84 		MultibyteCodec_State *state, const void *config)
       
    85 #define DECODER(encoding)						\
       
    86 	static Py_ssize_t encoding##_decode(				\
       
    87 		MultibyteCodec_State *state, const void *config,	\
       
    88 		const unsigned char **inbuf, Py_ssize_t inleft,		\
       
    89 		Py_UNICODE **outbuf, Py_ssize_t outleft)
       
    90 #define DECODER_RESET(encoding)						\
       
    91 	static Py_ssize_t encoding##_decode_reset(			\
       
    92 		MultibyteCodec_State *state, const void *config)
       
    93 
       
    94 #if Py_UNICODE_SIZE == 4
       
    95 #define UCS4INVALID(code)	\
       
    96 	if ((code) > 0xFFFF)	\
       
    97 	return 1;
       
    98 #else
       
    99 #define UCS4INVALID(code)	\
       
   100 	if (0) ;
       
   101 #endif
       
   102 
       
   103 #define NEXT_IN(i)				\
       
   104 	(*inbuf) += (i);			\
       
   105 	(inleft) -= (i);
       
   106 #define NEXT_OUT(o)				\
       
   107 	(*outbuf) += (o);			\
       
   108 	(outleft) -= (o);
       
   109 #define NEXT(i, o)				\
       
   110 	NEXT_IN(i) NEXT_OUT(o)
       
   111 
       
   112 #define REQUIRE_INBUF(n)			\
       
   113 	if (inleft < (n))			\
       
   114 		return MBERR_TOOFEW;
       
   115 #define REQUIRE_OUTBUF(n)			\
       
   116 	if (outleft < (n))			\
       
   117 		return MBERR_TOOSMALL;
       
   118 
       
   119 #define IN1 ((*inbuf)[0])
       
   120 #define IN2 ((*inbuf)[1])
       
   121 #define IN3 ((*inbuf)[2])
       
   122 #define IN4 ((*inbuf)[3])
       
   123 
       
   124 #define OUT1(c) ((*outbuf)[0]) = (c);
       
   125 #define OUT2(c) ((*outbuf)[1]) = (c);
       
   126 #define OUT3(c) ((*outbuf)[2]) = (c);
       
   127 #define OUT4(c) ((*outbuf)[3]) = (c);
       
   128 
       
   129 #define WRITE1(c1)		\
       
   130 	REQUIRE_OUTBUF(1)	\
       
   131 	(*outbuf)[0] = (c1);
       
   132 #define WRITE2(c1, c2)		\
       
   133 	REQUIRE_OUTBUF(2)	\
       
   134 	(*outbuf)[0] = (c1);	\
       
   135 	(*outbuf)[1] = (c2);
       
   136 #define WRITE3(c1, c2, c3)	\
       
   137 	REQUIRE_OUTBUF(3)	\
       
   138 	(*outbuf)[0] = (c1);	\
       
   139 	(*outbuf)[1] = (c2);	\
       
   140 	(*outbuf)[2] = (c3);
       
   141 #define WRITE4(c1, c2, c3, c4)	\
       
   142 	REQUIRE_OUTBUF(4)	\
       
   143 	(*outbuf)[0] = (c1);	\
       
   144 	(*outbuf)[1] = (c2);	\
       
   145 	(*outbuf)[2] = (c3);	\
       
   146 	(*outbuf)[3] = (c4);
       
   147 
       
   148 #if Py_UNICODE_SIZE == 2
       
   149 # define WRITEUCS4(c)						\
       
   150 	REQUIRE_OUTBUF(2)					\
       
   151 	(*outbuf)[0] = 0xd800 + (((c) - 0x10000) >> 10);	\
       
   152 	(*outbuf)[1] = 0xdc00 + (((c) - 0x10000) & 0x3ff);	\
       
   153 	NEXT_OUT(2)
       
   154 #else
       
   155 # define WRITEUCS4(c)						\
       
   156 	REQUIRE_OUTBUF(1)					\
       
   157 	**outbuf = (Py_UNICODE)(c);				\
       
   158 	NEXT_OUT(1)
       
   159 #endif
       
   160 
       
   161 #define _TRYMAP_ENC(m, assi, val)				\
       
   162 	((m)->map != NULL && (val) >= (m)->bottom &&		\
       
   163 	    (val)<= (m)->top && ((assi) = (m)->map[(val) -	\
       
   164 	    (m)->bottom]) != NOCHAR)
       
   165 #define TRYMAP_ENC_COND(charset, assi, uni)			\
       
   166 	_TRYMAP_ENC(&charset##_encmap[(uni) >> 8], assi, (uni) & 0xff)
       
   167 #define TRYMAP_ENC(charset, assi, uni)				\
       
   168 	if TRYMAP_ENC_COND(charset, assi, uni)
       
   169 
       
   170 #define _TRYMAP_DEC(m, assi, val)				\
       
   171 	((m)->map != NULL && (val) >= (m)->bottom &&		\
       
   172 	    (val)<= (m)->top && ((assi) = (m)->map[(val) -	\
       
   173 	    (m)->bottom]) != UNIINV)
       
   174 #define TRYMAP_DEC(charset, assi, c1, c2)			\
       
   175 	if _TRYMAP_DEC(&charset##_decmap[c1], assi, c2)
       
   176 
       
   177 #define _TRYMAP_ENC_MPLANE(m, assplane, asshi, asslo, val)	\
       
   178 	((m)->map != NULL && (val) >= (m)->bottom &&		\
       
   179 	    (val)<= (m)->top &&					\
       
   180 	    ((assplane) = (m)->map[((val) - (m)->bottom)*3]) != 0 && \
       
   181 	    (((asshi) = (m)->map[((val) - (m)->bottom)*3 + 1]), 1) && \
       
   182 	    (((asslo) = (m)->map[((val) - (m)->bottom)*3 + 2]), 1))
       
   183 #define TRYMAP_ENC_MPLANE(charset, assplane, asshi, asslo, uni)	\
       
   184 	if _TRYMAP_ENC_MPLANE(&charset##_encmap[(uni) >> 8], \
       
   185 			   assplane, asshi, asslo, (uni) & 0xff)
       
   186 #define TRYMAP_DEC_MPLANE(charset, assi, plane, c1, c2)		\
       
   187 	if _TRYMAP_DEC(&charset##_decmap[plane][c1], assi, c2)
       
   188 
       
   189 #if Py_UNICODE_SIZE == 2
       
   190 #define DECODE_SURROGATE(c)					\
       
   191 	if (c >> 10 == 0xd800 >> 10) { /* high surrogate */	\
       
   192 		REQUIRE_INBUF(2)				\
       
   193 		if (IN2 >> 10 == 0xdc00 >> 10) { /* low surrogate */ \
       
   194 		    c = 0x10000 + ((ucs4_t)(c - 0xd800) << 10) + \
       
   195 			((ucs4_t)(IN2) - 0xdc00);		\
       
   196 		}						\
       
   197 	}
       
   198 #define GET_INSIZE(c)	((c) > 0xffff ? 2 : 1)
       
   199 #else
       
   200 #define DECODE_SURROGATE(c) {;}
       
   201 #define GET_INSIZE(c)	1
       
   202 #endif
       
   203 
       
   204 #define BEGIN_MAPPINGS_LIST static const struct dbcs_map _mapping_list[] = {
       
   205 #define MAPPING_ENCONLY(enc) {#enc, (void*)enc##_encmap, NULL},
       
   206 #define MAPPING_DECONLY(enc) {#enc, NULL, (void*)enc##_decmap},
       
   207 #define MAPPING_ENCDEC(enc) {#enc, (void*)enc##_encmap, (void*)enc##_decmap},
       
   208 #define END_MAPPINGS_LIST				\
       
   209 	{"", NULL, NULL} };				\
       
   210 	static const struct dbcs_map *mapping_list =	\
       
   211 		(const struct dbcs_map *)_mapping_list;
       
   212 
       
   213 #define BEGIN_CODECS_LIST static const MultibyteCodec _codec_list[] = {
       
   214 #define _STATEFUL_METHODS(enc)		\
       
   215 	enc##_encode,			\
       
   216 	enc##_encode_init,		\
       
   217 	enc##_encode_reset,		\
       
   218 	enc##_decode,			\
       
   219 	enc##_decode_init,		\
       
   220 	enc##_decode_reset,
       
   221 #define _STATELESS_METHODS(enc)		\
       
   222 	enc##_encode, NULL, NULL,	\
       
   223 	enc##_decode, NULL, NULL,
       
   224 #define CODEC_STATEFUL(enc) {		\
       
   225 	#enc, NULL, NULL,		\
       
   226 	_STATEFUL_METHODS(enc)		\
       
   227 },
       
   228 #define CODEC_STATELESS(enc) {		\
       
   229 	#enc, NULL, NULL,		\
       
   230 	_STATELESS_METHODS(enc)		\
       
   231 },
       
   232 #define CODEC_STATELESS_WINIT(enc) {	\
       
   233 	#enc, NULL,			\
       
   234 	enc##_codec_init,		\
       
   235 	_STATELESS_METHODS(enc)		\
       
   236 },
       
   237 #define END_CODECS_LIST					\
       
   238 	{"", NULL,} };					\
       
   239 	static const MultibyteCodec *codec_list =	\
       
   240 		(const MultibyteCodec *)_codec_list;
       
   241 
       
   242 static PyObject *
       
   243 getmultibytecodec(void)
       
   244 {
       
   245 	static PyObject *cofunc = NULL;
       
   246 
       
   247 	if (cofunc == NULL) {
       
   248 		PyObject *mod = PyImport_ImportModuleNoBlock("_multibytecodec");
       
   249 		if (mod == NULL)
       
   250 			return NULL;
       
   251 		cofunc = PyObject_GetAttrString(mod, "__create_codec");
       
   252 		Py_DECREF(mod);
       
   253 	}
       
   254 	return cofunc;
       
   255 }
       
   256 
       
   257 static PyObject *
       
   258 getcodec(PyObject *self, PyObject *encoding)
       
   259 {
       
   260 	PyObject *codecobj, *r, *cofunc;
       
   261 	const MultibyteCodec *codec;
       
   262 	const char *enc;
       
   263 
       
   264 	if (!PyString_Check(encoding)) {
       
   265 		PyErr_SetString(PyExc_TypeError,
       
   266 				"encoding name must be a string.");
       
   267 		return NULL;
       
   268 	}
       
   269 
       
   270 	cofunc = getmultibytecodec();
       
   271 	if (cofunc == NULL)
       
   272 		return NULL;
       
   273 
       
   274 	enc = PyString_AS_STRING(encoding);
       
   275 	for (codec = codec_list; codec->encoding[0]; codec++)
       
   276 		if (strcmp(codec->encoding, enc) == 0)
       
   277 			break;
       
   278 
       
   279 	if (codec->encoding[0] == '\0') {
       
   280 		PyErr_SetString(PyExc_LookupError,
       
   281 				"no such codec is supported.");
       
   282 		return NULL;
       
   283 	}
       
   284 
       
   285 	codecobj = PyCObject_FromVoidPtr((void *)codec, NULL);
       
   286 	if (codecobj == NULL)
       
   287 		return NULL;
       
   288 
       
   289 	r = PyObject_CallFunctionObjArgs(cofunc, codecobj, NULL);
       
   290 	Py_DECREF(codecobj);
       
   291 
       
   292 	return r;
       
   293 }
       
   294 
       
   295 static struct PyMethodDef __methods[] = {
       
   296 	{"getcodec", (PyCFunction)getcodec, METH_O, ""},
       
   297 	{NULL, NULL},
       
   298 };
       
   299 
       
   300 static int
       
   301 register_maps(PyObject *module)
       
   302 {
       
   303 	const struct dbcs_map *h;
       
   304 
       
   305 	for (h = mapping_list; h->charset[0] != '\0'; h++) {
       
   306 		char mhname[256] = "__map_";
       
   307 		int r;
       
   308 		strcpy(mhname + sizeof("__map_") - 1, h->charset);
       
   309 		r = PyModule_AddObject(module, mhname,
       
   310 				PyCObject_FromVoidPtr((void *)h, NULL));
       
   311 		if (r == -1)
       
   312 			return -1;
       
   313 	}
       
   314 	return 0;
       
   315 }
       
   316 
       
   317 #ifdef USING_BINARY_PAIR_SEARCH
       
   318 static DBCHAR
       
   319 find_pairencmap(ucs2_t body, ucs2_t modifier,
       
   320 		const struct pair_encodemap *haystack, int haystacksize)
       
   321 {
       
   322 	int pos, min, max;
       
   323 	ucs4_t value = body << 16 | modifier;
       
   324 
       
   325 	min = 0;
       
   326 	max = haystacksize;
       
   327 
       
   328 	for (pos = haystacksize >> 1; min != max; pos = (min + max) >> 1)
       
   329 		if (value < haystack[pos].uniseq) {
       
   330 			if (max == pos) break;
       
   331 			else max = pos;
       
   332 		}
       
   333 		else if (value > haystack[pos].uniseq) {
       
   334 			if (min == pos) break;
       
   335 			else min = pos;
       
   336 		}
       
   337 		else
       
   338 			break;
       
   339 
       
   340 		if (value == haystack[pos].uniseq)
       
   341 			return haystack[pos].code;
       
   342 		else
       
   343 			return DBCINV;
       
   344 }
       
   345 #endif
       
   346 
       
   347 #ifdef USING_IMPORTED_MAPS
       
   348 #define IMPORT_MAP(locale, charset, encmap, decmap) \
       
   349 	importmap("_codecs_" #locale, "__map_" #charset, \
       
   350 		  (const void**)encmap, (const void**)decmap)
       
   351 
       
   352 static int
       
   353 importmap(const char *modname, const char *symbol,
       
   354 	  const void **encmap, const void **decmap)
       
   355 {
       
   356 	PyObject *o, *mod;
       
   357 
       
   358 	mod = PyImport_ImportModule((char *)modname);
       
   359 	if (mod == NULL)
       
   360 		return -1;
       
   361 
       
   362 	o = PyObject_GetAttrString(mod, (char*)symbol);
       
   363 	if (o == NULL)
       
   364 		goto errorexit;
       
   365 	else if (!PyCObject_Check(o)) {
       
   366 		PyErr_SetString(PyExc_ValueError,
       
   367 				"map data must be a CObject.");
       
   368 		goto errorexit;
       
   369 	}
       
   370 	else {
       
   371 		struct dbcs_map *map;
       
   372 		map = PyCObject_AsVoidPtr(o);
       
   373 		if (encmap != NULL)
       
   374 			*encmap = map->encmap;
       
   375 		if (decmap != NULL)
       
   376 			*decmap = map->decmap;
       
   377 		Py_DECREF(o);
       
   378 	}
       
   379 
       
   380 	Py_DECREF(mod);
       
   381 	return 0;
       
   382 
       
   383 errorexit:
       
   384 	Py_DECREF(mod);
       
   385 	return -1;
       
   386 }
       
   387 #endif
       
   388 
       
   389 #define I_AM_A_MODULE_FOR(loc)						\
       
   390 	void								\
       
   391 	init_codecs_##loc(void)						\
       
   392 	{								\
       
   393 		PyObject *m = Py_InitModule("_codecs_" #loc, __methods);\
       
   394 		if (m != NULL)						\
       
   395 			(void)register_maps(m);				\
       
   396 	}
       
   397 
       
   398 #endif