symbian-qemu-0.9.1-12/python-2.6.1/Modules/cjkcodecs/_codecs_kr.c
changeset 1 2fb8b9db1c86
equal deleted inserted replaced
0:ffa851df0825 1:2fb8b9db1c86
       
/*
 * _codecs_kr.c: Codecs collection for Korean encodings
 *
 * Written by Hye-Shik Chang <perky@FreeBSD.org>
 */
       
     6 
       
     7 #include "cjkcodecs.h"
       
     8 #include "mappings_kr.h"
       
     9 
       
/*
 * EUC-KR codec
 */
       
    13 
       
    14 #define EUCKR_JAMO_FIRSTBYTE	0xA4
       
    15 #define EUCKR_JAMO_FILLER	0xD4
       
    16 
       
    17 static const unsigned char u2cgk_choseong[19] = {
       
    18 	0xa1, 0xa2, 0xa4, 0xa7, 0xa8, 0xa9, 0xb1, 0xb2,
       
    19 	0xb3, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb,
       
    20 	0xbc, 0xbd, 0xbe
       
    21 };
       
    22 static const unsigned char u2cgk_jungseong[21] = {
       
    23 	0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6,
       
    24 	0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce,
       
    25 	0xcf, 0xd0, 0xd1, 0xd2, 0xd3
       
    26 };
       
    27 static const unsigned char u2cgk_jongseong[28] = {
       
    28 	0xd4, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
       
    29 	0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 0xb0,
       
    30 	0xb1, 0xb2, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xba,
       
    31 	0xbb, 0xbc, 0xbd, 0xbe
       
    32 };
       
    33 
       
    34 ENCODER(euc_kr)
       
    35 {
       
    36 	while (inleft > 0) {
       
    37 		Py_UNICODE c = IN1;
       
    38 		DBCHAR code;
       
    39 
       
    40 		if (c < 0x80) {
       
    41 			WRITE1((unsigned char)c)
       
    42 			NEXT(1, 1)
       
    43 			continue;
       
    44 		}
       
    45 		UCS4INVALID(c)
       
    46 
       
    47 		REQUIRE_OUTBUF(2)
       
    48 		TRYMAP_ENC(cp949, code, c);
       
    49 		else return 1;
       
    50 
       
    51 		if ((code & 0x8000) == 0) {
       
    52 			/* KS X 1001 coded character */
       
    53 			OUT1((code >> 8) | 0x80)
       
    54 			OUT2((code & 0xFF) | 0x80)
       
    55 			NEXT(1, 2)
       
    56 		}
       
    57 		else {	/* Mapping is found in CP949 extension,
       
    58 			 * but we encode it in KS X 1001:1998 Annex 3,
       
    59 			 * make-up sequence for EUC-KR. */
       
    60 
       
    61 			REQUIRE_OUTBUF(8)
       
    62 
       
    63 			/* syllable composition precedence */
       
    64 			OUT1(EUCKR_JAMO_FIRSTBYTE)
       
    65 			OUT2(EUCKR_JAMO_FILLER)
       
    66 
       
    67 			/* All codepoints in CP949 extension are in unicode
       
    68 			 * Hangul Syllable area. */
       
    69 			assert(0xac00 <= c && c <= 0xd7a3);
       
    70 			c -= 0xac00;
       
    71 
       
    72 			OUT3(EUCKR_JAMO_FIRSTBYTE)
       
    73 			OUT4(u2cgk_choseong[c / 588])
       
    74 			NEXT_OUT(4)
       
    75 
       
    76 			OUT1(EUCKR_JAMO_FIRSTBYTE)
       
    77 			OUT2(u2cgk_jungseong[(c / 28) % 21])
       
    78 			OUT3(EUCKR_JAMO_FIRSTBYTE)
       
    79 			OUT4(u2cgk_jongseong[c % 28])
       
    80 			NEXT(1, 4)
       
    81 		}
       
    82 	}
       
    83 
       
    84 	return 0;
       
    85 }
       
    86 
       
    87 #define NONE	127
       
    88 
       
    89 static const unsigned char cgk2u_choseong[] = { /* [A1, BE] */
       
    90 	   0,    1, NONE,    2, NONE, NONE,    3,    4,
       
    91 	   5, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
       
    92 	   6,    7,    8, NONE,    9,   10,   11,   12,
       
    93 	  13,   14,   15,   16,   17,   18
       
    94 };
       
    95 static const unsigned char cgk2u_jongseong[] = { /* [A1, BE] */
       
    96 	   1,    2,    3,    4,    5,    6,    7, NONE,
       
    97 	   8,    9,   10,   11,   12,   13,   14,   15,
       
    98 	  16,   17, NONE,   18,   19,   20,   21,   22,
       
    99 	NONE,   23,   24,   25,   26,   27
       
   100 };
       
   101 
       
   102 DECODER(euc_kr)
       
   103 {
       
   104 	while (inleft > 0) {
       
   105 		unsigned char c = IN1;
       
   106 
       
   107 		REQUIRE_OUTBUF(1)
       
   108 
       
   109 		if (c < 0x80) {
       
   110 			OUT1(c)
       
   111 			NEXT(1, 1)
       
   112 			continue;
       
   113 		}
       
   114 
       
   115 		REQUIRE_INBUF(2)
       
   116 
       
   117 		if (c == EUCKR_JAMO_FIRSTBYTE &&
       
   118 		    IN2 == EUCKR_JAMO_FILLER) {
       
   119 			/* KS X 1001:1998 Annex 3 make-up sequence */
       
   120 			DBCHAR cho, jung, jong;
       
   121 
       
   122 			REQUIRE_INBUF(8)
       
   123 			if ((*inbuf)[2] != EUCKR_JAMO_FIRSTBYTE ||
       
   124 			    (*inbuf)[4] != EUCKR_JAMO_FIRSTBYTE ||
       
   125 			    (*inbuf)[6] != EUCKR_JAMO_FIRSTBYTE)
       
   126 				return 8;
       
   127 
       
   128 			c = (*inbuf)[3];
       
   129 			if (0xa1 <= c && c <= 0xbe)
       
   130 				cho = cgk2u_choseong[c - 0xa1];
       
   131 			else
       
   132 				cho = NONE;
       
   133 
       
   134 			c = (*inbuf)[5];
       
   135 			jung = (0xbf <= c && c <= 0xd3) ? c - 0xbf : NONE;
       
   136 
       
   137 			c = (*inbuf)[7];
       
   138 			if (c == EUCKR_JAMO_FILLER)
       
   139 				jong = 0;
       
   140 			else if (0xa1 <= c && c <= 0xbe)
       
   141 				jong = cgk2u_jongseong[c - 0xa1];
       
   142 			else
       
   143 				jong = NONE;
       
   144 
       
   145 			if (cho == NONE || jung == NONE || jong == NONE)
       
   146 				return 8;
       
   147 
       
   148 			OUT1(0xac00 + cho*588 + jung*28 + jong);
       
   149 			NEXT(8, 1)
       
   150 		}
       
   151 		else TRYMAP_DEC(ksx1001, **outbuf, c ^ 0x80, IN2 ^ 0x80) {
       
   152 			NEXT(2, 1)
       
   153 		}
       
   154 		else
       
   155 			return 2;
       
   156 	}
       
   157 
       
   158 	return 0;
       
   159 }
       
   160 #undef NONE
       
   161 
       
   162 
       
/*
 * CP949 codec
 */
       
   166 
       
   167 ENCODER(cp949)
       
   168 {
       
   169 	while (inleft > 0) {
       
   170 		Py_UNICODE c = IN1;
       
   171 		DBCHAR code;
       
   172 
       
   173 		if (c < 0x80) {
       
   174 			WRITE1((unsigned char)c)
       
   175 			NEXT(1, 1)
       
   176 			continue;
       
   177 		}
       
   178 		UCS4INVALID(c)
       
   179 
       
   180 		REQUIRE_OUTBUF(2)
       
   181 		TRYMAP_ENC(cp949, code, c);
       
   182 		else return 1;
       
   183 
       
   184 		OUT1((code >> 8) | 0x80)
       
   185 		if (code & 0x8000)
       
   186 			OUT2(code & 0xFF) /* MSB set: CP949 */
       
   187 		else
       
   188 			OUT2((code & 0xFF) | 0x80) /* MSB unset: ks x 1001 */
       
   189 		NEXT(1, 2)
       
   190 	}
       
   191 
       
   192 	return 0;
       
   193 }
       
   194 
       
   195 DECODER(cp949)
       
   196 {
       
   197 	while (inleft > 0) {
       
   198 		unsigned char c = IN1;
       
   199 
       
   200 		REQUIRE_OUTBUF(1)
       
   201 
       
   202 		if (c < 0x80) {
       
   203 			OUT1(c)
       
   204 			NEXT(1, 1)
       
   205 			continue;
       
   206 		}
       
   207 
       
   208 		REQUIRE_INBUF(2)
       
   209 		TRYMAP_DEC(ksx1001, **outbuf, c ^ 0x80, IN2 ^ 0x80);
       
   210 		else TRYMAP_DEC(cp949ext, **outbuf, c, IN2);
       
   211 		else return 2;
       
   212 
       
   213 		NEXT(2, 1)
       
   214 	}
       
   215 
       
   216 	return 0;
       
   217 }
       
   218 
       
   219 
       
/*
 * JOHAB codec
 */
       
   223 
       
   224 static const unsigned char u2johabidx_choseong[32] = {
       
   225                 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
       
   226     0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
       
   227     0x10, 0x11, 0x12, 0x13, 0x14,
       
   228 };
       
   229 static const unsigned char u2johabidx_jungseong[32] = {
       
   230                       0x03, 0x04, 0x05, 0x06, 0x07,
       
   231                 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
       
   232                 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
       
   233                 0x1a, 0x1b, 0x1c, 0x1d,
       
   234 };
       
   235 static const unsigned char u2johabidx_jongseong[32] = {
       
   236           0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
       
   237     0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
       
   238     0x10, 0x11,       0x13, 0x14, 0x15, 0x16, 0x17,
       
   239     0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d,
       
   240 };
       
   241 static const DBCHAR u2johabjamo[] = {
       
   242             0x8841, 0x8c41, 0x8444, 0x9041, 0x8446, 0x8447, 0x9441,
       
   243     0x9841, 0x9c41, 0x844a, 0x844b, 0x844c, 0x844d, 0x844e, 0x844f,
       
   244     0x8450, 0xa041, 0xa441, 0xa841, 0x8454, 0xac41, 0xb041, 0xb441,
       
   245     0xb841, 0xbc41, 0xc041, 0xc441, 0xc841, 0xcc41, 0xd041, 0x8461,
       
   246     0x8481, 0x84a1, 0x84c1, 0x84e1, 0x8541, 0x8561, 0x8581, 0x85a1,
       
   247     0x85c1, 0x85e1, 0x8641, 0x8661, 0x8681, 0x86a1, 0x86c1, 0x86e1,
       
   248     0x8741, 0x8761, 0x8781, 0x87a1,
       
   249 };
       
   250 
       
   251 ENCODER(johab)
       
   252 {
       
   253 	while (inleft > 0) {
       
   254 		Py_UNICODE c = IN1;
       
   255 		DBCHAR code;
       
   256 
       
   257 		if (c < 0x80) {
       
   258 			WRITE1((unsigned char)c)
       
   259 			NEXT(1, 1)
       
   260 			continue;
       
   261 		}
       
   262 		UCS4INVALID(c)
       
   263 
       
   264 		REQUIRE_OUTBUF(2)
       
   265 
       
   266 		if (c >= 0xac00 && c <= 0xd7a3) {
       
   267 			c -= 0xac00;
       
   268 			code = 0x8000 |
       
   269 				(u2johabidx_choseong[c / 588] << 10) |
       
   270 				(u2johabidx_jungseong[(c / 28) % 21] << 5) |
       
   271 				u2johabidx_jongseong[c % 28];
       
   272 		}
       
   273 		else if (c >= 0x3131 && c <= 0x3163)
       
   274 			code = u2johabjamo[c - 0x3131];
       
   275 		else TRYMAP_ENC(cp949, code, c) {
       
   276 			unsigned char c1, c2, t2;
       
   277 			unsigned short t1;
       
   278 
       
   279 			assert((code & 0x8000) == 0);
       
   280 			c1 = code >> 8;
       
   281 			c2 = code & 0xff;
       
   282 			if (((c1 >= 0x21 && c1 <= 0x2c) ||
       
   283 			    (c1 >= 0x4a && c1 <= 0x7d)) &&
       
   284 			    (c2 >= 0x21 && c2 <= 0x7e)) {
       
   285 				t1 = (c1 < 0x4a ? (c1 - 0x21 + 0x1b2) :
       
   286 						  (c1 - 0x21 + 0x197));
       
   287 				t2 = ((t1 & 1) ? 0x5e : 0) + (c2 - 0x21);
       
   288 				OUT1(t1 >> 1)
       
   289 				OUT2(t2 < 0x4e ? t2 + 0x31 : t2 + 0x43)
       
   290 				NEXT(1, 2)
       
   291 				continue;
       
   292 			}
       
   293 			else
       
   294 				return 1;
       
   295 		}
       
   296 		else
       
   297 			return 1;
       
   298 
       
   299 		OUT1(code >> 8)
       
   300 		OUT2(code & 0xff)
       
   301 		NEXT(1, 2)
       
   302 	}
       
   303 
       
   304 	return 0;
       
   305 }
       
   306 
       
   307 #define FILL 0xfd
       
   308 #define NONE 0xff
       
   309 
       
   310 static const unsigned char johabidx_choseong[32] = {
       
   311     NONE, FILL, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05,
       
   312     0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d,
       
   313     0x0e, 0x0f, 0x10, 0x11, 0x12, NONE, NONE, NONE,
       
   314     NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
       
   315 };
       
   316 static const unsigned char johabidx_jungseong[32] = {
       
   317     NONE, NONE, FILL, 0x00, 0x01, 0x02, 0x03, 0x04,
       
   318     NONE, NONE, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
       
   319     NONE, NONE, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10,
       
   320     NONE, NONE, 0x11, 0x12, 0x13, 0x14, NONE, NONE,
       
   321 };
       
   322 static const unsigned char johabidx_jongseong[32] = {
       
   323     NONE, FILL, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
       
   324     0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
       
   325     0x0f, 0x10, NONE, 0x11, 0x12, 0x13, 0x14, 0x15,
       
   326     0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, NONE, NONE,
       
   327 };
       
   328 
       
   329 static const unsigned char johabjamo_choseong[32] = {
       
   330     NONE, FILL, 0x31, 0x32, 0x34, 0x37, 0x38, 0x39,
       
   331     0x41, 0x42, 0x43, 0x45, 0x46, 0x47, 0x48, 0x49,
       
   332     0x4a, 0x4b, 0x4c, 0x4d, 0x4e, NONE, NONE, NONE,
       
   333     NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
       
   334 };
       
   335 static const unsigned char johabjamo_jungseong[32] = {
       
   336     NONE, NONE, FILL, 0x4f, 0x50, 0x51, 0x52, 0x53,
       
   337     NONE, NONE, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59,
       
   338     NONE, NONE, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
       
   339     NONE, NONE, 0x60, 0x61, 0x62, 0x63, NONE, NONE,
       
   340 };
       
   341 static const unsigned char johabjamo_jongseong[32] = {
       
   342     NONE, FILL, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
       
   343     0x37, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
       
   344     0x40, 0x41, NONE, 0x42, 0x44, 0x45, 0x46, 0x47,
       
   345     0x48, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, NONE, NONE,
       
   346 };
       
   347 
       
   348 DECODER(johab)
       
   349 {
       
   350 	while (inleft > 0) {
       
   351 		unsigned char    c = IN1, c2;
       
   352 
       
   353 		REQUIRE_OUTBUF(1)
       
   354 
       
   355 		if (c < 0x80) {
       
   356 			OUT1(c)
       
   357 			NEXT(1, 1)
       
   358 			continue;
       
   359 		}
       
   360 
       
   361 		REQUIRE_INBUF(2)
       
   362 		c2 = IN2;
       
   363 
       
   364 		if (c < 0xd8) {
       
   365 			/* johab hangul */
       
   366 			unsigned char c_cho, c_jung, c_jong;
       
   367 			unsigned char i_cho, i_jung, i_jong;
       
   368 
       
   369 			c_cho = (c >> 2) & 0x1f;
       
   370 			c_jung = ((c << 3) | c2 >> 5) & 0x1f;
       
   371 			c_jong = c2 & 0x1f;
       
   372 
       
   373 			i_cho = johabidx_choseong[c_cho];
       
   374 			i_jung = johabidx_jungseong[c_jung];
       
   375 			i_jong = johabidx_jongseong[c_jong];
       
   376 
       
   377 			if (i_cho == NONE || i_jung == NONE || i_jong == NONE)
       
   378 				return 2;
       
   379 
       
   380 			/* we don't use U+1100 hangul jamo yet. */
       
   381 			if (i_cho == FILL) {
       
   382 				if (i_jung == FILL) {
       
   383 					if (i_jong == FILL)
       
   384 						OUT1(0x3000)
       
   385 					else
       
   386 						OUT1(0x3100 |
       
   387 						  johabjamo_jongseong[c_jong])
       
   388 				}
       
   389 				else {
       
   390 					if (i_jong == FILL)
       
   391 						OUT1(0x3100 |
       
   392 						  johabjamo_jungseong[c_jung])
       
   393 					else
       
   394 						return 2;
       
   395 				}
       
   396 			} else {
       
   397 				if (i_jung == FILL) {
       
   398 					if (i_jong == FILL)
       
   399 						OUT1(0x3100 |
       
   400 						  johabjamo_choseong[c_cho])
       
   401 					else
       
   402 						return 2;
       
   403 				}
       
   404 				else
       
   405 					OUT1(0xac00 +
       
   406 					     i_cho * 588 +
       
   407 					     i_jung * 28 +
       
   408 					     (i_jong == FILL ? 0 : i_jong))
       
   409 			}
       
   410 			NEXT(2, 1)
       
   411 		} else {
       
   412 			/* KS X 1001 except hangul jamos and syllables */
       
   413 			if (c == 0xdf || c > 0xf9 ||
       
   414 			    c2 < 0x31 || (c2 >= 0x80 && c2 < 0x91) ||
       
   415 			    (c2 & 0x7f) == 0x7f ||
       
   416 			    (c == 0xda && (c2 >= 0xa1 && c2 <= 0xd3)))
       
   417 				return 2;
       
   418 			else {
       
   419 				unsigned char t1, t2;
       
   420 
       
   421 				t1 = (c < 0xe0 ? 2 * (c - 0xd9) :
       
   422 						 2 * c - 0x197);
       
   423 				t2 = (c2 < 0x91 ? c2 - 0x31 : c2 - 0x43);
       
   424 				t1 = t1 + (t2 < 0x5e ? 0 : 1) + 0x21;
       
   425 				t2 = (t2 < 0x5e ? t2 : t2 - 0x5e) + 0x21;
       
   426 
       
   427 				TRYMAP_DEC(ksx1001, **outbuf, t1, t2);
       
   428 				else return 2;
       
   429 				NEXT(2, 1)
       
   430 			}
       
   431 		}
       
   432 	}
       
   433 
       
   434 	return 0;
       
   435 }
       
   436 #undef NONE
       
   437 #undef FILL
       
   438 
       
   439 
       
   440 BEGIN_MAPPINGS_LIST
       
   441   MAPPING_DECONLY(ksx1001)
       
   442   MAPPING_ENCONLY(cp949)
       
   443   MAPPING_DECONLY(cp949ext)
       
   444 END_MAPPINGS_LIST
       
   445 
       
   446 BEGIN_CODECS_LIST
       
   447   CODEC_STATELESS(euc_kr)
       
   448   CODEC_STATELESS(cp949)
       
   449   CODEC_STATELESS(johab)
       
   450 END_CODECS_LIST
       
   451 
       
   452 I_AM_A_MODULE_FOR(kr)