|
/*
 * _codecs_jp.c: Codecs collection for Japanese encodings
 *
 * Written by Hye-Shik Chang <perky@FreeBSD.org>
 */
|
6 |
|
7 #define USING_BINARY_PAIR_SEARCH |
|
8 #define EMPBASE 0x20000 |
|
9 |
|
10 #include "cjkcodecs.h" |
|
11 #include "mappings_jp.h" |
|
12 #include "mappings_jisx0213_pair.h" |
|
13 #include "alg_jisx0201.h" |
|
14 #include "emu_jisx0213_2000.h" |
|
15 |
|
16 /* |
|
17 * CP932 codec |
|
18 */ |
|
19 |
|
20 ENCODER(cp932) |
|
21 { |
|
22 while (inleft > 0) { |
|
23 Py_UNICODE c = IN1; |
|
24 DBCHAR code; |
|
25 unsigned char c1, c2; |
|
26 |
|
27 if (c <= 0x80) { |
|
28 WRITE1((unsigned char)c) |
|
29 NEXT(1, 1) |
|
30 continue; |
|
31 } |
|
32 else if (c >= 0xff61 && c <= 0xff9f) { |
|
33 WRITE1(c - 0xfec0) |
|
34 NEXT(1, 1) |
|
35 continue; |
|
36 } |
|
37 else if (c >= 0xf8f0 && c <= 0xf8f3) { |
|
38 /* Windows compatibility */ |
|
39 REQUIRE_OUTBUF(1) |
|
40 if (c == 0xf8f0) |
|
41 OUT1(0xa0) |
|
42 else |
|
43 OUT1(c - 0xfef1 + 0xfd) |
|
44 NEXT(1, 1) |
|
45 continue; |
|
46 } |
|
47 |
|
48 UCS4INVALID(c) |
|
49 REQUIRE_OUTBUF(2) |
|
50 |
|
51 TRYMAP_ENC(cp932ext, code, c) { |
|
52 OUT1(code >> 8) |
|
53 OUT2(code & 0xff) |
|
54 } |
|
55 else TRYMAP_ENC(jisxcommon, code, c) { |
|
56 if (code & 0x8000) /* MSB set: JIS X 0212 */ |
|
57 return 1; |
|
58 |
|
59 /* JIS X 0208 */ |
|
60 c1 = code >> 8; |
|
61 c2 = code & 0xff; |
|
62 c2 = (((c1 - 0x21) & 1) ? 0x5e : 0) + (c2 - 0x21); |
|
63 c1 = (c1 - 0x21) >> 1; |
|
64 OUT1(c1 < 0x1f ? c1 + 0x81 : c1 + 0xc1) |
|
65 OUT2(c2 < 0x3f ? c2 + 0x40 : c2 + 0x41) |
|
66 } |
|
67 else if (c >= 0xe000 && c < 0xe758) { |
|
68 /* User-defined area */ |
|
69 c1 = (Py_UNICODE)(c - 0xe000) / 188; |
|
70 c2 = (Py_UNICODE)(c - 0xe000) % 188; |
|
71 OUT1(c1 + 0xf0) |
|
72 OUT2(c2 < 0x3f ? c2 + 0x40 : c2 + 0x41) |
|
73 } |
|
74 else |
|
75 return 1; |
|
76 |
|
77 NEXT(1, 2) |
|
78 } |
|
79 |
|
80 return 0; |
|
81 } |
|
82 |
|
83 DECODER(cp932) |
|
84 { |
|
85 while (inleft > 0) { |
|
86 unsigned char c = IN1, c2; |
|
87 |
|
88 REQUIRE_OUTBUF(1) |
|
89 if (c <= 0x80) { |
|
90 OUT1(c) |
|
91 NEXT(1, 1) |
|
92 continue; |
|
93 } |
|
94 else if (c >= 0xa0 && c <= 0xdf) { |
|
95 if (c == 0xa0) |
|
96 OUT1(0xf8f0) /* half-width katakana */ |
|
97 else |
|
98 OUT1(0xfec0 + c) |
|
99 NEXT(1, 1) |
|
100 continue; |
|
101 } |
|
102 else if (c >= 0xfd/* && c <= 0xff*/) { |
|
103 /* Windows compatibility */ |
|
104 OUT1(0xf8f1 - 0xfd + c) |
|
105 NEXT(1, 1) |
|
106 continue; |
|
107 } |
|
108 |
|
109 REQUIRE_INBUF(2) |
|
110 c2 = IN2; |
|
111 |
|
112 TRYMAP_DEC(cp932ext, **outbuf, c, c2); |
|
113 else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xea)){ |
|
114 if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc) |
|
115 return 2; |
|
116 |
|
117 c = (c < 0xe0 ? c - 0x81 : c - 0xc1); |
|
118 c2 = (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41); |
|
119 c = (2 * c + (c2 < 0x5e ? 0 : 1) + 0x21); |
|
120 c2 = (c2 < 0x5e ? c2 : c2 - 0x5e) + 0x21; |
|
121 |
|
122 TRYMAP_DEC(jisx0208, **outbuf, c, c2); |
|
123 else return 2; |
|
124 } |
|
125 else if (c >= 0xf0 && c <= 0xf9) { |
|
126 if ((c2 >= 0x40 && c2 <= 0x7e) || |
|
127 (c2 >= 0x80 && c2 <= 0xfc)) |
|
128 OUT1(0xe000 + 188 * (c - 0xf0) + |
|
129 (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41)) |
|
130 else |
|
131 return 2; |
|
132 } |
|
133 else |
|
134 return 2; |
|
135 |
|
136 NEXT(2, 1) |
|
137 } |
|
138 |
|
139 return 0; |
|
140 } |
|
141 |
|
142 |
|
143 /* |
|
144 * EUC-JIS-2004 codec |
|
145 */ |
|
146 |
|
147 ENCODER(euc_jis_2004) |
|
148 { |
|
149 while (inleft > 0) { |
|
150 ucs4_t c = IN1; |
|
151 DBCHAR code; |
|
152 Py_ssize_t insize; |
|
153 |
|
154 if (c < 0x80) { |
|
155 WRITE1(c) |
|
156 NEXT(1, 1) |
|
157 continue; |
|
158 } |
|
159 |
|
160 DECODE_SURROGATE(c) |
|
161 insize = GET_INSIZE(c); |
|
162 |
|
163 if (c <= 0xFFFF) { |
|
164 EMULATE_JISX0213_2000_ENCODE_BMP(code, c) |
|
165 else TRYMAP_ENC(jisx0213_bmp, code, c) { |
|
166 if (code == MULTIC) { |
|
167 if (inleft < 2) { |
|
168 if (flags & MBENC_FLUSH) { |
|
169 code = find_pairencmap( |
|
170 (ucs2_t)c, 0, |
|
171 jisx0213_pair_encmap, |
|
172 JISX0213_ENCPAIRS); |
|
173 if (code == DBCINV) |
|
174 return 1; |
|
175 } |
|
176 else |
|
177 return MBERR_TOOFEW; |
|
178 } |
|
179 else { |
|
180 code = find_pairencmap( |
|
181 (ucs2_t)c, (*inbuf)[1], |
|
182 jisx0213_pair_encmap, |
|
183 JISX0213_ENCPAIRS); |
|
184 if (code == DBCINV) { |
|
185 code = find_pairencmap( |
|
186 (ucs2_t)c, 0, |
|
187 jisx0213_pair_encmap, |
|
188 JISX0213_ENCPAIRS); |
|
189 if (code == DBCINV) |
|
190 return 1; |
|
191 } else |
|
192 insize = 2; |
|
193 } |
|
194 } |
|
195 } |
|
196 else TRYMAP_ENC(jisxcommon, code, c); |
|
197 else if (c >= 0xff61 && c <= 0xff9f) { |
|
198 /* JIS X 0201 half-width katakana */ |
|
199 WRITE2(0x8e, c - 0xfec0) |
|
200 NEXT(1, 2) |
|
201 continue; |
|
202 } |
|
203 else if (c == 0xff3c) |
|
204 /* F/W REVERSE SOLIDUS (see NOTES) */ |
|
205 code = 0x2140; |
|
206 else if (c == 0xff5e) |
|
207 /* F/W TILDE (see NOTES) */ |
|
208 code = 0x2232; |
|
209 else |
|
210 return 1; |
|
211 } |
|
212 else if (c >> 16 == EMPBASE >> 16) { |
|
213 EMULATE_JISX0213_2000_ENCODE_EMP(code, c) |
|
214 else TRYMAP_ENC(jisx0213_emp, code, c & 0xffff); |
|
215 else return insize; |
|
216 } |
|
217 else |
|
218 return insize; |
|
219 |
|
220 if (code & 0x8000) { |
|
221 /* Codeset 2 */ |
|
222 WRITE3(0x8f, code >> 8, (code & 0xFF) | 0x80) |
|
223 NEXT(insize, 3) |
|
224 } else { |
|
225 /* Codeset 1 */ |
|
226 WRITE2((code >> 8) | 0x80, (code & 0xFF) | 0x80) |
|
227 NEXT(insize, 2) |
|
228 } |
|
229 } |
|
230 |
|
231 return 0; |
|
232 } |
|
233 |
|
234 DECODER(euc_jis_2004) |
|
235 { |
|
236 while (inleft > 0) { |
|
237 unsigned char c = IN1; |
|
238 ucs4_t code; |
|
239 |
|
240 REQUIRE_OUTBUF(1) |
|
241 |
|
242 if (c < 0x80) { |
|
243 OUT1(c) |
|
244 NEXT(1, 1) |
|
245 continue; |
|
246 } |
|
247 |
|
248 if (c == 0x8e) { |
|
249 /* JIS X 0201 half-width katakana */ |
|
250 unsigned char c2; |
|
251 |
|
252 REQUIRE_INBUF(2) |
|
253 c2 = IN2; |
|
254 if (c2 >= 0xa1 && c2 <= 0xdf) { |
|
255 OUT1(0xfec0 + c2) |
|
256 NEXT(2, 1) |
|
257 } |
|
258 else |
|
259 return 2; |
|
260 } |
|
261 else if (c == 0x8f) { |
|
262 unsigned char c2, c3; |
|
263 |
|
264 REQUIRE_INBUF(3) |
|
265 c2 = IN2 ^ 0x80; |
|
266 c3 = IN3 ^ 0x80; |
|
267 |
|
268 /* JIS X 0213 Plane 2 or JIS X 0212 (see NOTES) */ |
|
269 EMULATE_JISX0213_2000_DECODE_PLANE2(**outbuf, c2, c3) |
|
270 else TRYMAP_DEC(jisx0213_2_bmp, **outbuf, c2, c3) ; |
|
271 else TRYMAP_DEC(jisx0213_2_emp, code, c2, c3) { |
|
272 WRITEUCS4(EMPBASE | code) |
|
273 NEXT_IN(3) |
|
274 continue; |
|
275 } |
|
276 else TRYMAP_DEC(jisx0212, **outbuf, c2, c3) ; |
|
277 else return 3; |
|
278 NEXT(3, 1) |
|
279 } |
|
280 else { |
|
281 unsigned char c2; |
|
282 |
|
283 REQUIRE_INBUF(2) |
|
284 c ^= 0x80; |
|
285 c2 = IN2 ^ 0x80; |
|
286 |
|
287 /* JIS X 0213 Plane 1 */ |
|
288 EMULATE_JISX0213_2000_DECODE_PLANE1(**outbuf, c, c2) |
|
289 else if (c == 0x21 && c2 == 0x40) **outbuf = 0xff3c; |
|
290 else if (c == 0x22 && c2 == 0x32) **outbuf = 0xff5e; |
|
291 else TRYMAP_DEC(jisx0208, **outbuf, c, c2); |
|
292 else TRYMAP_DEC(jisx0213_1_bmp, **outbuf, c, c2); |
|
293 else TRYMAP_DEC(jisx0213_1_emp, code, c, c2) { |
|
294 WRITEUCS4(EMPBASE | code) |
|
295 NEXT_IN(2) |
|
296 continue; |
|
297 } |
|
298 else TRYMAP_DEC(jisx0213_pair, code, c, c2) { |
|
299 WRITE2(code >> 16, code & 0xffff) |
|
300 NEXT(2, 2) |
|
301 continue; |
|
302 } |
|
303 else return 2; |
|
304 NEXT(2, 1) |
|
305 } |
|
306 } |
|
307 |
|
308 return 0; |
|
309 } |
|
310 |
|
311 |
|
312 /* |
|
313 * EUC-JP codec |
|
314 */ |
|
315 |
|
316 ENCODER(euc_jp) |
|
317 { |
|
318 while (inleft > 0) { |
|
319 Py_UNICODE c = IN1; |
|
320 DBCHAR code; |
|
321 |
|
322 if (c < 0x80) { |
|
323 WRITE1((unsigned char)c) |
|
324 NEXT(1, 1) |
|
325 continue; |
|
326 } |
|
327 |
|
328 UCS4INVALID(c) |
|
329 |
|
330 TRYMAP_ENC(jisxcommon, code, c); |
|
331 else if (c >= 0xff61 && c <= 0xff9f) { |
|
332 /* JIS X 0201 half-width katakana */ |
|
333 WRITE2(0x8e, c - 0xfec0) |
|
334 NEXT(1, 2) |
|
335 continue; |
|
336 } |
|
337 #ifndef STRICT_BUILD |
|
338 else if (c == 0xff3c) /* FULL-WIDTH REVERSE SOLIDUS */ |
|
339 code = 0x2140; |
|
340 else if (c == 0xa5) { /* YEN SIGN */ |
|
341 WRITE1(0x5c); |
|
342 NEXT(1, 1) |
|
343 continue; |
|
344 } else if (c == 0x203e) { /* OVERLINE */ |
|
345 WRITE1(0x7e); |
|
346 NEXT(1, 1) |
|
347 continue; |
|
348 } |
|
349 #endif |
|
350 else |
|
351 return 1; |
|
352 |
|
353 if (code & 0x8000) { |
|
354 /* JIS X 0212 */ |
|
355 WRITE3(0x8f, code >> 8, (code & 0xFF) | 0x80) |
|
356 NEXT(1, 3) |
|
357 } else { |
|
358 /* JIS X 0208 */ |
|
359 WRITE2((code >> 8) | 0x80, (code & 0xFF) | 0x80) |
|
360 NEXT(1, 2) |
|
361 } |
|
362 } |
|
363 |
|
364 return 0; |
|
365 } |
|
366 |
|
367 DECODER(euc_jp) |
|
368 { |
|
369 while (inleft > 0) { |
|
370 unsigned char c = IN1; |
|
371 |
|
372 REQUIRE_OUTBUF(1) |
|
373 |
|
374 if (c < 0x80) { |
|
375 OUT1(c) |
|
376 NEXT(1, 1) |
|
377 continue; |
|
378 } |
|
379 |
|
380 if (c == 0x8e) { |
|
381 /* JIS X 0201 half-width katakana */ |
|
382 unsigned char c2; |
|
383 |
|
384 REQUIRE_INBUF(2) |
|
385 c2 = IN2; |
|
386 if (c2 >= 0xa1 && c2 <= 0xdf) { |
|
387 OUT1(0xfec0 + c2) |
|
388 NEXT(2, 1) |
|
389 } |
|
390 else |
|
391 return 2; |
|
392 } |
|
393 else if (c == 0x8f) { |
|
394 unsigned char c2, c3; |
|
395 |
|
396 REQUIRE_INBUF(3) |
|
397 c2 = IN2; |
|
398 c3 = IN3; |
|
399 /* JIS X 0212 */ |
|
400 TRYMAP_DEC(jisx0212, **outbuf, c2 ^ 0x80, c3 ^ 0x80) { |
|
401 NEXT(3, 1) |
|
402 } |
|
403 else |
|
404 return 3; |
|
405 } |
|
406 else { |
|
407 unsigned char c2; |
|
408 |
|
409 REQUIRE_INBUF(2) |
|
410 c2 = IN2; |
|
411 /* JIS X 0208 */ |
|
412 #ifndef STRICT_BUILD |
|
413 if (c == 0xa1 && c2 == 0xc0) |
|
414 /* FULL-WIDTH REVERSE SOLIDUS */ |
|
415 **outbuf = 0xff3c; |
|
416 else |
|
417 #endif |
|
418 TRYMAP_DEC(jisx0208, **outbuf, |
|
419 c ^ 0x80, c2 ^ 0x80) ; |
|
420 else return 2; |
|
421 NEXT(2, 1) |
|
422 } |
|
423 } |
|
424 |
|
425 return 0; |
|
426 } |
|
427 |
|
428 |
|
429 /* |
|
430 * SHIFT_JIS codec |
|
431 */ |
|
432 |
|
433 ENCODER(shift_jis) |
|
434 { |
|
435 while (inleft > 0) { |
|
436 Py_UNICODE c = IN1; |
|
437 DBCHAR code; |
|
438 unsigned char c1, c2; |
|
439 |
|
440 #ifdef STRICT_BUILD |
|
441 JISX0201_R_ENCODE(c, code) |
|
442 #else |
|
443 if (c < 0x80) code = c; |
|
444 else if (c == 0x00a5) code = 0x5c; /* YEN SIGN */ |
|
445 else if (c == 0x203e) code = 0x7e; /* OVERLINE */ |
|
446 #endif |
|
447 else JISX0201_K_ENCODE(c, code) |
|
448 else UCS4INVALID(c) |
|
449 else code = NOCHAR; |
|
450 |
|
451 if (code < 0x80 || (code >= 0xa1 && code <= 0xdf)) { |
|
452 REQUIRE_OUTBUF(1) |
|
453 |
|
454 OUT1((unsigned char)code) |
|
455 NEXT(1, 1) |
|
456 continue; |
|
457 } |
|
458 |
|
459 REQUIRE_OUTBUF(2) |
|
460 |
|
461 if (code == NOCHAR) { |
|
462 TRYMAP_ENC(jisxcommon, code, c); |
|
463 #ifndef STRICT_BUILD |
|
464 else if (c == 0xff3c) |
|
465 code = 0x2140; /* FULL-WIDTH REVERSE SOLIDUS */ |
|
466 #endif |
|
467 else |
|
468 return 1; |
|
469 |
|
470 if (code & 0x8000) /* MSB set: JIS X 0212 */ |
|
471 return 1; |
|
472 } |
|
473 |
|
474 c1 = code >> 8; |
|
475 c2 = code & 0xff; |
|
476 c2 = (((c1 - 0x21) & 1) ? 0x5e : 0) + (c2 - 0x21); |
|
477 c1 = (c1 - 0x21) >> 1; |
|
478 OUT1(c1 < 0x1f ? c1 + 0x81 : c1 + 0xc1) |
|
479 OUT2(c2 < 0x3f ? c2 + 0x40 : c2 + 0x41) |
|
480 NEXT(1, 2) |
|
481 } |
|
482 |
|
483 return 0; |
|
484 } |
|
485 |
|
486 DECODER(shift_jis) |
|
487 { |
|
488 while (inleft > 0) { |
|
489 unsigned char c = IN1; |
|
490 |
|
491 REQUIRE_OUTBUF(1) |
|
492 |
|
493 #ifdef STRICT_BUILD |
|
494 JISX0201_R_DECODE(c, **outbuf) |
|
495 #else |
|
496 if (c < 0x80) **outbuf = c; |
|
497 #endif |
|
498 else JISX0201_K_DECODE(c, **outbuf) |
|
499 else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xea)){ |
|
500 unsigned char c1, c2; |
|
501 |
|
502 REQUIRE_INBUF(2) |
|
503 c2 = IN2; |
|
504 if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc) |
|
505 return 2; |
|
506 |
|
507 c1 = (c < 0xe0 ? c - 0x81 : c - 0xc1); |
|
508 c2 = (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41); |
|
509 c1 = (2 * c1 + (c2 < 0x5e ? 0 : 1) + 0x21); |
|
510 c2 = (c2 < 0x5e ? c2 : c2 - 0x5e) + 0x21; |
|
511 |
|
512 #ifndef STRICT_BUILD |
|
513 if (c1 == 0x21 && c2 == 0x40) { |
|
514 /* FULL-WIDTH REVERSE SOLIDUS */ |
|
515 OUT1(0xff3c) |
|
516 NEXT(2, 1) |
|
517 continue; |
|
518 } |
|
519 #endif |
|
520 TRYMAP_DEC(jisx0208, **outbuf, c1, c2) { |
|
521 NEXT(2, 1) |
|
522 continue; |
|
523 } |
|
524 else |
|
525 return 2; |
|
526 } |
|
527 else |
|
528 return 2; |
|
529 |
|
530 NEXT(1, 1) /* JIS X 0201 */ |
|
531 } |
|
532 |
|
533 return 0; |
|
534 } |
|
535 |
|
536 |
|
537 /* |
|
538 * SHIFT_JIS-2004 codec |
|
539 */ |
|
540 |
|
541 ENCODER(shift_jis_2004) |
|
542 { |
|
543 while (inleft > 0) { |
|
544 ucs4_t c = IN1; |
|
545 DBCHAR code = NOCHAR; |
|
546 int c1, c2; |
|
547 Py_ssize_t insize; |
|
548 |
|
549 JISX0201_ENCODE(c, code) |
|
550 else DECODE_SURROGATE(c) |
|
551 |
|
552 if (code < 0x80 || (code >= 0xa1 && code <= 0xdf)) { |
|
553 WRITE1((unsigned char)code) |
|
554 NEXT(1, 1) |
|
555 continue; |
|
556 } |
|
557 |
|
558 REQUIRE_OUTBUF(2) |
|
559 insize = GET_INSIZE(c); |
|
560 |
|
561 if (code == NOCHAR) { |
|
562 if (c <= 0xffff) { |
|
563 EMULATE_JISX0213_2000_ENCODE_BMP(code, c) |
|
564 else TRYMAP_ENC(jisx0213_bmp, code, c) { |
|
565 if (code == MULTIC) { |
|
566 if (inleft < 2) { |
|
567 if (flags & MBENC_FLUSH) { |
|
568 code = find_pairencmap |
|
569 ((ucs2_t)c, 0, |
|
570 jisx0213_pair_encmap, |
|
571 JISX0213_ENCPAIRS); |
|
572 if (code == DBCINV) |
|
573 return 1; |
|
574 } |
|
575 else |
|
576 return MBERR_TOOFEW; |
|
577 } |
|
578 else { |
|
579 code = find_pairencmap( |
|
580 (ucs2_t)c, IN2, |
|
581 jisx0213_pair_encmap, |
|
582 JISX0213_ENCPAIRS); |
|
583 if (code == DBCINV) { |
|
584 code = find_pairencmap( |
|
585 (ucs2_t)c, 0, |
|
586 jisx0213_pair_encmap, |
|
587 JISX0213_ENCPAIRS); |
|
588 if (code == DBCINV) |
|
589 return 1; |
|
590 } |
|
591 else |
|
592 insize = 2; |
|
593 } |
|
594 } |
|
595 } |
|
596 else TRYMAP_ENC(jisxcommon, code, c) { |
|
597 /* abandon JIS X 0212 codes */ |
|
598 if (code & 0x8000) |
|
599 return 1; |
|
600 } |
|
601 else return 1; |
|
602 } |
|
603 else if (c >> 16 == EMPBASE >> 16) { |
|
604 EMULATE_JISX0213_2000_ENCODE_EMP(code, c) |
|
605 else TRYMAP_ENC(jisx0213_emp, code, c&0xffff); |
|
606 else return insize; |
|
607 } |
|
608 else |
|
609 return insize; |
|
610 } |
|
611 |
|
612 c1 = code >> 8; |
|
613 c2 = (code & 0xff) - 0x21; |
|
614 |
|
615 if (c1 & 0x80) { /* Plane 2 */ |
|
616 if (c1 >= 0xee) c1 -= 0x87; |
|
617 else if (c1 >= 0xac || c1 == 0xa8) c1 -= 0x49; |
|
618 else c1 -= 0x43; |
|
619 } |
|
620 else /* Plane 1 */ |
|
621 c1 -= 0x21; |
|
622 |
|
623 if (c1 & 1) c2 += 0x5e; |
|
624 c1 >>= 1; |
|
625 OUT1(c1 + (c1 < 0x1f ? 0x81 : 0xc1)) |
|
626 OUT2(c2 + (c2 < 0x3f ? 0x40 : 0x41)) |
|
627 |
|
628 NEXT(insize, 2) |
|
629 } |
|
630 |
|
631 return 0; |
|
632 } |
|
633 |
|
634 DECODER(shift_jis_2004) |
|
635 { |
|
636 while (inleft > 0) { |
|
637 unsigned char c = IN1; |
|
638 |
|
639 REQUIRE_OUTBUF(1) |
|
640 JISX0201_DECODE(c, **outbuf) |
|
641 else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xfc)){ |
|
642 unsigned char c1, c2; |
|
643 ucs4_t code; |
|
644 |
|
645 REQUIRE_INBUF(2) |
|
646 c2 = IN2; |
|
647 if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc) |
|
648 return 2; |
|
649 |
|
650 c1 = (c < 0xe0 ? c - 0x81 : c - 0xc1); |
|
651 c2 = (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41); |
|
652 c1 = (2 * c1 + (c2 < 0x5e ? 0 : 1)); |
|
653 c2 = (c2 < 0x5e ? c2 : c2 - 0x5e) + 0x21; |
|
654 |
|
655 if (c1 < 0x5e) { /* Plane 1 */ |
|
656 c1 += 0x21; |
|
657 EMULATE_JISX0213_2000_DECODE_PLANE1(**outbuf, |
|
658 c1, c2) |
|
659 else TRYMAP_DEC(jisx0208, **outbuf, c1, c2) { |
|
660 NEXT_OUT(1) |
|
661 } |
|
662 else TRYMAP_DEC(jisx0213_1_bmp, **outbuf, |
|
663 c1, c2) { |
|
664 NEXT_OUT(1) |
|
665 } |
|
666 else TRYMAP_DEC(jisx0213_1_emp, code, c1, c2) { |
|
667 WRITEUCS4(EMPBASE | code) |
|
668 } |
|
669 else TRYMAP_DEC(jisx0213_pair, code, c1, c2) { |
|
670 WRITE2(code >> 16, code & 0xffff) |
|
671 NEXT_OUT(2) |
|
672 } |
|
673 else |
|
674 return 2; |
|
675 NEXT_IN(2) |
|
676 } |
|
677 else { /* Plane 2 */ |
|
678 if (c1 >= 0x67) c1 += 0x07; |
|
679 else if (c1 >= 0x63 || c1 == 0x5f) c1 -= 0x37; |
|
680 else c1 -= 0x3d; |
|
681 |
|
682 EMULATE_JISX0213_2000_DECODE_PLANE2(**outbuf, |
|
683 c1, c2) |
|
684 else TRYMAP_DEC(jisx0213_2_bmp, **outbuf, |
|
685 c1, c2) ; |
|
686 else TRYMAP_DEC(jisx0213_2_emp, code, c1, c2) { |
|
687 WRITEUCS4(EMPBASE | code) |
|
688 NEXT_IN(2) |
|
689 continue; |
|
690 } |
|
691 else |
|
692 return 2; |
|
693 NEXT(2, 1) |
|
694 } |
|
695 continue; |
|
696 } |
|
697 else |
|
698 return 2; |
|
699 |
|
700 NEXT(1, 1) /* JIS X 0201 */ |
|
701 } |
|
702 |
|
703 return 0; |
|
704 } |
|
705 |
|
706 |
|
707 BEGIN_MAPPINGS_LIST |
|
708 MAPPING_DECONLY(jisx0208) |
|
709 MAPPING_DECONLY(jisx0212) |
|
710 MAPPING_ENCONLY(jisxcommon) |
|
711 MAPPING_DECONLY(jisx0213_1_bmp) |
|
712 MAPPING_DECONLY(jisx0213_2_bmp) |
|
713 MAPPING_ENCONLY(jisx0213_bmp) |
|
714 MAPPING_DECONLY(jisx0213_1_emp) |
|
715 MAPPING_DECONLY(jisx0213_2_emp) |
|
716 MAPPING_ENCONLY(jisx0213_emp) |
|
717 MAPPING_ENCDEC(jisx0213_pair) |
|
718 MAPPING_ENCDEC(cp932ext) |
|
719 END_MAPPINGS_LIST |
|
720 |
|
721 BEGIN_CODECS_LIST |
|
722 CODEC_STATELESS(shift_jis) |
|
723 CODEC_STATELESS(cp932) |
|
724 CODEC_STATELESS(euc_jp) |
|
725 CODEC_STATELESS(shift_jis_2004) |
|
726 CODEC_STATELESS(euc_jis_2004) |
|
727 { "euc_jisx0213", (void *)2000, NULL, _STATELESS_METHODS(euc_jis_2004) }, |
|
728 { "shift_jisx0213", (void *)2000, NULL, _STATELESS_METHODS(shift_jis_2004) }, |
|
729 END_CODECS_LIST |
|
730 |
|
731 I_AM_A_MODULE_FOR(jp) |