|
1 /* |
|
2 SDL - Simple DirectMedia Layer |
|
3 Copyright (C) 1997-2006 Sam Lantinga |
|
4 |
|
5 This library is free software; you can redistribute it and/or |
|
6 modify it under the terms of the GNU Lesser General Public |
|
7 License as published by the Free Software Foundation; either |
|
8 version 2.1 of the License, or (at your option) any later version. |
|
9 |
|
10 This library is distributed in the hope that it will be useful, |
|
11 but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
13 Lesser General Public License for more details. |
|
14 |
|
15 You should have received a copy of the GNU Lesser General Public |
|
16 License along with this library; if not, write to the Free Software |
|
17 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA |
|
18 |
|
19 Sam Lantinga |
|
20 slouken@libsdl.org |
|
21 */ |
|
22 #include "SDL_config.h" |
|
23 |
|
/* This file contains portable iconv functions for SDL */
|
25 |
|
26 #include "SDL_stdinc.h" |
|
27 #include "SDL_endian.h" |
|
28 |
|
29 #ifdef HAVE_ICONV |
|
30 |
|
/* Depending on which standard the iconv() was implemented with,
   iconv() may or may not use const char ** for the inbuf param.
   If we get this wrong, it's just a warning, so no big deal.
*/
|
35 #if defined(_XGP6) || \ |
|
36 defined(__GLIBC__) && ((__GLIBC__ > 2) || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 2)) |
|
37 #define ICONV_INBUF_NONCONST |
|
38 #endif |
|
39 |
|
40 #include <errno.h> |
|
41 |
|
42 size_t SDL_iconv(SDL_iconv_t cd, |
|
43 const char **inbuf, size_t *inbytesleft, |
|
44 char **outbuf, size_t *outbytesleft) |
|
45 { |
|
46 size_t retCode; |
|
47 #ifdef ICONV_INBUF_NONCONST |
|
48 retCode = iconv(cd, (char **)inbuf, inbytesleft, outbuf, outbytesleft); |
|
49 #else |
|
50 retCode = iconv(cd, inbuf, inbytesleft, outbuf, outbytesleft); |
|
51 #endif |
|
52 if ( retCode == (size_t)-1 ) { |
|
53 switch(errno) { |
|
54 case E2BIG: |
|
55 return SDL_ICONV_E2BIG; |
|
56 case EILSEQ: |
|
57 return SDL_ICONV_EILSEQ; |
|
58 case EINVAL: |
|
59 return SDL_ICONV_EINVAL; |
|
60 default: |
|
61 return SDL_ICONV_ERROR; |
|
62 } |
|
63 } |
|
64 return retCode; |
|
65 } |
|
66 |
|
67 #else |
|
68 |
|
69 /* Lots of useful information on Unicode at: |
|
70 http://www.cl.cam.ac.uk/~mgk25/unicode.html |
|
71 */ |
|
72 |
|
73 #define UNICODE_BOM 0xFEFF |
|
74 |
|
75 #define UNKNOWN_ASCII '?' |
|
76 #define UNKNOWN_UNICODE 0xFFFD |
|
77 |
|
/* Identifiers for the text encodings understood by the built-in converter.
   The numeric values are spelled out; they match the implicit numbering
   of a plain enumerator list starting at zero. */
enum {
    ENCODING_UNKNOWN = 0,
    ENCODING_ASCII   = 1,
    ENCODING_LATIN1  = 2,
    ENCODING_UTF8    = 3,
    ENCODING_UTF16   = 4,   /* Needs byte order marker */
    ENCODING_UTF16BE = 5,
    ENCODING_UTF16LE = 6,
    ENCODING_UTF32   = 7,   /* Needs byte order marker */
    ENCODING_UTF32BE = 8,
    ENCODING_UTF32LE = 9,
    ENCODING_UCS2    = 10,  /* Native byte order assumed */
    ENCODING_UCS4    = 11   /* Native byte order assumed */
};

/* Aliases for the UTF-16/UTF-32 variants that match the host byte order */
#if SDL_BYTEORDER == SDL_BIG_ENDIAN
#define ENCODING_UTF16NATIVE ENCODING_UTF16BE
#define ENCODING_UTF32NATIVE ENCODING_UTF32BE
#else
#define ENCODING_UTF16NATIVE ENCODING_UTF16LE
#define ENCODING_UTF32NATIVE ENCODING_UTF32LE
#endif
|
99 |
|
/* Conversion descriptor for the built-in converter: a pair of ENCODING_*
   values describing the input and output encodings. */
struct _SDL_iconv_t
{
    int src_fmt;    /* encoding of the text being read */
    int dst_fmt;    /* encoding of the text being written */
};
|
105 |
|
106 static struct { |
|
107 const char *name; |
|
108 int format; |
|
109 } encodings[] = { |
|
110 { "ASCII", ENCODING_ASCII }, |
|
111 { "US-ASCII", ENCODING_ASCII }, |
|
112 { "8859-1", ENCODING_LATIN1 }, |
|
113 { "ISO-8859-1", ENCODING_LATIN1 }, |
|
114 { "UTF8", ENCODING_UTF8 }, |
|
115 { "UTF-8", ENCODING_UTF8 }, |
|
116 { "UTF16", ENCODING_UTF16 }, |
|
117 { "UTF-16", ENCODING_UTF16 }, |
|
118 { "UTF16BE", ENCODING_UTF16BE }, |
|
119 { "UTF-16BE", ENCODING_UTF16BE }, |
|
120 { "UTF16LE", ENCODING_UTF16LE }, |
|
121 { "UTF-16LE", ENCODING_UTF16LE }, |
|
122 { "UTF32", ENCODING_UTF32 }, |
|
123 { "UTF-32", ENCODING_UTF32 }, |
|
124 { "UTF32BE", ENCODING_UTF32BE }, |
|
125 { "UTF-32BE", ENCODING_UTF32BE }, |
|
126 { "UTF32LE", ENCODING_UTF32LE }, |
|
127 { "UTF-32LE", ENCODING_UTF32LE }, |
|
128 { "UCS2", ENCODING_UCS2 }, |
|
129 { "UCS-2", ENCODING_UCS2 }, |
|
130 { "UCS4", ENCODING_UCS4 }, |
|
131 { "UCS-4", ENCODING_UCS4 }, |
|
132 }; |
|
133 |
|
/* Determine the character encoding name implied by the environment.
 *
 * The variables LC_ALL, LC_CTYPE, LC_MESSAGES and LANG are consulted in
 * that order and the first one that is set to a non-empty value wins
 * (an empty variable is treated as unset, as POSIX specifies).  If none
 * is usable, or the value is "C" or "POSIX", the encoding is "ASCII".
 *
 * A value such as "en_US.UTF-8@blah" is trimmed to "UTF-8": everything
 * up to and including the first '.' is dropped, and so is everything
 * from the first '@' onwards.
 *
 * buffer/bufsize: caller-provided storage that receives the result
 *                 (copied with SDL_strlcpy, so it is truncated to fit).
 * Returns buffer.
 */
static const char *getlocale(char *buffer, size_t bufsize)
{
    static const char *const variables[] = {
        "LC_ALL", "LC_CTYPE", "LC_MESSAGES", "LANG"
    };
    const char *lang = NULL;
    char *ptr;
    size_t i;

    for ( i = 0; i < SDL_arraysize(variables); ++i ) {
        lang = SDL_getenv(variables[i]);
        if ( lang && *lang ) {
            break;
        }
        /* Unset or empty: fall through to the next variable */
        lang = NULL;
    }
    if ( !lang || SDL_strcmp(lang, "C") == 0 || SDL_strcmp(lang, "POSIX") == 0 ) {
        lang = "ASCII";
    }

    /* We need to trim down strings like "en_US.UTF-8@blah" to "UTF-8" */
    ptr = SDL_strchr(lang, '.');
    if ( ptr != NULL ) {
        lang = ptr + 1;
    }

    SDL_strlcpy(buffer, lang, bufsize);
    ptr = SDL_strchr(buffer, '@');
    if ( ptr != NULL ) {
        *ptr = '\0';  /* chop end of string. */
    }

    return buffer;
}
|
167 |
|
168 SDL_iconv_t SDL_iconv_open(const char *tocode, const char *fromcode) |
|
169 { |
|
170 int src_fmt = ENCODING_UNKNOWN; |
|
171 int dst_fmt = ENCODING_UNKNOWN; |
|
172 int i; |
|
173 char fromcode_buffer[64]; |
|
174 char tocode_buffer[64]; |
|
175 |
|
176 if ( !fromcode || !*fromcode ) { |
|
177 fromcode = getlocale(fromcode_buffer, sizeof(fromcode_buffer)); |
|
178 } |
|
179 if ( !tocode || !*tocode ) { |
|
180 tocode = getlocale(tocode_buffer, sizeof(tocode_buffer)); |
|
181 } |
|
182 for ( i = 0; i < SDL_arraysize(encodings); ++i ) { |
|
183 if ( SDL_strcasecmp(fromcode, encodings[i].name) == 0 ) { |
|
184 src_fmt = encodings[i].format; |
|
185 if ( dst_fmt != ENCODING_UNKNOWN ) { |
|
186 break; |
|
187 } |
|
188 } |
|
189 if ( SDL_strcasecmp(tocode, encodings[i].name) == 0 ) { |
|
190 dst_fmt = encodings[i].format; |
|
191 if ( src_fmt != ENCODING_UNKNOWN ) { |
|
192 break; |
|
193 } |
|
194 } |
|
195 } |
|
196 if ( src_fmt != ENCODING_UNKNOWN && dst_fmt != ENCODING_UNKNOWN ) { |
|
197 SDL_iconv_t cd = (SDL_iconv_t)SDL_malloc(sizeof(*cd)); |
|
198 if ( cd ) { |
|
199 cd->src_fmt = src_fmt; |
|
200 cd->dst_fmt = dst_fmt; |
|
201 return cd; |
|
202 } |
|
203 } |
|
204 return (SDL_iconv_t)-1; |
|
205 } |
|
206 |
|
207 size_t SDL_iconv(SDL_iconv_t cd, |
|
208 const char **inbuf, size_t *inbytesleft, |
|
209 char **outbuf, size_t *outbytesleft) |
|
210 { |
|
211 /* For simplicity, we'll convert everything to and from UCS-4 */ |
|
212 const char *src; |
|
213 char *dst; |
|
214 size_t srclen, dstlen; |
|
215 Uint32 ch = 0; |
|
216 size_t total; |
|
217 |
|
218 if ( !inbuf || !*inbuf ) { |
|
219 /* Reset the context */ |
|
220 return 0; |
|
221 } |
|
222 if ( !outbuf || !*outbuf || !outbytesleft || !*outbytesleft ) { |
|
223 return SDL_ICONV_E2BIG; |
|
224 } |
|
225 src = *inbuf; |
|
226 srclen = (inbytesleft ? *inbytesleft : 0); |
|
227 dst = *outbuf; |
|
228 dstlen = *outbytesleft; |
|
229 |
|
230 switch ( cd->src_fmt ) { |
|
231 case ENCODING_UTF16: |
|
232 /* Scan for a byte order marker */ |
|
233 { |
|
234 Uint8 *p = (Uint8 *)src; |
|
235 size_t n = srclen / 2; |
|
236 while ( n ) { |
|
237 if ( p[0] == 0xFF && p[1] == 0xFE ) { |
|
238 cd->src_fmt = ENCODING_UTF16BE; |
|
239 break; |
|
240 } else if ( p[0] == 0xFE && p[1] == 0xFF ) { |
|
241 cd->src_fmt = ENCODING_UTF16LE; |
|
242 break; |
|
243 } |
|
244 p += 2; |
|
245 --n; |
|
246 } |
|
247 if ( n == 0 ) { |
|
248 /* We can't tell, default to host order */ |
|
249 cd->src_fmt = ENCODING_UTF16NATIVE; |
|
250 } |
|
251 } |
|
252 break; |
|
253 case ENCODING_UTF32: |
|
254 /* Scan for a byte order marker */ |
|
255 { |
|
256 Uint8 *p = (Uint8 *)src; |
|
257 size_t n = srclen / 4; |
|
258 while ( n ) { |
|
259 if ( p[0] == 0xFF && p[1] == 0xFE && |
|
260 p[2] == 0x00 && p[3] == 0x00 ) { |
|
261 cd->src_fmt = ENCODING_UTF32BE; |
|
262 break; |
|
263 } else if ( p[0] == 0x00 && p[1] == 0x00 && |
|
264 p[2] == 0xFE && p[3] == 0xFF ) { |
|
265 cd->src_fmt = ENCODING_UTF32LE; |
|
266 break; |
|
267 } |
|
268 p += 4; |
|
269 --n; |
|
270 } |
|
271 if ( n == 0 ) { |
|
272 /* We can't tell, default to host order */ |
|
273 cd->src_fmt = ENCODING_UTF32NATIVE; |
|
274 } |
|
275 } |
|
276 break; |
|
277 } |
|
278 |
|
279 switch ( cd->dst_fmt ) { |
|
280 case ENCODING_UTF16: |
|
281 /* Default to host order, need to add byte order marker */ |
|
282 if ( dstlen < 2 ) { |
|
283 return SDL_ICONV_E2BIG; |
|
284 } |
|
285 *(Uint16 *)dst = UNICODE_BOM; |
|
286 dst += 2; |
|
287 dstlen -= 2; |
|
288 cd->dst_fmt = ENCODING_UTF16NATIVE; |
|
289 break; |
|
290 case ENCODING_UTF32: |
|
291 /* Default to host order, need to add byte order marker */ |
|
292 if ( dstlen < 4 ) { |
|
293 return SDL_ICONV_E2BIG; |
|
294 } |
|
295 *(Uint32 *)dst = UNICODE_BOM; |
|
296 dst += 4; |
|
297 dstlen -= 4; |
|
298 cd->dst_fmt = ENCODING_UTF32NATIVE; |
|
299 break; |
|
300 } |
|
301 |
|
302 total = 0; |
|
303 while ( srclen > 0 ) { |
|
304 /* Decode a character */ |
|
305 switch ( cd->src_fmt ) { |
|
306 case ENCODING_ASCII: |
|
307 { |
|
308 Uint8 *p = (Uint8 *)src; |
|
309 ch = (Uint32)(p[0] & 0x7F); |
|
310 ++src; |
|
311 --srclen; |
|
312 } |
|
313 break; |
|
314 case ENCODING_LATIN1: |
|
315 { |
|
316 Uint8 *p = (Uint8 *)src; |
|
317 ch = (Uint32)p[0]; |
|
318 ++src; |
|
319 --srclen; |
|
320 } |
|
321 break; |
|
322 case ENCODING_UTF8: /* RFC 3629 */ |
|
323 { |
|
324 Uint8 *p = (Uint8 *)src; |
|
325 size_t left = 0; |
|
326 SDL_bool overlong = SDL_FALSE; |
|
327 if ( p[0] >= 0xFC ) { |
|
328 if ( (p[0] & 0xFE) != 0xFC ) { |
|
329 /* Skip illegal sequences |
|
330 return SDL_ICONV_EILSEQ; |
|
331 */ |
|
332 ch = UNKNOWN_UNICODE; |
|
333 } else { |
|
334 if ( p[0] == 0xFC ) { |
|
335 overlong = SDL_TRUE; |
|
336 } |
|
337 ch = (Uint32)(p[0] & 0x01); |
|
338 left = 5; |
|
339 } |
|
340 } else if ( p[0] >= 0xF8 ) { |
|
341 if ( (p[0] & 0xFC) != 0xF8 ) { |
|
342 /* Skip illegal sequences |
|
343 return SDL_ICONV_EILSEQ; |
|
344 */ |
|
345 ch = UNKNOWN_UNICODE; |
|
346 } else { |
|
347 if ( p[0] == 0xF8 ) { |
|
348 overlong = SDL_TRUE; |
|
349 } |
|
350 ch = (Uint32)(p[0] & 0x03); |
|
351 left = 4; |
|
352 } |
|
353 } else if ( p[0] >= 0xF0 ) { |
|
354 if ( (p[0] & 0xF8) != 0xF0 ) { |
|
355 /* Skip illegal sequences |
|
356 return SDL_ICONV_EILSEQ; |
|
357 */ |
|
358 ch = UNKNOWN_UNICODE; |
|
359 } else { |
|
360 if ( p[0] == 0xF0 ) { |
|
361 overlong = SDL_TRUE; |
|
362 } |
|
363 ch = (Uint32)(p[0] & 0x07); |
|
364 left = 3; |
|
365 } |
|
366 } else if ( p[0] >= 0xE0 ) { |
|
367 if ( (p[0] & 0xF0) != 0xE0 ) { |
|
368 /* Skip illegal sequences |
|
369 return SDL_ICONV_EILSEQ; |
|
370 */ |
|
371 ch = UNKNOWN_UNICODE; |
|
372 } else { |
|
373 if ( p[0] == 0xE0 ) { |
|
374 overlong = SDL_TRUE; |
|
375 } |
|
376 ch = (Uint32)(p[0] & 0x0F); |
|
377 left = 2; |
|
378 } |
|
379 } else if ( p[0] >= 0xC0 ) { |
|
380 if ( (p[0] & 0xE0) != 0xC0 ) { |
|
381 /* Skip illegal sequences |
|
382 return SDL_ICONV_EILSEQ; |
|
383 */ |
|
384 ch = UNKNOWN_UNICODE; |
|
385 } else { |
|
386 if ( (p[0] & 0xCE) == 0xC0 ) { |
|
387 overlong = SDL_TRUE; |
|
388 } |
|
389 ch = (Uint32)(p[0] & 0x1F); |
|
390 left = 1; |
|
391 } |
|
392 } else { |
|
393 if ( (p[0] & 0x80) != 0x00 ) { |
|
394 /* Skip illegal sequences |
|
395 return SDL_ICONV_EILSEQ; |
|
396 */ |
|
397 ch = UNKNOWN_UNICODE; |
|
398 } else { |
|
399 ch = (Uint32)p[0]; |
|
400 } |
|
401 } |
|
402 ++src; |
|
403 --srclen; |
|
404 if ( srclen < left ) { |
|
405 return SDL_ICONV_EINVAL; |
|
406 } |
|
407 while ( left-- ) { |
|
408 ++p; |
|
409 if ( (p[0] & 0xC0) != 0x80 ) { |
|
410 /* Skip illegal sequences |
|
411 return SDL_ICONV_EILSEQ; |
|
412 */ |
|
413 ch = UNKNOWN_UNICODE; |
|
414 break; |
|
415 } |
|
416 ch <<= 6; |
|
417 ch |= (p[0] & 0x3F); |
|
418 ++src; |
|
419 --srclen; |
|
420 } |
|
421 if ( overlong ) { |
|
422 /* Potential security risk |
|
423 return SDL_ICONV_EILSEQ; |
|
424 */ |
|
425 ch = UNKNOWN_UNICODE; |
|
426 } |
|
427 if ( (ch >= 0xD800 && ch <= 0xDFFF) || |
|
428 (ch == 0xFFFE || ch == 0xFFFF) || |
|
429 ch > 0x10FFFF ) { |
|
430 /* Skip illegal sequences |
|
431 return SDL_ICONV_EILSEQ; |
|
432 */ |
|
433 ch = UNKNOWN_UNICODE; |
|
434 } |
|
435 } |
|
436 break; |
|
437 case ENCODING_UTF16BE: /* RFC 2781 */ |
|
438 { |
|
439 Uint8 *p = (Uint8 *)src; |
|
440 Uint16 W1, W2; |
|
441 if ( srclen < 2 ) { |
|
442 return SDL_ICONV_EINVAL; |
|
443 } |
|
444 W1 = ((Uint16)p[0] << 8) | |
|
445 (Uint16)p[1]; |
|
446 src += 2; |
|
447 srclen -= 2; |
|
448 if ( W1 < 0xD800 || W1 > 0xDFFF ) { |
|
449 ch = (Uint32)W1; |
|
450 break; |
|
451 } |
|
452 if ( W1 > 0xDBFF ) { |
|
453 /* Skip illegal sequences |
|
454 return SDL_ICONV_EILSEQ; |
|
455 */ |
|
456 ch = UNKNOWN_UNICODE; |
|
457 break; |
|
458 } |
|
459 if ( srclen < 2 ) { |
|
460 return SDL_ICONV_EINVAL; |
|
461 } |
|
462 p = (Uint8 *)src; |
|
463 W2 = ((Uint16)p[0] << 8) | |
|
464 (Uint16)p[1]; |
|
465 src += 2; |
|
466 srclen -= 2; |
|
467 if ( W2 < 0xDC00 || W2 > 0xDFFF ) { |
|
468 /* Skip illegal sequences |
|
469 return SDL_ICONV_EILSEQ; |
|
470 */ |
|
471 ch = UNKNOWN_UNICODE; |
|
472 break; |
|
473 } |
|
474 ch = (((Uint32)(W1 & 0x3FF) << 10) | |
|
475 (Uint32)(W2 & 0x3FF)) + 0x10000; |
|
476 } |
|
477 break; |
|
478 case ENCODING_UTF16LE: /* RFC 2781 */ |
|
479 { |
|
480 Uint8 *p = (Uint8 *)src; |
|
481 Uint16 W1, W2; |
|
482 if ( srclen < 2 ) { |
|
483 return SDL_ICONV_EINVAL; |
|
484 } |
|
485 W1 = ((Uint16)p[1] << 8) | |
|
486 (Uint16)p[0]; |
|
487 src += 2; |
|
488 srclen -= 2; |
|
489 if ( W1 < 0xD800 || W1 > 0xDFFF ) { |
|
490 ch = (Uint32)W1; |
|
491 break; |
|
492 } |
|
493 if ( W1 > 0xDBFF ) { |
|
494 /* Skip illegal sequences |
|
495 return SDL_ICONV_EILSEQ; |
|
496 */ |
|
497 ch = UNKNOWN_UNICODE; |
|
498 break; |
|
499 } |
|
500 if ( srclen < 2 ) { |
|
501 return SDL_ICONV_EINVAL; |
|
502 } |
|
503 p = (Uint8 *)src; |
|
504 W2 = ((Uint16)p[1] << 8) | |
|
505 (Uint16)p[0]; |
|
506 src += 2; |
|
507 srclen -= 2; |
|
508 if ( W2 < 0xDC00 || W2 > 0xDFFF ) { |
|
509 /* Skip illegal sequences |
|
510 return SDL_ICONV_EILSEQ; |
|
511 */ |
|
512 ch = UNKNOWN_UNICODE; |
|
513 break; |
|
514 } |
|
515 ch = (((Uint32)(W1 & 0x3FF) << 10) | |
|
516 (Uint32)(W2 & 0x3FF)) + 0x10000; |
|
517 } |
|
518 break; |
|
519 case ENCODING_UTF32BE: |
|
520 { |
|
521 Uint8 *p = (Uint8 *)src; |
|
522 if ( srclen < 4 ) { |
|
523 return SDL_ICONV_EINVAL; |
|
524 } |
|
525 ch = ((Uint32)p[0] << 24) | |
|
526 ((Uint32)p[1] << 16) | |
|
527 ((Uint32)p[2] << 8) | |
|
528 (Uint32)p[3]; |
|
529 src += 4; |
|
530 srclen -= 4; |
|
531 } |
|
532 break; |
|
533 case ENCODING_UTF32LE: |
|
534 { |
|
535 Uint8 *p = (Uint8 *)src; |
|
536 if ( srclen < 4 ) { |
|
537 return SDL_ICONV_EINVAL; |
|
538 } |
|
539 ch = ((Uint32)p[3] << 24) | |
|
540 ((Uint32)p[2] << 16) | |
|
541 ((Uint32)p[1] << 8) | |
|
542 (Uint32)p[0]; |
|
543 src += 4; |
|
544 srclen -= 4; |
|
545 } |
|
546 break; |
|
547 case ENCODING_UCS2: |
|
548 { |
|
549 Uint16 *p = (Uint16 *)src; |
|
550 if ( srclen < 2 ) { |
|
551 return SDL_ICONV_EINVAL; |
|
552 } |
|
553 ch = *p; |
|
554 src += 2; |
|
555 srclen -= 2; |
|
556 } |
|
557 break; |
|
558 case ENCODING_UCS4: |
|
559 { |
|
560 Uint32 *p = (Uint32 *)src; |
|
561 if ( srclen < 4 ) { |
|
562 return SDL_ICONV_EINVAL; |
|
563 } |
|
564 ch = *p; |
|
565 src += 4; |
|
566 srclen -= 4; |
|
567 } |
|
568 break; |
|
569 } |
|
570 |
|
571 /* Encode a character */ |
|
572 switch ( cd->dst_fmt ) { |
|
573 case ENCODING_ASCII: |
|
574 { |
|
575 Uint8 *p = (Uint8 *)dst; |
|
576 if ( dstlen < 1 ) { |
|
577 return SDL_ICONV_E2BIG; |
|
578 } |
|
579 if ( ch > 0x7F ) { |
|
580 *p = UNKNOWN_ASCII; |
|
581 } else { |
|
582 *p = (Uint8)ch; |
|
583 } |
|
584 ++dst; |
|
585 --dstlen; |
|
586 } |
|
587 break; |
|
588 case ENCODING_LATIN1: |
|
589 { |
|
590 Uint8 *p = (Uint8 *)dst; |
|
591 if ( dstlen < 1 ) { |
|
592 return SDL_ICONV_E2BIG; |
|
593 } |
|
594 if ( ch > 0xFF ) { |
|
595 *p = UNKNOWN_ASCII; |
|
596 } else { |
|
597 *p = (Uint8)ch; |
|
598 } |
|
599 ++dst; |
|
600 --dstlen; |
|
601 } |
|
602 break; |
|
603 case ENCODING_UTF8: /* RFC 3629 */ |
|
604 { |
|
605 Uint8 *p = (Uint8 *)dst; |
|
606 if ( ch > 0x10FFFF ) { |
|
607 ch = UNKNOWN_UNICODE; |
|
608 } |
|
609 if ( ch <= 0x7F ) { |
|
610 if ( dstlen < 1 ) { |
|
611 return SDL_ICONV_E2BIG; |
|
612 } |
|
613 *p = (Uint8)ch; |
|
614 ++dst; |
|
615 --dstlen; |
|
616 } else if ( ch <= 0x7FF ) { |
|
617 if ( dstlen < 2 ) { |
|
618 return SDL_ICONV_E2BIG; |
|
619 } |
|
620 p[0] = 0xC0 | (Uint8)((ch >> 6) & 0x1F); |
|
621 p[1] = 0x80 | (Uint8)(ch & 0x3F); |
|
622 dst += 2; |
|
623 dstlen -= 2; |
|
624 } else if ( ch <= 0xFFFF ) { |
|
625 if ( dstlen < 3 ) { |
|
626 return SDL_ICONV_E2BIG; |
|
627 } |
|
628 p[0] = 0xE0 | (Uint8)((ch >> 12) & 0x0F); |
|
629 p[1] = 0x80 | (Uint8)((ch >> 6) & 0x3F); |
|
630 p[2] = 0x80 | (Uint8)(ch & 0x3F); |
|
631 dst += 3; |
|
632 dstlen -= 3; |
|
633 } else if ( ch <= 0x1FFFFF ) { |
|
634 if ( dstlen < 4 ) { |
|
635 return SDL_ICONV_E2BIG; |
|
636 } |
|
637 p[0] = 0xF0 | (Uint8)((ch >> 18) & 0x07); |
|
638 p[1] = 0x80 | (Uint8)((ch >> 12) & 0x3F); |
|
639 p[2] = 0x80 | (Uint8)((ch >> 6) & 0x3F); |
|
640 p[3] = 0x80 | (Uint8)(ch & 0x3F); |
|
641 dst += 4; |
|
642 dstlen -= 4; |
|
643 } else if ( ch <= 0x3FFFFFF ) { |
|
644 if ( dstlen < 5 ) { |
|
645 return SDL_ICONV_E2BIG; |
|
646 } |
|
647 p[0] = 0xF8 | (Uint8)((ch >> 24) & 0x03); |
|
648 p[1] = 0x80 | (Uint8)((ch >> 18) & 0x3F); |
|
649 p[2] = 0x80 | (Uint8)((ch >> 12) & 0x3F); |
|
650 p[3] = 0x80 | (Uint8)((ch >> 6) & 0x3F); |
|
651 p[4] = 0x80 | (Uint8)(ch & 0x3F); |
|
652 dst += 5; |
|
653 dstlen -= 5; |
|
654 } else { |
|
655 if ( dstlen < 6 ) { |
|
656 return SDL_ICONV_E2BIG; |
|
657 } |
|
658 p[0] = 0xFC | (Uint8)((ch >> 30) & 0x01); |
|
659 p[1] = 0x80 | (Uint8)((ch >> 24) & 0x3F); |
|
660 p[2] = 0x80 | (Uint8)((ch >> 18) & 0x3F); |
|
661 p[3] = 0x80 | (Uint8)((ch >> 12) & 0x3F); |
|
662 p[4] = 0x80 | (Uint8)((ch >> 6) & 0x3F); |
|
663 p[5] = 0x80 | (Uint8)(ch & 0x3F); |
|
664 dst += 6; |
|
665 dstlen -= 6; |
|
666 } |
|
667 } |
|
668 break; |
|
669 case ENCODING_UTF16BE: /* RFC 2781 */ |
|
670 { |
|
671 Uint8 *p = (Uint8 *)dst; |
|
672 if ( ch > 0x10FFFF ) { |
|
673 ch = UNKNOWN_UNICODE; |
|
674 } |
|
675 if ( ch < 0x10000 ) { |
|
676 if ( dstlen < 2 ) { |
|
677 return SDL_ICONV_E2BIG; |
|
678 } |
|
679 p[0] = (Uint8)(ch >> 8); |
|
680 p[1] = (Uint8)ch; |
|
681 dst += 2; |
|
682 dstlen -= 2; |
|
683 } else { |
|
684 Uint16 W1, W2; |
|
685 if ( dstlen < 4 ) { |
|
686 return SDL_ICONV_E2BIG; |
|
687 } |
|
688 ch = ch - 0x10000; |
|
689 W1 = 0xD800 | (Uint16)((ch >> 10) & 0x3FF); |
|
690 W2 = 0xDC00 | (Uint16)(ch & 0x3FF); |
|
691 p[0] = (Uint8)(W1 >> 8); |
|
692 p[1] = (Uint8)W1; |
|
693 p[2] = (Uint8)(W2 >> 8); |
|
694 p[3] = (Uint8)W2; |
|
695 dst += 4; |
|
696 dstlen -= 4; |
|
697 } |
|
698 } |
|
699 break; |
|
700 case ENCODING_UTF16LE: /* RFC 2781 */ |
|
701 { |
|
702 Uint8 *p = (Uint8 *)dst; |
|
703 if ( ch > 0x10FFFF ) { |
|
704 ch = UNKNOWN_UNICODE; |
|
705 } |
|
706 if ( ch < 0x10000 ) { |
|
707 if ( dstlen < 2 ) { |
|
708 return SDL_ICONV_E2BIG; |
|
709 } |
|
710 p[1] = (Uint8)(ch >> 8); |
|
711 p[0] = (Uint8)ch; |
|
712 dst += 2; |
|
713 dstlen -= 2; |
|
714 } else { |
|
715 Uint16 W1, W2; |
|
716 if ( dstlen < 4 ) { |
|
717 return SDL_ICONV_E2BIG; |
|
718 } |
|
719 ch = ch - 0x10000; |
|
720 W1 = 0xD800 | (Uint16)((ch >> 10) & 0x3FF); |
|
721 W2 = 0xDC00 | (Uint16)(ch & 0x3FF); |
|
722 p[1] = (Uint8)(W1 >> 8); |
|
723 p[0] = (Uint8)W1; |
|
724 p[3] = (Uint8)(W2 >> 8); |
|
725 p[2] = (Uint8)W2; |
|
726 dst += 4; |
|
727 dstlen -= 4; |
|
728 } |
|
729 } |
|
730 break; |
|
731 case ENCODING_UTF32BE: |
|
732 { |
|
733 Uint8 *p = (Uint8 *)dst; |
|
734 if ( ch > 0x10FFFF ) { |
|
735 ch = UNKNOWN_UNICODE; |
|
736 } |
|
737 if ( dstlen < 4 ) { |
|
738 return SDL_ICONV_E2BIG; |
|
739 } |
|
740 p[0] = (Uint8)(ch >> 24); |
|
741 p[1] = (Uint8)(ch >> 16); |
|
742 p[2] = (Uint8)(ch >> 8); |
|
743 p[3] = (Uint8)ch; |
|
744 dst += 4; |
|
745 dstlen -= 4; |
|
746 } |
|
747 break; |
|
748 case ENCODING_UTF32LE: |
|
749 { |
|
750 Uint8 *p = (Uint8 *)dst; |
|
751 if ( ch > 0x10FFFF ) { |
|
752 ch = UNKNOWN_UNICODE; |
|
753 } |
|
754 if ( dstlen < 4 ) { |
|
755 return SDL_ICONV_E2BIG; |
|
756 } |
|
757 p[3] = (Uint8)(ch >> 24); |
|
758 p[2] = (Uint8)(ch >> 16); |
|
759 p[1] = (Uint8)(ch >> 8); |
|
760 p[0] = (Uint8)ch; |
|
761 dst += 4; |
|
762 dstlen -= 4; |
|
763 } |
|
764 break; |
|
765 case ENCODING_UCS2: |
|
766 { |
|
767 Uint16 *p = (Uint16 *)dst; |
|
768 if ( ch > 0xFFFF ) { |
|
769 ch = UNKNOWN_UNICODE; |
|
770 } |
|
771 if ( dstlen < 2 ) { |
|
772 return SDL_ICONV_E2BIG; |
|
773 } |
|
774 *p = (Uint16)ch; |
|
775 dst += 2; |
|
776 dstlen -= 2; |
|
777 } |
|
778 break; |
|
779 case ENCODING_UCS4: |
|
780 { |
|
781 Uint32 *p = (Uint32 *)dst; |
|
782 if ( ch > 0x7FFFFFFF ) { |
|
783 ch = UNKNOWN_UNICODE; |
|
784 } |
|
785 if ( dstlen < 4 ) { |
|
786 return SDL_ICONV_E2BIG; |
|
787 } |
|
788 *p = ch; |
|
789 dst += 4; |
|
790 dstlen -= 4; |
|
791 } |
|
792 break; |
|
793 } |
|
794 |
|
795 /* Update state */ |
|
796 *inbuf = src; |
|
797 *inbytesleft = srclen; |
|
798 *outbuf = dst; |
|
799 *outbytesleft = dstlen; |
|
800 ++total; |
|
801 } |
|
802 return total; |
|
803 } |
|
804 |
|
805 int SDL_iconv_close(SDL_iconv_t cd) |
|
806 { |
|
807 if ( cd && cd != (SDL_iconv_t)-1 ) { |
|
808 SDL_free(cd); |
|
809 } |
|
810 return 0; |
|
811 } |
|
812 |
|
813 #endif /* !HAVE_ICONV */ |
|
814 |
|
815 char *SDL_iconv_string(const char *tocode, const char *fromcode, const char *inbuf, size_t inbytesleft) |
|
816 { |
|
817 SDL_iconv_t cd; |
|
818 char *string; |
|
819 size_t stringsize; |
|
820 char *outbuf; |
|
821 size_t outbytesleft; |
|
822 size_t retCode = 0; |
|
823 |
|
824 cd = SDL_iconv_open(tocode, fromcode); |
|
825 if ( cd == (SDL_iconv_t)-1 ) { |
|
826 /* See if we can recover here (fixes iconv on Solaris 11) */ |
|
827 if ( !tocode || !*tocode ) { |
|
828 tocode = "UTF-8"; |
|
829 } |
|
830 if ( !fromcode || !*fromcode ) { |
|
831 tocode = "UTF-8"; |
|
832 } |
|
833 cd = SDL_iconv_open(tocode, fromcode); |
|
834 } |
|
835 if ( cd == (SDL_iconv_t)-1 ) { |
|
836 return NULL; |
|
837 } |
|
838 |
|
839 stringsize = inbytesleft > 4 ? inbytesleft : 4; |
|
840 string = SDL_malloc(stringsize); |
|
841 if ( !string ) { |
|
842 SDL_iconv_close(cd); |
|
843 return NULL; |
|
844 } |
|
845 outbuf = string; |
|
846 outbytesleft = stringsize; |
|
847 SDL_memset(outbuf, 0, 4); |
|
848 |
|
849 while ( inbytesleft > 0 ) { |
|
850 retCode = SDL_iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft); |
|
851 switch (retCode) { |
|
852 case SDL_ICONV_E2BIG: |
|
853 { |
|
854 char *oldstring = string; |
|
855 stringsize *= 2; |
|
856 string = SDL_realloc(string, stringsize); |
|
857 if ( !string ) { |
|
858 SDL_iconv_close(cd); |
|
859 return NULL; |
|
860 } |
|
861 outbuf = string + (outbuf - oldstring); |
|
862 outbytesleft = stringsize - (outbuf - string); |
|
863 SDL_memset(outbuf, 0, 4); |
|
864 } |
|
865 break; |
|
866 case SDL_ICONV_EILSEQ: |
|
867 /* Try skipping some input data - not perfect, but... */ |
|
868 ++inbuf; |
|
869 --inbytesleft; |
|
870 break; |
|
871 case SDL_ICONV_EINVAL: |
|
872 case SDL_ICONV_ERROR: |
|
873 /* We can't continue... */ |
|
874 inbytesleft = 0; |
|
875 break; |
|
876 } |
|
877 } |
|
878 SDL_iconv_close(cd); |
|
879 |
|
880 return string; |
|
881 } |