|
1 /**************************************************************************** |
|
2 ** |
|
3 ** |
|
4 ** Implementation of QTextCodec class |
|
5 ** |
|
6 ** Created : 981015 |
|
7 ** |
|
8 ** Copyright (C)1998-2000 Trolltech AS. All rights reserved. |
|
9 ** |
|
10 ** This file is part of the tools module of the Qt GUI Toolkit. |
|
11 ** |
|
12 ** This file may be distributed under the terms of the Q Public License |
|
13 ** as defined by Trolltech AS of Norway and appearing in the file |
|
14 ** LICENSE.QPL included in the packaging of this file. |
|
15 ** |
|
16 ** This file may be distributed and/or modified under the terms of the |
|
17 ** GNU General Public License version 2 as published by the Free Software |
|
18 ** Foundation and appearing in the file LICENSE.GPL included in the |
|
19 ** packaging of this file. |
|
20 ** |
|
21 ** Licensees holding valid Qt Enterprise Edition or Qt Professional Edition |
|
22 ** licenses may use this file in accordance with the Qt Commercial License |
|
23 ** Agreement provided with the Software. |
|
24 ** |
|
25 ** This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE |
|
26 ** WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. |
|
27 ** |
|
28 ** See http://www.trolltech.com/pricing.html or email sales@trolltech.com for |
|
29 ** information about Qt Commercial License Agreements. |
|
30 ** See http://www.trolltech.com/qpl/ for QPL licensing information. |
|
31 ** See http://www.trolltech.com/gpl/ for GPL licensing information. |
|
32 ** |
|
33 ** Contact info@trolltech.com if any conditions of this licensing are |
|
34 ** not clear to you. |
|
35 ** |
|
36 **********************************************************************/ |
|
37 |
|
38 #include "qtextcodec.h" |
|
39 #ifndef QT_NO_TEXTCODEC |
|
40 |
|
41 #include "qlist.h" |
|
42 #ifndef QT_NO_CODECS |
|
43 #include "qutfcodec.h" |
|
44 #include "qgbkcodec.h" |
|
45 #include "qeucjpcodec.h" |
|
46 #include "qjiscodec.h" |
|
47 #include "qsjiscodec.h" |
|
48 #include "qeuckrcodec.h" |
|
49 #include "qbig5codec.h" |
|
50 #include "qrtlcodec.h" |
|
51 #include "qtsciicodec.h" |
|
52 #endif |
|
53 |
|
54 #include "qfile.h" |
|
55 #include "qstrlist.h" |
|
56 #include "qstring.h" |
|
57 |
|
58 #include <stdlib.h> |
|
59 #include <ctype.h> |
|
60 #include <locale.h> |
|
61 |
|
62 |
|
63 static QList<QTextCodec> * all = 0; |
|
64 static bool destroying_is_ok; // starts out as 0 |
|
65 |
|
66 /*! Deletes all the created codecs. |
|
67 |
|
68 \warning Do not call this function. |
|
69 |
|
70 QApplication calls this just before exiting, to delete any |
|
71 QTextCodec objects that may be lying around. Since various other |
|
72 classes hold pointers to QTextCodec objects, it is not safe to call |
|
73 this function earlier. |
|
74 |
|
75 If you are using the utility classes (like QString) but not using |
|
76 QApplication, calling this function at the very end of your |
|
77 application can be helpful to chasing down memory leaks, as |
|
78 QTextCodec objects will not show up. |
|
79 */ |
|
80 |
|
81 void QTextCodec::deleteAllCodecs() |
|
82 { |
|
83 if ( !all ) |
|
84 return; |
|
85 |
|
86 destroying_is_ok = TRUE; |
|
87 QList<QTextCodec> * ball = all; |
|
88 all = 0; |
|
89 ball->clear(); |
|
90 delete ball; |
|
91 destroying_is_ok = FALSE; |
|
92 } |
|
93 |
|
94 |
|
95 static void setupBuiltinCodecs(); |
|
96 |
|
97 |
|
98 static void realSetup() |
|
99 { |
|
100 #if defined(CHECK_STATE) |
|
101 if ( destroying_is_ok ) |
|
102 qWarning( "creating new codec during codec cleanup" ); |
|
103 #endif |
|
104 all = new QList<QTextCodec>; |
|
105 all->setAutoDelete( TRUE ); |
|
106 setupBuiltinCodecs(); |
|
107 } |
|
108 |
|
109 |
|
110 static inline void setup() |
|
111 { |
|
112 if ( !all ) |
|
113 realSetup(); |
|
114 } |
|
115 |
|
116 |
|
117 class QTextStatelessEncoder: public QTextEncoder { |
|
118 const QTextCodec* codec; |
|
119 public: |
|
120 QTextStatelessEncoder(const QTextCodec*); |
|
121 QCString fromUnicode(const QString& uc, int& lenInOut); |
|
122 }; |
|
123 |
|
124 |
|
125 class QTextStatelessDecoder : public QTextDecoder { |
|
126 const QTextCodec* codec; |
|
127 public: |
|
128 QTextStatelessDecoder(const QTextCodec*); |
|
129 QString toUnicode(const char* chars, int len); |
|
130 }; |
|
131 |
|
132 QTextStatelessEncoder::QTextStatelessEncoder(const QTextCodec* c) : |
|
133 codec(c) |
|
134 { |
|
135 } |
|
136 |
|
137 |
|
138 QCString QTextStatelessEncoder::fromUnicode(const QString& uc, int& lenInOut) |
|
139 { |
|
140 return codec->fromUnicode(uc,lenInOut); |
|
141 } |
|
142 |
|
143 |
|
144 QTextStatelessDecoder::QTextStatelessDecoder(const QTextCodec* c) : |
|
145 codec(c) |
|
146 { |
|
147 } |
|
148 |
|
149 |
|
150 QString QTextStatelessDecoder::toUnicode(const char* chars, int len) |
|
151 { |
|
152 return codec->toUnicode(chars,len); |
|
153 } |
|
154 |
|
155 |
|
156 |
|
157 // NOT REVISED |
|
158 /*! |
|
159 \class QTextCodec qtextcodec.h |
|
160 \brief Provides conversion between text encodings. |
|
161 |
|
162 By making objects of subclasses of QTextCodec, support for |
|
163 new text encodings can be added to Qt. |
|
164 |
|
165 The abstract virtual functions describe the encoder to the |
|
166 system and the coder is used as required in the different |
|
167 text file formats supported by QTextStream and, under X11, for the |
|
168 locale-specific character input and output (under Windows NT |
|
169 codecs are not needed for GUI I/O since the system works |
|
170 with Unicode already, and Windows 95/98 has built-in convertors |
|
171 for the 8-bit local encoding). |
|
172 |
|
173 More recently created QTextCodec objects take precedence |
|
174 over earlier ones. |
|
175 |
|
176 To add support for another 8-bit encoding to Qt, make a subclass |
|
177 of QTextCodec and implement at least the following methods: |
|
178 <dl> |
|
179 <dt>\c const char* name() const |
|
180 <dd>Return the official name for the encoding. |
|
181 <dt>\c int mibEnum() const |
|
182 <dd>Return the MIB enum for the encoding if it is listed in the |
|
183 <a href=ftp://ftp.isi.edu/in-notes/iana/assignments/character-sets> |
|
184 IANA character-sets encoding file</a>. |
|
185 </dl> |
|
186 If the encoding is multi-byte then it will have "state"; that is, |
|
187 the interpretation of some bytes will be dependent on some preceding |
|
188 bytes. For such an encoding, you will need to implement |
|
189 <dl> |
|
190 <dt> \c QTextDecoder* makeDecoder() const |
|
191 <dd>Return a QTextDecoder that remembers incomplete multibyte |
|
192 sequence prefixes or other required state. |
|
193 </dl> |
|
194 If the encoding does \e not require state, you should implement: |
|
195 <dl> |
|
196 <dt> \c QString toUnicode(const char* chars, int len) const |
|
197 <dd>Converts \e len characters from \e chars to Unicode. |
|
198 </dl> |
|
199 The base QTextCodec class has default implementations of the above |
|
200 two functions, <i>but they are mutually recursive</i>, so you must |
|
201 re-implement at least one of them, or both for improved efficiency. |
|
202 |
|
203 For conversion from Unicode to 8-bit encodings, it is rarely necessary |
|
204 to maintain state. However, two functions similar to the two above |
|
205 are used for encoding: |
|
206 <dl> |
|
207 <dt> \c QTextEncoder* makeEncoder() const |
|
208 <dd>Return a QTextEncoder. |
|
209 <dt> \c QCString fromUnicode(const QString& uc, int& lenInOut ) const; |
|
210 <dd>Converts \e lenInOut characters (of type QChar) from the start |
|
211 of the string \a uc, returning a QCString result, and also returning |
|
212 the \link QCString::length() length\endlink |
|
213 of the result in lenInOut. |
|
214 </dl> |
|
215 Again, these are mutually recursive so only one needs to be implemented, |
|
216 or both if better efficiency is possible. |
|
217 |
|
218 Finally, you must implement: |
|
219 <dl> |
|
220 <dt> \c int heuristicContentMatch(const char* chars, int len) const |
|
221 <dd>Gives a value indicating how likely it is that \e len characters |
|
222 from \e chars are in the encoding. |
|
223 </dl> |
|
224 A good model for this function is the |
|
225 QWindowsLocalCodec::heuristicContentMatch function found in the Qt sources. |
|
226 |
|
227 A QTextCodec subclass might have improved performance if you also |
|
228 re-implement: |
|
229 <dl> |
|
230 <dt> \c bool canEncode( QChar ) const |
|
231 <dd>Test if a Unicode character can be encoded. |
|
232 <dt> \c bool canEncode( const QString& ) const |
|
233 <dd>Test if a string of Unicode characters can be encoded. |
|
234 <dt> \c int heuristicNameMatch(const char* hint) const |
|
235 <dd>Test if a possibly non-standard name is referring to the codec. |
|
236 </dl> |
|
237 */ |
|
238 |
|
239 |
|
240 /*! |
|
241 Constructs a QTextCodec, making it of highest precedence. |
|
242 The QTextCodec should always be constructed on the heap |
|
243 (with new), and once constructed it becomes the responsibility |
|
244 of Qt to delete it (which is done at QApplication destruction). |
|
245 */ |
|
246 QTextCodec::QTextCodec() |
|
247 { |
|
248 setup(); |
|
249 all->insert(0,this); |
|
250 } |
|
251 |
|
252 |
|
253 /*! |
|
254 Destructs the QTextCodec. Note that you should not delete |
|
255 codecs yourself - once created they become the responsibility |
|
256 of Qt to delete. |
|
257 */ |
|
258 QTextCodec::~QTextCodec() |
|
259 { |
|
260 if ( !destroying_is_ok ) |
|
261 qWarning("QTextCodec::~QTextCodec() called by application"); |
|
262 if ( all ) |
|
263 all->remove( this ); |
|
264 } |
|
265 |
|
266 |
|
267 /*! |
|
268 Returns a value indicating how likely this decoder is |
|
269 for decoding some format that has the given name. |
|
270 |
|
271 A good match returns a positive number around |
|
272 the length of the string. A bad match is negative. |
|
273 |
|
274 The default implementation calls simpleHeuristicNameMatch() |
|
275 with the name of the codec. |
|
276 */ |
|
277 int QTextCodec::heuristicNameMatch(const char* hint) const |
|
278 { |
|
279 return simpleHeuristicNameMatch(name(),hint); |
|
280 } |
|
281 |
|
282 |
|
283 // returns a string cotnaining the letters and numbers from input, |
|
284 // with a space separating run of a character class. e.g. "iso8859-1" |
|
285 // becomes "iso 8859 1" |
|
286 static QString lettersAndNumbers( const char * input ) |
|
287 { |
|
288 QString result; |
|
289 QChar c; |
|
290 |
|
291 while( input && *input ) { |
|
292 c = *input; |
|
293 if ( c.isLetter() || c.isNumber() ) |
|
294 result += c.lower(); |
|
295 if ( input[1] ) { |
|
296 // add space at character class transition, except |
|
297 // transition from upper-case to lower-case letter |
|
298 QChar n( input[1] ); |
|
299 if ( c.isLetter() && n.isLetter() ) { |
|
300 if ( c == c.lower() && n == n.upper() ) |
|
301 result += ' '; |
|
302 } else if ( c.category() != n.category() ) { |
|
303 result += ' '; |
|
304 } |
|
305 } |
|
306 input++; |
|
307 } |
|
308 return result.simplifyWhiteSpace(); |
|
309 } |
|
310 |
|
311 /*! |
|
312 A simple utility function for heuristicNameMatch() - it |
|
313 does some very minor character-skipping |
|
314 so that almost-exact matches score high. |
|
315 */ |
|
316 int QTextCodec::simpleHeuristicNameMatch(const char* name, const char* hint) |
|
317 { |
|
318 // if they're the same, return a perfect score. |
|
319 if ( name && hint && qstrcmp( name, hint ) == 0 ) |
|
320 return qstrlen( hint ); |
|
321 |
|
322 // if the letters and numbers are the same, we have an "almost" |
|
323 // perfect match. |
|
324 QString h( lettersAndNumbers( hint ) ); |
|
325 QString n( lettersAndNumbers( name ) ); |
|
326 if ( h == n ) |
|
327 return qstrlen( hint )-1; |
|
328 |
|
329 if ( h.stripWhiteSpace() == n.stripWhiteSpace() ) |
|
330 return qstrlen( hint )-2; |
|
331 |
|
332 // could do some more here, but I don't think it's worth it |
|
333 |
|
334 return 0; |
|
335 } |
|
336 |
|
337 |
|
338 /*! |
|
339 Returns the QTextCodec \a i places from the more recently |
|
340 inserted, or NULL if there is no such QTextCodec. Thus, |
|
341 codecForIndex(0) returns the most recently created QTextCodec. |
|
342 */ |
|
343 QTextCodec* QTextCodec::codecForIndex(int i) |
|
344 { |
|
345 setup(); |
|
346 return (uint)i >= all->count() ? 0 : all->at(i); |
|
347 } |
|
348 |
|
349 |
|
350 /*! |
|
351 Returns the QTextCodec which matches the |
|
352 \link QTextCodec::mibEnum() MIBenum\endlink \a mib. |
|
353 */ |
|
354 QTextCodec* QTextCodec::codecForMib(int mib) |
|
355 { |
|
356 setup(); |
|
357 QListIterator<QTextCodec> i(*all); |
|
358 QTextCodec* result; |
|
359 for ( ; (result=i); ++i ) { |
|
360 if ( result->mibEnum()==mib ) |
|
361 break; |
|
362 } |
|
363 return result; |
|
364 } |
|
365 |
|
366 |
|
367 |
|
368 |
|
369 |
|
370 #ifdef _OS_WIN32_ |
|
371 class QWindowsLocalCodec: public QTextCodec |
|
372 { |
|
373 public: |
|
374 QWindowsLocalCodec(); |
|
375 ~QWindowsLocalCodec(); |
|
376 |
|
377 QString toUnicode(const char* chars, int len) const; |
|
378 QCString fromUnicode(const QString& uc, int& lenInOut ) const; |
|
379 |
|
380 const char* name() const; |
|
381 int mibEnum() const; |
|
382 |
|
383 int heuristicContentMatch(const char* chars, int len) const; |
|
384 }; |
|
385 |
|
386 QWindowsLocalCodec::QWindowsLocalCodec() |
|
387 { |
|
388 } |
|
389 |
|
390 QWindowsLocalCodec::~QWindowsLocalCodec() |
|
391 { |
|
392 } |
|
393 |
|
394 |
|
395 QString QWindowsLocalCodec::toUnicode(const char* chars, int len) const |
|
396 { |
|
397 if ( len == 1 && chars ) { // Optimization; avoids allocation |
|
398 char c[2]; |
|
399 c[0] = *chars; |
|
400 c[1] = 0; |
|
401 return qt_winMB2QString( c, 2 ); |
|
402 } |
|
403 if ( len < 0 ) |
|
404 return qt_winMB2QString( chars ); |
|
405 QCString s(chars,len+1); |
|
406 return qt_winMB2QString(s); |
|
407 } |
|
408 |
|
409 QCString QWindowsLocalCodec::fromUnicode(const QString& uc, int& lenInOut ) const |
|
410 { |
|
411 QCString r = qt_winQString2MB( uc, lenInOut ); |
|
412 lenInOut = r.length(); |
|
413 return r; |
|
414 } |
|
415 |
|
416 |
|
417 const char* QWindowsLocalCodec::name() const |
|
418 { |
|
419 return "System"; |
|
420 } |
|
421 |
|
422 int QWindowsLocalCodec::mibEnum() const |
|
423 { |
|
424 return 0; |
|
425 } |
|
426 |
|
427 |
|
428 int QWindowsLocalCodec::heuristicContentMatch(const char* chars, int len) const |
|
429 { |
|
430 // ### Not a bad default implementation? |
|
431 QString t = toUnicode(chars,len); |
|
432 int l = t.length(); |
|
433 QCString mb = fromUnicode(t,l); |
|
434 int i=0; |
|
435 while ( i < len ) |
|
436 if ( chars[i] == mb[i] ) |
|
437 i++; |
|
438 return i; |
|
439 } |
|
440 |
|
441 #else |
|
442 |
|
/*
  Null-terminated tables of locale names, one table per encoding; they
  are searched with try_locale_list().  The locale names were mostly
  copied from XFree86.
*/
static const char * const iso8859_2locales[] = {
    "croatian", "cs", "cs_CS", "cs_CZ", "cz", "cz_CZ", "czech",
    "hr", "hr_HR",
    "hu", "hu_HU", "hungarian",
    "pl", "pl_PL", "polish",
    "ro", "ro_RO", "rumanian",
    "serbocroatian", "sh", "sh_SP", "sh_YU",
    "sk", "sk_SK",
    "sl", "sl_CS", "sl_SI", "slovak", "slovene",
    "sr_SP",
    0
};

static const char * const iso8859_3locales[] = {
    "eo",
    0
};

static const char * const iso8859_4locales[] = {
    "ee", "ee_EE",
    "lt", "lt_LT",
    "lv", "lv_LV",
    0
};

static const char * const iso8859_5locales[] = {
    "bg", "bg_BG", "bulgarian",
    "mk", "mk_MK",
    "sp", "sp_YU",
    0
};

static const char * const iso8859_6locales[] = {
    "ar_AA", "ar_SA", "arabic",
    0
};

static const char * const iso8859_7locales[] = {
    "el", "el_GR", "greek",
    0
};

static const char * const iso8859_8locales[] = {
    "hebrew", "he", "he_IL", "iw", "iw_IL",
    0
};

static const char * const iso8859_9locales[] = {
    "tr", "tr_TR", "turkish",
    0
};

static const char * const iso8859_15locales[] = {
    "fr", "fi", "french", "finnish",
    "et", "et_EE",
    0
};

static const char * const koi8_ulocales[] = {
    "uk", "uk_UA", "ru_UA", "ukrainian",
    0
};

static const char * const tis_620locales[] = {
    "th", "th_TH", "thai",
    0
};
|
480 |
|
481 |
|
482 static bool try_locale_list( const char * const locale[], const char * lang ) |
|
483 { |
|
484 int i; |
|
485 for( i=0; locale[i] && qstrcmp(locale[i], lang); i++ ) |
|
486 { } |
|
487 return locale[i] != 0; |
|
488 } |
|
489 |
|
490 // For the probably_koi8_locales we have to look. the standard says |
|
491 // these are 8859-5, but almost all Russian users use KOI8-R and |
|
492 // incorrectly set $LANG to ru_RU. We'll check tolower() to see what |
|
493 // tolower() thinks ru_RU means. |
|
494 |
|
495 // If you read the history, it seems that many Russians blame ISO and |
|
496 // Perestroika for the confusion. |
|
497 // |
|
498 // The real bug is that some programs break if the user specifies |
|
499 // ru_RU.KOI8-R. |
|
500 |
|
501 static const char * const probably_koi8_rlocales[] = { |
|
502 "ru", "ru_SU", "ru_RU", "russian", 0 }; |
|
503 |
|
504 // this means ANY of these locale aliases. if they're aliases for |
|
505 // different locales, the code breaks. |
|
506 static QTextCodec * ru_RU_codec = 0; |
|
507 |
|
508 static QTextCodec * ru_RU_hack( const char * i ) { |
|
509 if ( ! ru_RU_codec ) { |
|
510 QCString origlocale = setlocale( LC_CTYPE, i ); |
|
511 // unicode koi8r latin5 name |
|
512 // 0x044E 0xC0 0xEE CYRILLIC SMALL LETTER YU |
|
513 // 0x042E 0xE0 0xCE CYRILLIC CAPITAL LETTER YU |
|
514 int latin5 = tolower( 0xCE ); |
|
515 int koi8r = tolower( 0xE0 ); |
|
516 if ( koi8r == 0xC0 && latin5 != 0xEE ) { |
|
517 ru_RU_codec = QTextCodec::codecForName( "KOI8-R" ); |
|
518 } else if ( koi8r != 0xC0 && latin5 == 0xEE ) { |
|
519 ru_RU_codec = QTextCodec::codecForName( "ISO 8859-5" ); |
|
520 } else { |
|
521 // something else again... let's assume... *throws dice* |
|
522 ru_RU_codec = QTextCodec::codecForName( "KOI8-R" ); |
|
523 qWarning( "QTextCodec: using KOI8-R, probe failed (%02x %02x %s)", |
|
524 koi8r, latin5, i ); |
|
525 } |
|
526 setlocale( LC_CTYPE, origlocale.data() ); |
|
527 } |
|
528 return ru_RU_codec; |
|
529 } |
|
530 |
|
531 #endif |
|
532 |
|
533 static QTextCodec * localeMapper = 0; |
|
534 |
|
535 void qt_set_locale_codec( QTextCodec *codec ) |
|
536 { |
|
537 localeMapper = codec; |
|
538 } |
|
539 |
|
540 /*! Returns a pointer to the codec most suitable for this locale. */ |
|
541 |
|
542 QTextCodec* QTextCodec::codecForLocale() |
|
543 { |
|
544 if ( localeMapper ) |
|
545 return localeMapper; |
|
546 |
|
547 setup(); |
|
548 |
|
549 #ifdef _OS_WIN32_ |
|
550 localeMapper = new QWindowsLocalCodec; |
|
551 #else |
|
552 // Very poorly defined and followed standards causes lots of code |
|
553 // to try to get all the cases... |
|
554 |
|
555 char * lang = qstrdup( getenv("LANG") ); |
|
556 |
|
557 char * p = lang ? strchr( lang, '.' ) : 0; |
|
558 if ( !p || *p != '.' ) { |
|
559 // Some versions of setlocale return encoding, others not. |
|
560 char *ctype = qstrdup( setlocale( LC_CTYPE, 0 ) ); |
|
561 // Some Linux distributions have broken locales which will return |
|
562 // "C" for LC_CTYPE |
|
563 if ( qstrcmp( ctype, "C" ) == 0 ) { |
|
564 delete [] ctype; |
|
565 } else { |
|
566 if ( lang ) |
|
567 delete [] lang; |
|
568 lang = ctype; |
|
569 p = lang ? strchr( lang, '.' ) : 0; |
|
570 } |
|
571 } |
|
572 |
|
573 if( p && *p == '.' ) { |
|
574 // if there is an encoding and we don't know it, we return 0 |
|
575 // User knows what they are doing. Codecs will believe them. |
|
576 localeMapper = codecForName( lang ); |
|
577 if ( !localeMapper ) { |
|
578 // Use or codec disagree. |
|
579 localeMapper = codecForName( p+1 ); |
|
580 } |
|
581 } |
|
582 if ( !localeMapper || !(p && *p == '.') ) { |
|
583 // if there is none, we default to 8859-1 |
|
584 // We could perhaps default to 8859-15. |
|
585 if ( try_locale_list( iso8859_2locales, lang ) ) |
|
586 localeMapper = codecForName( "ISO 8859-2" ); |
|
587 else if ( try_locale_list( iso8859_3locales, lang ) ) |
|
588 localeMapper = codecForName( "ISO 8859-3" ); |
|
589 else if ( try_locale_list( iso8859_4locales, lang ) ) |
|
590 localeMapper = codecForName( "ISO 8859-4" ); |
|
591 else if ( try_locale_list( iso8859_5locales, lang ) ) |
|
592 localeMapper = codecForName( "ISO 8859-5" ); |
|
593 else if ( try_locale_list( iso8859_6locales, lang ) ) |
|
594 localeMapper = codecForName( "ISO 8859-6-I" ); |
|
595 else if ( try_locale_list( iso8859_7locales, lang ) ) |
|
596 localeMapper = codecForName( "ISO 8859-7" ); |
|
597 else if ( try_locale_list( iso8859_8locales, lang ) ) |
|
598 localeMapper = codecForName( "ISO 8859-8-I" ); |
|
599 else if ( try_locale_list( iso8859_9locales, lang ) ) |
|
600 localeMapper = codecForName( "ISO 8859-9" ); |
|
601 else if ( try_locale_list( iso8859_15locales, lang ) ) |
|
602 localeMapper = codecForName( "ISO 8859-15" ); |
|
603 else if ( try_locale_list( tis_620locales, lang ) ) |
|
604 localeMapper = codecForName( "ISO 8859-11" ); |
|
605 else if ( try_locale_list( koi8_ulocales, lang ) ) |
|
606 localeMapper = codecForName( "KOI8-U" ); |
|
607 else if ( try_locale_list( probably_koi8_rlocales, lang ) ) |
|
608 localeMapper = ru_RU_hack( lang ); |
|
609 else if (!lang || !(localeMapper = codecForName(lang) )) |
|
610 localeMapper = codecForName( "ISO 8859-1" ); |
|
611 } |
|
612 delete[] lang; |
|
613 #endif |
|
614 |
|
615 return localeMapper; |
|
616 } |
|
617 |
|
618 |
|
619 /*! |
|
620 Searches all installed QTextCodec objects, returning the one |
|
621 which best matches given name. Returns NULL if no codec has |
|
622 a match closeness above \a accuracy. |
|
623 |
|
624 \sa heuristicNameMatch() |
|
625 */ |
|
626 QTextCodec* QTextCodec::codecForName(const char* hint, int accuracy) |
|
627 { |
|
628 setup(); |
|
629 QListIterator<QTextCodec> i(*all); |
|
630 QTextCodec* result = 0; |
|
631 int best=accuracy; |
|
632 for ( QTextCodec* cursor; (cursor=i); ++i ) { |
|
633 int s = cursor->heuristicNameMatch(hint); |
|
634 if ( s > best ) { |
|
635 best = s; |
|
636 result = cursor; |
|
637 } |
|
638 } |
|
639 return result; |
|
640 } |
|
641 |
|
642 |
|
643 /*! |
|
644 Searches all installed QTextCodec objects, returning the one |
|
645 which most recognizes the given content. May return 0. |
|
646 |
|
647 Note that this is often a poor choice, since character |
|
648 encodings often use most of the available character sequences, |
|
649 and so only by linguistic analysis could a true match be made. |
|
650 |
|
651 \sa heuristicContentMatch() |
|
652 */ |
|
653 QTextCodec* QTextCodec::codecForContent(const char* chars, int len) |
|
654 { |
|
655 setup(); |
|
656 QListIterator<QTextCodec> i(*all); |
|
657 QTextCodec* result = 0; |
|
658 int best=0; |
|
659 for ( QTextCodec* cursor; (cursor=i); ++i ) { |
|
660 int s = cursor->heuristicContentMatch(chars,len); |
|
661 if ( s > best ) { |
|
662 best = s; |
|
663 result = cursor; |
|
664 } |
|
665 } |
|
666 return result; |
|
667 } |
|
668 |
|
669 |
|
670 /*! |
|
671 \fn const char* QTextCodec::name() const |
|
672 Subclasses of QTextCodec must reimplement this function. It returns |
|
673 the name of the encoding supported by the subclass. When choosing |
|
674 a name for an encoding, consider these points: |
|
675 <ul> |
|
676 <li>On X11, heuristicNameMatch( const char * hint ) |
|
677 is used to test if the QTextCodec |
|
678 can convert between Unicode and the encoding of a font |
|
679 with encoding \e hint, such as "iso8859-1" for Latin-1 fonts, |
|
680 "koi8-r" for Russian KOI8 fonts. |
|
681 The default algorithm of heuristicNameMatch() uses name(). |
|
682 <li>Some applications may use this function to present |
|
683 encodings to the end user. |
|
684 </ul> |
|
685 */ |
|
686 |
|
687 /*! |
|
688 \fn int QTextCodec::mibEnum() const |
|
689 |
|
690 Subclasses of QTextCodec must reimplement this function. It returns the |
|
691 MIBenum (see |
|
692 <a href="ftp://ftp.isi.edu/in-notes/iana/assignments/character-sets"> |
|
693 the IANA character-sets encoding file</a> for more information). |
|
694 It is important that each QTextCodec subclass return the correct unique |
|
695 value for this function. |
|
696 */ |
|
697 |
|
698 |
|
699 /*! |
|
700 \fn int QTextCodec::heuristicContentMatch(const char* chars, int len) const |
|
701 |
|
702 Subclasses of QTextCodec must reimplement this function. It examines |
|
703 the first \a len bytes of \a chars and returns a value indicating how |
|
704 likely it is that the string is a prefix of text encoded in the |
|
705 encoding of the subclass. Any negative return value indicates that the text |
|
706 is detectably not in the encoding (eg. it contains undefined characters). |
|
707 A return value of 0 indicates that the text should be decoded with this |
|
708 codec rather than as ASCII, but there |
|
709 is no particular evidence. The value should range up to \a len. Thus, |
|
710 most decoders will return -1, 0, or -\a len. |
|
711 |
|
712 The characters are not null terminated. |
|
713 |
|
714 \sa codecForContent(). |
|
715 */ |
|
716 |
|
717 |
|
718 /*! |
|
719 Creates a QTextDecoder which stores enough state to decode chunks |
|
720 of char* data to create chunks of Unicode data. The default implementation |
|
721 creates a stateless decoder, which is sufficient for only the simplest |
|
722 encodings where each byte corresponds to exactly one Unicode character. |
|
723 |
|
724 The caller is responsible for deleting the returned object. |
|
725 */ |
|
726 QTextDecoder* QTextCodec::makeDecoder() const |
|
727 { |
|
728 return new QTextStatelessDecoder(this); |
|
729 } |
|
730 |
|
731 |
|
732 /*! |
|
733 Creates a QTextEncoder which stores enough state to encode chunks |
|
734 of Unicode data as char* data. The default implementation |
|
735 creates a stateless encoder, which is sufficient for only the simplest |
|
736 encodings where each Unicode character corresponds to exactly one char. |
|
737 |
|
738 The caller is responsible for deleting the returned object. |
|
739 */ |
|
740 QTextEncoder* QTextCodec::makeEncoder() const |
|
741 { |
|
742 return new QTextStatelessEncoder(this); |
|
743 } |
|
744 |
|
745 |
|
746 /*! |
|
747 Subclasses of QTextCodec must reimplement this function or |
|
748 makeDecoder(). It converts the first \a len characters of \a chars |
|
749 to Unicode. |
|
750 |
|
751 The default implementation makes a decoder with makeDecoder() and |
|
752 converts the input with that. Note that the default makeDecoder() |
|
753 implementation makes a decoder that simply calls |
|
754 this function, hence subclasses \e must reimplement one function or |
|
755 the other to avoid infinite recursion. |
|
756 */ |
|
757 QString QTextCodec::toUnicode(const char* chars, int len) const |
|
758 { |
|
759 QTextDecoder* i = makeDecoder(); |
|
760 QString result = i->toUnicode(chars,len); |
|
761 delete i; |
|
762 return result; |
|
763 } |
|
764 |
|
765 |
|
766 /*! |
|
767 Subclasses of QTextCodec must reimplement either this function or |
|
768 makeEncoder(). It converts the first \a lenInOut characters of \a |
|
769 uc from Unicode to the encoding of the subclass. If \a lenInOut |
|
770 is negative or too large, the length of \a uc is used instead. |
|
771 |
|
772 The value returned is the property of the caller, which is |
|
773 responsible for deleting it with "delete []". The length of the |
|
774 resulting Unicode character sequence is returned in \a lenInOut. |
|
775 |
|
776 The default implementation makes an encoder with makeEncoder() and |
|
777 converts the input with that. Note that the default makeEncoder() |
|
778 implementation makes an encoder that simply calls |
|
779 this function, hence subclasses \e must reimplement one function or |
|
780 the other to avoid infinite recursion. |
|
781 */ |
|
782 |
|
783 QCString QTextCodec::fromUnicode(const QString& uc, int& lenInOut) const |
|
784 { |
|
785 QTextEncoder* i = makeEncoder(); |
|
786 QCString result = i->fromUnicode(uc, lenInOut); |
|
787 delete i; |
|
788 return result; |
|
789 } |
|
790 |
|
791 /*! |
|
792 \overload QCString QTextCodec::fromUnicode(const QString& uc) const |
|
793 */ |
|
794 QCString QTextCodec::fromUnicode(const QString& uc) const |
|
795 { |
|
796 int l = uc.length(); |
|
797 return fromUnicode(uc,l); |
|
798 } |
|
799 |
|
800 /*! |
|
801 \overload QString QTextCodec::toUnicode(const QByteArray& a, int len) const |
|
802 */ |
|
803 QString QTextCodec::toUnicode(const QByteArray& a, int len) const |
|
804 { |
|
805 int l = a.size(); |
|
806 if( l > 0 && a.data()[l - 1] == '\0' ) l--; |
|
807 l = QMIN( l, len ); |
|
808 return toUnicode( a.data(), l ); |
|
809 } |
|
810 |
|
811 /*! |
|
812 \overload QString QTextCodec::toUnicode(const QByteArray& a) const |
|
813 */ |
|
814 QString QTextCodec::toUnicode(const QByteArray& a) const |
|
815 { |
|
816 int l = a.size(); |
|
817 if( l > 0 && a.data()[l - 1] == '\0' ) l--; |
|
818 return toUnicode( a.data(), l ); |
|
819 } |
|
820 |
|
821 /*! |
|
822 \overload QString QTextCodec::toUnicode(const char* chars) const |
|
823 */ |
|
824 QString QTextCodec::toUnicode(const char* chars) const |
|
825 { |
|
826 return toUnicode(chars,qstrlen(chars)); |
|
827 } |
|
828 |
|
829 /*! |
|
830 Returns TRUE if the unicode character \a ch can be fully encoded |
|
831 with this codec. The default implementation tests if the result of |
|
832 toUnicode(fromUnicode(ch)) is the original \a ch. Subclasses may be |
|
833 able to improve the efficiency. |
|
834 */ |
|
835 bool QTextCodec::canEncode( QChar ch ) const |
|
836 { |
|
837 return toUnicode(fromUnicode(ch)) == ch; |
|
838 } |
|
839 |
|
840 /*! |
|
841 Returns TRUE if the unicode string \a s can be fully encoded |
|
842 with this codec. The default implementation tests if the result of |
|
843 toUnicode(fromUnicode(s)) is the original \a s. Subclasses may be |
|
844 able to improve the efficiency. |
|
845 */ |
|
846 bool QTextCodec::canEncode( const QString& s ) const |
|
847 { |
|
848 return toUnicode(fromUnicode(s)) == s; |
|
849 } |
|
850 |
|
851 |
|
852 |
|
853 /*! |
|
854 \class QTextEncoder qtextcodec.h |
|
855 \brief State-based encoder |
|
856 |
|
857 A QTextEncoder converts Unicode into another format, remembering |
|
858 any state that is required between calls. |
|
859 |
|
860 \sa QTextCodec::makeEncoder() |
|
861 */ |
|
862 |
|
863 /*! |
|
864 Destructs the encoder. |
|
865 */ |
|
866 QTextEncoder::~QTextEncoder() |
|
867 { |
|
868 } |
|
869 /*! |
|
870 \fn QCString QTextEncoder::fromUnicode(const QString& uc, int& lenInOut) |
|
871 |
|
872 Converts \a lenInOut characters (not bytes) from \a uc, producing |
|
873 a QCString. \a lenInOut will also be set to the |
|
874 \link QCString::length() length\endlink of the result (in bytes). |
|
875 |
|
876 The encoder is free to record state to use when subsequent calls are |
|
877 made to this function (for example, it might change modes with escape |
|
878 sequences if needed during the encoding of one string, then assume that |
|
879 mode applies when a subsequent call begins). |
|
880 */ |
|
881 |
|
882 /*! |
|
883 \class QTextDecoder qtextcodec.h |
|
884 \brief State-based decoder |
|
885 |
|
886 A QTextDecoder converts a text format into Unicode, remembering |
|
887 any state that is required between calls. |
|
888 |
|
889 \sa QTextCodec::makeDecoder() |
|
890 */ |
|
891 |
|
892 |
|
893 /*! |
|
894 Destructs the decoder. |
|
895 */ |
|
896 QTextDecoder::~QTextDecoder() |
|
897 { |
|
898 } |
|
899 |
|
900 /*! |
|
901 \fn QString QTextDecoder::toUnicode(const char* chars, int len) |
|
902 |
|
903 Converts the first \a len bytes at \a chars to Unicode, returning the |
|
904 result. |
|
905 |
|
906 If not all characters are used (eg. only part of a multi-byte |
|
907 encoding is at the end of the characters), the decoder remembers |
|
908 enough state to continue with the next call to this function. |
|
909 */ |
|
910 |
|
// Value stored in QMultiByteUnicodeTable::unicode by the charmap parser
// for a byte that is followed by further bytes of a multi-byte sequence.
#define CHAINED 0xffff

// One entry of a 256-way lookup table used to decode multi-byte
// sequences.  Tables are allocated as arrays of 256 entries, indexed by
// the input byte.
struct QMultiByteUnicodeTable {
    // If multibyte, ignore unicode and index into multibyte
    // with the next character.
    QMultiByteUnicodeTable() : unicode(0xfffd), multibyte(0) { }

    // Frees the nested table (and, recursively, everything below it).
    // delete [] on a null pointer is a no-op, so no check is needed.
    ~QMultiByteUnicodeTable()
    {
        delete [] multibyte;
    }

    ushort unicode;                     // decoded value; 0xfffd until set
    QMultiByteUnicodeTable* multibyte;  // owned array of 256 entries, or 0

private:
    // An entry owns its nested table, so a member-wise copy would lead to
    // the same array being deleted twice.  Copying is therefore disabled
    // (declared, never defined).
    QMultiByteUnicodeTable(const QMultiByteUnicodeTable&);
    QMultiByteUnicodeTable& operator=(const QMultiByteUnicodeTable&);
};
|
927 |
|
928 #ifndef QT_NO_CODECS |
|
// Parses one escaped byte constant of a charmap line.
//
// On entry \a cursor must point at the escape character (or at the
// terminating '\0', in which case nothing is consumed and 0 is returned).
// The character after the escape selects the radix:
//     <esc>xHH   hexadecimal
//     <esc>dDDD  decimal
//     <esc>OOO   octal
// On return \a cursor points just past the digits that were consumed.
// The result is truncated to the range 0..255.
static int getByte(char* &cursor)
{
    int byte = 0;
    if ( *cursor ) {
        if ( cursor[1] == 'x' )
            byte = strtol(cursor+2,&cursor,16);
        else if ( cursor[1] == 'd' )
            byte = strtol(cursor+2,&cursor,10);
        else
            // Octal constants have no radix letter, so the digits start
            // directly after the escape character.  Starting at cursor+1
            // also keeps strtol() inside the string when the escape is
            // the last character of the line.
            byte = strtol(cursor+1,&cursor,8);
    }
    return byte&0xff;
}
|
942 |
|
943 class QTextCodecFromIOD; |
|
944 |
|
945 class QTextCodecFromIODDecoder : public QTextDecoder { |
|
946 const QTextCodecFromIOD* codec; |
|
947 QMultiByteUnicodeTable* mb; |
|
948 public: |
|
949 QTextCodecFromIODDecoder(const QTextCodecFromIOD* c); |
|
950 QString toUnicode(const char* chars, int len); |
|
951 }; |
|
952 |
|
953 class QTextCodecFromIOD : public QTextCodec { |
|
954 friend class QTextCodecFromIODDecoder; |
|
955 |
|
956 QCString n; |
|
957 |
|
958 // If from_unicode_page[row()][cell()] is 0 and from_unicode_page_multibyte, |
|
959 // use from_unicode_page_multibyte[row()][cell()] as string. |
|
960 char** from_unicode_page; |
|
961 char*** from_unicode_page_multibyte; |
|
962 char unkn; |
|
963 |
|
964 // Only one of these is used |
|
965 ushort* to_unicode; |
|
966 QMultiByteUnicodeTable* to_unicode_multibyte; |
|
967 int max_bytes_per_char; |
|
968 QStrList aliases; |
|
969 |
|
970 bool stateless() const { return !to_unicode_multibyte; } |
|
971 |
|
972 public: |
|
973 QTextCodecFromIOD(QIODevice* iod) |
|
974 { |
|
975 from_unicode_page = 0; |
|
976 to_unicode_multibyte = 0; |
|
977 to_unicode = 0; |
|
978 from_unicode_page_multibyte = 0; |
|
979 max_bytes_per_char = 1; |
|
980 |
|
981 const int maxlen=100; |
|
982 char line[maxlen]; |
|
983 char esc='\\'; |
|
984 char comm='%'; |
|
985 bool incmap = FALSE; |
|
986 while (iod->readLine(line,maxlen) > 0) { |
|
987 if (0==qstrnicmp(line,"<code_set_name>",15)) |
|
988 n = line+15; |
|
989 else if (0==qstrnicmp(line,"<escape_char> ",14)) |
|
990 esc = line[14]; |
|
991 else if (0==qstrnicmp(line,"<comment_char> ",15)) |
|
992 comm = line[15]; |
|
993 else if (line[0]==comm && 0==qstrnicmp(line+1," alias ",7)) { |
|
994 aliases.append(line+8); |
|
995 } else if (0==qstrnicmp(line,"CHARMAP",7)) { |
|
996 if (!from_unicode_page) { |
|
997 from_unicode_page = new char*[256]; |
|
998 for (int i=0; i<256; i++) |
|
999 from_unicode_page[i]=0; |
|
1000 } |
|
1001 if (!to_unicode) { |
|
1002 to_unicode = new ushort[256]; |
|
1003 } |
|
1004 incmap = TRUE; |
|
1005 } else if (0==qstrnicmp(line,"END CHARMAP",11)) |
|
1006 break; |
|
1007 else if (incmap) { |
|
1008 char* cursor = line; |
|
1009 int byte,unicode=-1; |
|
1010 ushort* mb_unicode=0; |
|
1011 const int maxmb=8; // more -> we'll need to improve datastructures |
|
1012 char mb[maxmb+1]; |
|
1013 int nmb=0; |
|
1014 |
|
1015 while (*cursor) { |
|
1016 if (cursor[0]=='<' && cursor[1]=='U' && |
|
1017 cursor[2]>='0' && cursor[2]<='9' && |
|
1018 cursor[3]>='0' && cursor[3]<='9') { |
|
1019 |
|
1020 unicode = strtol(cursor+2,&cursor,16); |
|
1021 |
|
1022 } else if (*cursor==esc) { |
|
1023 |
|
1024 byte = getByte(cursor); |
|
1025 |
|
1026 if ( *cursor == esc ) { |
|
1027 if ( !to_unicode_multibyte ) { |
|
1028 to_unicode_multibyte = |
|
1029 new QMultiByteUnicodeTable[256]; |
|
1030 for (int i=0; i<256; i++) { |
|
1031 to_unicode_multibyte[i].unicode = |
|
1032 to_unicode[i]; |
|
1033 to_unicode_multibyte[i].multibyte = 0; |
|
1034 } |
|
1035 delete [] to_unicode; |
|
1036 to_unicode = 0; |
|
1037 } |
|
1038 QMultiByteUnicodeTable* mbut = |
|
1039 to_unicode_multibyte+byte; |
|
1040 mb[nmb++] = byte; |
|
1041 while ( nmb < maxmb && *cursor == esc ) { |
|
1042 // Always at least once |
|
1043 |
|
1044 mbut->unicode = CHAINED; |
|
1045 byte = getByte(cursor); |
|
1046 mb[nmb++] = byte; |
|
1047 if (!mbut->multibyte) { |
|
1048 mbut->multibyte = |
|
1049 new QMultiByteUnicodeTable[256]; |
|
1050 } |
|
1051 mbut = mbut->multibyte+byte; |
|
1052 mb_unicode = & mbut->unicode; |
|
1053 } |
|
1054 |
|
1055 if ( nmb > max_bytes_per_char ) |
|
1056 max_bytes_per_char = nmb; |
|
1057 } |
|
1058 } else { |
|
1059 cursor++; |
|
1060 } |
|
1061 } |
|
1062 |
|
1063 if (unicode >= 0 && unicode <= 0xffff) |
|
1064 { |
|
1065 QChar ch((ushort)unicode); |
|
1066 if (!from_unicode_page[ch.row()]) { |
|
1067 from_unicode_page[ch.row()] = new char[256]; |
|
1068 for (int i=0; i<256; i++) |
|
1069 from_unicode_page[ch.row()][i]=0; |
|
1070 } |
|
1071 if ( mb_unicode ) { |
|
1072 from_unicode_page[ch.row()][ch.cell()] = 0; |
|
1073 if (!from_unicode_page_multibyte) { |
|
1074 from_unicode_page_multibyte = new char**[256]; |
|
1075 for (int i=0; i<256; i++) |
|
1076 from_unicode_page_multibyte[i]=0; |
|
1077 } |
|
1078 if (!from_unicode_page_multibyte[ch.row()]) { |
|
1079 from_unicode_page_multibyte[ch.row()] = new char*[256]; |
|
1080 for (int i=0; i<256; i++) |
|
1081 from_unicode_page_multibyte[ch.row()][i] = 0; |
|
1082 } |
|
1083 mb[nmb++] = 0; |
|
1084 from_unicode_page_multibyte[ch.row()][ch.cell()] |
|
1085 = qstrdup(mb); |
|
1086 *mb_unicode = unicode; |
|
1087 } else { |
|
1088 from_unicode_page[ch.row()][ch.cell()] = (char)byte; |
|
1089 if ( to_unicode ) |
|
1090 to_unicode[byte] = unicode; |
|
1091 else |
|
1092 to_unicode_multibyte[byte].unicode = unicode; |
|
1093 } |
|
1094 } else { |
|
1095 } |
|
1096 } |
|
1097 } |
|
1098 n = n.stripWhiteSpace(); |
|
1099 |
|
1100 unkn = '?'; // ##### Might be a bad choice. |
|
1101 } |
|
1102 |
|
1103 ~QTextCodecFromIOD() |
|
1104 { |
|
1105 if ( from_unicode_page ) { |
|
1106 for (int i=0; i<256; i++) |
|
1107 if (from_unicode_page[i]) |
|
1108 delete [] from_unicode_page[i]; |
|
1109 } |
|
1110 if ( from_unicode_page_multibyte ) { |
|
1111 for (int i=0; i<256; i++) |
|
1112 if (from_unicode_page_multibyte[i]) |
|
1113 for (int j=0; j<256; j++) |
|
1114 if (from_unicode_page_multibyte[i][j]) |
|
1115 delete [] from_unicode_page_multibyte[i][j]; |
|
1116 } |
|
1117 if ( to_unicode ) |
|
1118 delete [] to_unicode; |
|
1119 if ( to_unicode_multibyte ) |
|
1120 delete [] to_unicode_multibyte; |
|
1121 } |
|
1122 |
|
1123 bool ok() const |
|
1124 { |
|
1125 return !!from_unicode_page; |
|
1126 } |
|
1127 |
|
1128 QTextDecoder* makeDecoder() const |
|
1129 { |
|
1130 if ( stateless() ) |
|
1131 return QTextCodec::makeDecoder(); |
|
1132 else |
|
1133 return new QTextCodecFromIODDecoder(this); |
|
1134 } |
|
1135 |
|
1136 const char* name() const |
|
1137 { |
|
1138 return n; |
|
1139 } |
|
1140 |
|
1141 int mibEnum() const |
|
1142 { |
|
1143 return 0; // #### Unknown. |
|
1144 } |
|
1145 |
|
1146 int heuristicContentMatch(const char*, int) const |
|
1147 { |
|
1148 return 0; |
|
1149 } |
|
1150 |
|
1151 int heuristicNameMatch(const char* hint) const |
|
1152 { |
|
1153 int bestr = QTextCodec::heuristicNameMatch(hint); |
|
1154 QStrListIterator it(aliases); |
|
1155 char* a; |
|
1156 while ((a=it.current())) { |
|
1157 ++it; |
|
1158 int r = simpleHeuristicNameMatch(a,hint); |
|
1159 if (r > bestr) |
|
1160 bestr = r; |
|
1161 } |
|
1162 return bestr; |
|
1163 } |
|
1164 |
|
1165 QString toUnicode(const char* chars, int len) const |
|
1166 { |
|
1167 const uchar* uchars = (const uchar*)chars; |
|
1168 QString result; |
|
1169 QMultiByteUnicodeTable* multibyte=to_unicode_multibyte; |
|
1170 if ( multibyte ) { |
|
1171 while (len--) { |
|
1172 QMultiByteUnicodeTable& mb = multibyte[*uchars]; |
|
1173 if ( mb.multibyte ) { |
|
1174 // Chained multi-byte |
|
1175 multibyte = mb.multibyte; |
|
1176 } else { |
|
1177 result += QChar(mb.unicode); |
|
1178 multibyte=to_unicode_multibyte; |
|
1179 } |
|
1180 uchars++; |
|
1181 } |
|
1182 } else { |
|
1183 while (len--) |
|
1184 result += QChar(to_unicode[*uchars++]); |
|
1185 } |
|
1186 return result; |
|
1187 } |
|
1188 |
|
1189 QCString fromUnicode(const QString& uc, int& lenInOut) const |
|
1190 { |
|
1191 if (lenInOut > (int)uc.length()) |
|
1192 lenInOut = uc.length(); |
|
1193 int rlen = lenInOut*max_bytes_per_char; |
|
1194 QCString rstr(rlen); |
|
1195 char* cursor = rstr.data(); |
|
1196 char* s=0; |
|
1197 int l = lenInOut; |
|
1198 int lout = 0; |
|
1199 for (int i=0; i<l; i++) { |
|
1200 QChar ch = uc[i]; |
|
1201 if ( ch == QChar::null ) { |
|
1202 // special |
|
1203 *cursor++ = 0; |
|
1204 } else if ( from_unicode_page[ch.row()] && |
|
1205 from_unicode_page[ch.row()][ch.cell()] ) |
|
1206 { |
|
1207 *cursor++ = from_unicode_page[ch.row()][ch.cell()]; |
|
1208 lout++; |
|
1209 } else if ( from_unicode_page_multibyte && |
|
1210 from_unicode_page_multibyte[ch.row()] && |
|
1211 (s=from_unicode_page_multibyte[ch.row()][ch.cell()]) ) |
|
1212 { |
|
1213 while (*s) { |
|
1214 *cursor++ = *s++; |
|
1215 lout++; |
|
1216 } |
|
1217 } else { |
|
1218 *cursor++ = unkn; |
|
1219 lout++; |
|
1220 } |
|
1221 } |
|
1222 *cursor = 0; |
|
1223 lenInOut = lout; |
|
1224 return rstr; |
|
1225 } |
|
1226 }; |
|
1227 |
|
1228 QTextCodecFromIODDecoder::QTextCodecFromIODDecoder(const QTextCodecFromIOD* c) : |
|
1229 codec(c) |
|
1230 { |
|
1231 mb = codec->to_unicode_multibyte; |
|
1232 } |
|
1233 |
|
1234 QString QTextCodecFromIODDecoder::toUnicode(const char* chars, int len) |
|
1235 { |
|
1236 const uchar* uchars = (const uchar*)chars; |
|
1237 QString result; |
|
1238 while (len--) { |
|
1239 QMultiByteUnicodeTable& t = mb[*uchars]; |
|
1240 if ( t.multibyte ) { |
|
1241 // Chained multi-byte |
|
1242 mb = t.multibyte; |
|
1243 } else { |
|
1244 if ( t.unicode ) |
|
1245 result += QChar(t.unicode); |
|
1246 mb=codec->to_unicode_multibyte; |
|
1247 } |
|
1248 uchars++; |
|
1249 } |
|
1250 return result; |
|
1251 } |
|
1252 |
|
1253 /*! |
|
1254 Reads a POSIX2 charmap definition from \a iod. |
|
1255 The parser recognizes the following lines: |
|
1256 <pre> |
|
1257 <code_set_name> <i>name</i> |
|
1258 <escape_char> <i>character</i> |
|
1259 % alias <i>alias</i> |
|
1260 CHARMAP |
|
1261 <<i>token</i>> /x<i>hexbyte</i> <U<i>unicode</i>> ... |
|
1262 <<i>token</i>> /d<i>decbyte</i> <U<i>unicode</i>> ... |
|
1263 <<i>token</i>> /<i>octbyte</i> <U<i>unicode</i>> ... |
|
1264 <<i>token</i>> /<i>any</i>/<i>any</i>... <U<i>unicode</i>> ... |
|
1265 END CHARMAP |
|
1266 </pre> |
|
1267 |
|
1268 The resulting QTextCodec is returned (and also added to the |
|
1269 global list of codecs). The name() of the result is taken |
|
1270 from the code_set_name. |
|
1271 |
|
1272 Note that a codec constructed in this way uses much more memory |
|
1273 and is slower than a hand-written QTextCodec subclass, since |
|
1274 tables in code are in memory shared by all applications simultaneously |
|
1275 using Qt. |
|
1276 |
|
1277 \sa loadCharmapFile() |
|
1278 */ |
|
1279 QTextCodec* QTextCodec::loadCharmap(QIODevice* iod) |
|
1280 { |
|
1281 QTextCodecFromIOD* r = new QTextCodecFromIOD(iod); |
|
1282 if ( !r->ok() ) { |
|
1283 delete r; |
|
1284 r = 0; |
|
1285 } |
|
1286 return r; |
|
1287 } |
|
1288 |
|
1289 /*! |
|
1290 A convenience function for loadCharmap(). |
|
1291 */ |
|
1292 QTextCodec* QTextCodec::loadCharmapFile(QString filename) |
|
1293 { |
|
1294 QFile f(filename); |
|
1295 if (f.open(IO_ReadOnly)) { |
|
1296 QTextCodecFromIOD* r = new QTextCodecFromIOD(&f); |
|
1297 if ( !r->ok() ) |
|
1298 delete r; |
|
1299 else |
|
1300 return r; |
|
1301 } |
|
1302 return 0; |
|
1303 } |
|
1304 #endif //QT_NO_CODECS |
|
1305 |
|
1306 |
|
1307 /*! |
|
1308 Returns a string representing the current language. |
|
1309 */ |
|
1310 |
|
1311 const char* QTextCodec::locale() |
|
1312 { |
|
1313 static QCString lang; |
|
1314 if ( lang.isEmpty() ) { |
|
1315 lang = getenv( "LANG" ); //########Windows?? |
|
1316 if ( lang.isEmpty() ) |
|
1317 lang = "C"; |
|
1318 } |
|
1319 return lang; |
|
1320 } |
|
1321 |
|
1322 |
|
1323 |
|
1324 #ifndef QT_NO_CODECS |
|
1325 |
|
1326 class QSimpleTextCodec: public QTextCodec |
|
1327 { |
|
1328 public: |
|
1329 QSimpleTextCodec( int ); |
|
1330 ~QSimpleTextCodec(); |
|
1331 |
|
1332 QString toUnicode(const char* chars, int len) const; |
|
1333 QCString fromUnicode(const QString& uc, int& lenInOut ) const; |
|
1334 |
|
1335 const char* name() const; |
|
1336 int mibEnum() const; |
|
1337 |
|
1338 int heuristicContentMatch(const char* chars, int len) const; |
|
1339 |
|
1340 int heuristicNameMatch(const char* hint) const; |
|
1341 |
|
1342 private: |
|
1343 int forwardIndex; |
|
1344 }; |
|
1345 |
|
1346 |
|
1347 #define LAST_MIB 2259 |
|
1348 |
|
1349 static struct { |
|
1350 const char * cs; |
|
1351 int mib; |
|
1352 Q_UINT16 values[128]; |
|
1353 } unicodevalues[] = { |
|
1354 // from RFC 1489, ftp://ftp.isi.edu/in-notes/rfc1489.txt |
|
1355 { "KOI8-R", 2084, |
|
1356 { 0x2500, 0x2502, 0x250C, 0x2510, 0x2514, 0x2518, 0x251C, 0x2524, |
|
1357 0x252C, 0x2534, 0x253C, 0x2580, 0x2584, 0x2588, 0x258C, 0x2590, |
|
1358 0x2591, 0x2592, 0x2593, 0x2320, 0x25A0, 0x2219/**/, 0x221A, 0x2248, |
|
1359 0x2264, 0x2265, 0x00A0, 0x2321, 0x00B0, 0x00B2, 0x00B7, 0x00F7, |
|
1360 0x2550, 0x2551, 0x2552, 0x0451, 0x2553, 0x2554, 0x2555, 0x2556, |
|
1361 0x2557, 0x2558, 0x2559, 0x255A, 0x255B, 0x255C, 0x255D, 0x255E, |
|
1362 0x255F, 0x2560, 0x2561, 0x0401, 0x2562, 0x2563, 0x2564, 0x2565, |
|
1363 0x2566, 0x2567, 0x2568, 0x2569, 0x256A, 0x256B, 0x256C, 0x00A9, |
|
1364 0x044E, 0x0430, 0x0431, 0x0446, 0x0434, 0x0435, 0x0444, 0x0433, |
|
1365 0x0445, 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, |
|
1366 0x043F, 0x044F, 0x0440, 0x0441, 0x0442, 0x0443, 0x0436, 0x0432, |
|
1367 0x044C, 0x044B, 0x0437, 0x0448, 0x044D, 0x0449, 0x0447, 0x044A, |
|
1368 0x042E, 0x0410, 0x0411, 0x0426, 0x0414, 0x0415, 0x0424, 0x0413, |
|
1369 0x0425, 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, |
|
1370 0x041F, 0x042F, 0x0420, 0x0421, 0x0422, 0x0423, 0x0416, 0x0412, |
|
1371 0x042C, 0x042B, 0x0417, 0x0428, 0x042D, 0x0429, 0x0427, 0x042A } }, |
|
1372 // /**/ - The BULLET OPERATOR is confused. Some people think |
|
1373 // it should be 0x2022 (BULLET). |
|
1374 |
|
1375 // from RFC 2319, ftp://ftp.isi.edu/in-notes/rfc2319.txt |
|
1376 { "KOI8-U", 2088, |
|
1377 { 0x2500, 0x2502, 0x250C, 0x2510, 0x2514, 0x2518, 0x251C, 0x2524, |
|
1378 0x252C, 0x2534, 0x253C, 0x2580, 0x2584, 0x2588, 0x258C, 0x2590, |
|
1379 0x2591, 0x2592, 0x2593, 0x2320, 0x25A0, 0x2219, 0x221A, 0x2248, |
|
1380 0x2264, 0x2265, 0x00A0, 0x2321, 0x00B0, 0x00B2, 0x00B7, 0x00F7, |
|
1381 0x2550, 0x2551, 0x2552, 0x0451, 0x0454, 0x2554, 0x0456, 0x0457, |
|
1382 0x2557, 0x2558, 0x2559, 0x255A, 0x255B, 0x0491, 0x255D, 0x255E, |
|
1383 0x255F, 0x2560, 0x2561, 0x0401, 0x0404, 0x2563, 0x0406, 0x0407, |
|
1384 0x2566, 0x2567, 0x2568, 0x2569, 0x256A, 0x0490, 0x256C, 0x00A9, |
|
1385 0x044E, 0x0430, 0x0431, 0x0446, 0x0434, 0x0435, 0x0444, 0x0433, |
|
1386 0x0445, 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, |
|
1387 0x043F, 0x044F, 0x0440, 0x0441, 0x0442, 0x0443, 0x0436, 0x0432, |
|
1388 0x044C, 0x044B, 0x0437, 0x0448, 0x044D, 0x0449, 0x0447, 0x044A, |
|
1389 0x042E, 0x0410, 0x0411, 0x0426, 0x0414, 0x0415, 0x0424, 0x0413, |
|
1390 0x0425, 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, |
|
1391 0x041F, 0x042F, 0x0420, 0x0421, 0x0422, 0x0423, 0x0416, 0x0412, |
|
1392 0x042C, 0x042B, 0x0417, 0x0428, 0x042D, 0x0429, 0x0427, 0x042A } }, |
|
1393 |
|
1394 // next bits generated from tables on the Unicode 2.0 CD. we can |
|
1395 // use these tables since this is part of the transition to using |
|
1396 // unicode everywhere in qt. |
|
1397 |
|
1398 // $ for A in 8 9 A B C D E F ; do for B in 0 1 2 3 4 5 6 7 8 9 A B C D E F ; do echo 0x${A}${B} 0xFFFD ; done ; done > /tmp/digits ; for a in 8859-* ; do ( awk '/^0x[89ABCDEF]/{ print $1, $2 }' < $a ; cat /tmp/digits ) | sort | uniq -w4 | cut -c6- | paste '-d ' - - - - - - - - | sed -e 's/ /, /g' -e 's/$/,/' -e '$ s/,$/} },/' -e '1 s/^/{ /' > ~/tmp/$a ; done |
|
1399 |
|
1400 // then I inserted the files manually. |
|
1401 { "ISO 8859-2", 5, |
|
1402 { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, |
|
1403 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F, |
|
1404 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, |
|
1405 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F, |
|
1406 0x00A0, 0x0104, 0x02D8, 0x0141, 0x00A4, 0x013D, 0x015A, 0x00A7, |
|
1407 0x00A8, 0x0160, 0x015E, 0x0164, 0x0179, 0x00AD, 0x017D, 0x017B, |
|
1408 0x00B0, 0x0105, 0x02DB, 0x0142, 0x00B4, 0x013E, 0x015B, 0x02C7, |
|
1409 0x00B8, 0x0161, 0x015F, 0x0165, 0x017A, 0x02DD, 0x017E, 0x017C, |
|
1410 0x0154, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x0139, 0x0106, 0x00C7, |
|
1411 0x010C, 0x00C9, 0x0118, 0x00CB, 0x011A, 0x00CD, 0x00CE, 0x010E, |
|
1412 0x0110, 0x0143, 0x0147, 0x00D3, 0x00D4, 0x0150, 0x00D6, 0x00D7, |
|
1413 0x0158, 0x016E, 0x00DA, 0x0170, 0x00DC, 0x00DD, 0x0162, 0x00DF, |
|
1414 0x0155, 0x00E1, 0x00E2, 0x0103, 0x00E4, 0x013A, 0x0107, 0x00E7, |
|
1415 0x010D, 0x00E9, 0x0119, 0x00EB, 0x011B, 0x00ED, 0x00EE, 0x010F, |
|
1416 0x0111, 0x0144, 0x0148, 0x00F3, 0x00F4, 0x0151, 0x00F6, 0x00F7, |
|
1417 0x0159, 0x016F, 0x00FA, 0x0171, 0x00FC, 0x00FD, 0x0163, 0x02D9} }, |
|
1418 { "ISO 8859-3", 6, |
|
1419 { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, |
|
1420 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F, |
|
1421 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, |
|
1422 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F, |
|
1423 0x00A0, 0x0126, 0x02D8, 0x00A3, 0x00A4, 0xFFFD, 0x0124, 0x00A7, |
|
1424 0x00A8, 0x0130, 0x015E, 0x011E, 0x0134, 0x00AD, 0xFFFD, 0x017B, |
|
1425 0x00B0, 0x0127, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x0125, 0x00B7, |
|
1426 0x00B8, 0x0131, 0x015F, 0x011F, 0x0135, 0x00BD, 0xFFFD, 0x017C, |
|
1427 0x00C0, 0x00C1, 0x00C2, 0xFFFD, 0x00C4, 0x010A, 0x0108, 0x00C7, |
|
1428 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF, |
|
1429 0xFFFD, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x0120, 0x00D6, 0x00D7, |
|
1430 0x011C, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x016C, 0x015C, 0x00DF, |
|
1431 0x00E0, 0x00E1, 0x00E2, 0xFFFD, 0x00E4, 0x010B, 0x0109, 0x00E7, |
|
1432 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF, |
|
1433 0xFFFD, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x0121, 0x00F6, 0x00F7, |
|
1434 0x011D, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x016D, 0x015D, 0x02D9} }, |
|
1435 { "ISO 8859-4", 7, |
|
1436 { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, |
|
1437 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F, |
|
1438 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, |
|
1439 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F, |
|
1440 0x00A0, 0x0104, 0x0138, 0x0156, 0x00A4, 0x0128, 0x013B, 0x00A7, |
|
1441 0x00A8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00AD, 0x017D, 0x00AF, |
|
1442 0x00B0, 0x0105, 0x02DB, 0x0157, 0x00B4, 0x0129, 0x013C, 0x02C7, |
|
1443 0x00B8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014A, 0x017E, 0x014B, |
|
1444 0x0100, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x012E, |
|
1445 0x010C, 0x00C9, 0x0118, 0x00CB, 0x0116, 0x00CD, 0x00CE, 0x012A, |
|
1446 0x0110, 0x0145, 0x014C, 0x0136, 0x00D4, 0x00D5, 0x00D6, 0x00D7, |
|
1447 0x00D8, 0x0172, 0x00DA, 0x00DB, 0x00DC, 0x0168, 0x016A, 0x00DF, |
|
1448 0x0101, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x012F, |
|
1449 0x010D, 0x00E9, 0x0119, 0x00EB, 0x0117, 0x00ED, 0x00EE, 0x012B, |
|
1450 0x0111, 0x0146, 0x014D, 0x0137, 0x00F4, 0x00F5, 0x00F6, 0x00F7, |
|
1451 0x00F8, 0x0173, 0x00FA, 0x00FB, 0x00FC, 0x0169, 0x016B, 0x02D9} }, |
|
1452 { "ISO 8859-5", 8, |
|
1453 { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, |
|
1454 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F, |
|
1455 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, |
|
1456 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F, |
|
1457 0x00A0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407, |
|
1458 0x0408, 0x0409, 0x040A, 0x040B, 0x040C, 0x00AD, 0x040E, 0x040F, |
|
1459 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417, |
|
1460 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, 0x041F, |
|
1461 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427, |
|
1462 0x0428, 0x0429, 0x042A, 0x042B, 0x042C, 0x042D, 0x042E, 0x042F, |
|
1463 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437, |
|
1464 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, 0x043F, |
|
1465 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447, |
|
1466 0x0448, 0x0449, 0x044A, 0x044B, 0x044C, 0x044D, 0x044E, 0x044F, |
|
1467 0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457, |
|
1468 0x0458, 0x0459, 0x045A, 0x045B, 0x045C, 0x00A7, 0x045E, 0x045F} }, |
|
1469 { "ISO 8859-6-I", 82, |
|
1470 { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, |
|
1471 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F, |
|
1472 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, |
|
1473 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F, |
|
1474 0x00A0, 0xFFFD, 0xFFFD, 0xFFFD, 0x00A4, 0xFFFD, 0xFFFD, 0xFFFD, |
|
1475 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x060C, 0x00AD, 0xFFFD, 0xFFFD, |
|
1476 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, |
|
1477 0xFFFD, 0xFFFD, 0xFFFD, 0x061B, 0xFFFD, 0xFFFD, 0xFFFD, 0x061F, |
|
1478 0xFFFD, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627, |
|
1479 0x0628, 0x0629, 0x062A, 0x062B, 0x062C, 0x062D, 0x062E, 0x062F, |
|
1480 0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637, |
|
1481 0x0638, 0x0639, 0x063A, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, |
|
1482 0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647, |
|
1483 0x0648, 0x0649, 0x064A, 0x064B, 0x064C, 0x064D, 0x064E, 0x064F, |
|
1484 0x0650, 0x0651, 0x0652, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, |
|
1485 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD} }, |
|
1486 { "ISO 8859-7", 10, |
|
1487 { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, |
|
1488 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F, |
|
1489 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, |
|
1490 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F, |
|
1491 0x00A0, 0x2018, 0x2019, 0x00A3, 0xFFFD, 0xFFFD, 0x00A6, 0x00A7, |
|
1492 0x00A8, 0x00A9, 0xFFFD, 0x00AB, 0x00AC, 0x00AD, 0xFFFD, 0x2015, |
|
1493 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x0384, 0x0385, 0x0386, 0x00B7, |
|
1494 0x0388, 0x0389, 0x038A, 0x00BB, 0x038C, 0x00BD, 0x038E, 0x038F, |
|
1495 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, |
|
1496 0x0398, 0x0399, 0x039A, 0x039B, 0x039C, 0x039D, 0x039E, 0x039F, |
|
1497 0x03A0, 0x03A1, 0xFFFD, 0x03A3, 0x03A4, 0x03A5, 0x03A6, 0x03A7, |
|
1498 0x03A8, 0x03A9, 0x03AA, 0x03AB, 0x03AC, 0x03AD, 0x03AE, 0x03AF, |
|
1499 0x03B0, 0x03B1, 0x03B2, 0x03B3, 0x03B4, 0x03B5, 0x03B6, 0x03B7, |
|
1500 0x03B8, 0x03B9, 0x03BA, 0x03BB, 0x03BC, 0x03BD, 0x03BE, 0x03BF, |
|
1501 0x03C0, 0x03C1, 0x03C2, 0x03C3, 0x03C4, 0x03C5, 0x03C6, 0x03C7, |
|
1502 0x03C8, 0x03C9, 0x03CA, 0x03CB, 0x03CC, 0x03CD, 0x03CE, 0xFFFD} }, |
|
1503 { "ISO 8859-8-I", 85, |
|
1504 { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, |
|
1505 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F, |
|
1506 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, |
|
1507 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F, |
|
1508 0x00A0, 0xFFFD, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7, |
|
1509 0x00A8, 0x00A9, 0x00D7, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x203E, |
|
1510 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7, |
|
1511 0x00B8, 0x00B9, 0x00F7, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0xFFFD, |
|
1512 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, |
|
1513 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, |
|
1514 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, |
|
1515 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x2017, |
|
1516 0x05D0, 0x05D1, 0x05D2, 0x05D3, 0x05D4, 0x05D5, 0x05D6, 0x05D7, |
|
1517 0x05D8, 0x05D9, 0x05DA, 0x05DB, 0x05DC, 0x05DD, 0x05DE, 0x05DF, |
|
1518 0x05E0, 0x05E1, 0x05E2, 0x05E3, 0x05E4, 0x05E5, 0x05E6, 0x05E7, |
|
1519 0x05E8, 0x05E9, 0x05EA, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD} }, |
|
1520 { "ISO 8859-9", 12, |
|
1521 { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, |
|
1522 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F, |
|
1523 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, |
|
1524 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F, |
|
1525 0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7, |
|
1526 0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF, |
|
1527 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7, |
|
1528 0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF, |
|
1529 0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7, |
|
1530 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF, |
|
1531 0x011E, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7, |
|
1532 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x0130, 0x015E, 0x00DF, |
|
1533 0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7, |
|
1534 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF, |
|
1535 0x011F, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7, |
|
1536 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x0131, 0x015F, 0x00FF} }, |
|
1537 { "ISO 8859-10", 13, |
|
1538 { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, |
|
1539 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F, |
|
1540 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, |
|
1541 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F, |
|
1542 0x00A0, 0x0104, 0x0112, 0x0122, 0x012A, 0x0128, 0x0136, 0x00A7, |
|
1543 0x013B, 0x0110, 0x0160, 0x0166, 0x017D, 0x00AD, 0x016A, 0x014A, |
|
1544 0x00B0, 0x0105, 0x0113, 0x0123, 0x012B, 0x0129, 0x0137, 0x00B7, |
|
1545 0x013C, 0x0111, 0x0161, 0x0167, 0x017E, 0x2015, 0x016B, 0x014B, |
|
1546 0x0100, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x012E, |
|
1547 0x010C, 0x00C9, 0x0118, 0x00CB, 0x0116, 0x00CD, 0x00CE, 0x00CF, |
|
1548 0x00D0, 0x0145, 0x014C, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x0168, |
|
1549 0x00D8, 0x0172, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF, |
|
1550 0x0101, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x012F, |
|
1551 0x010D, 0x00E9, 0x0119, 0x00EB, 0x0117, 0x00ED, 0x00EE, 0x00EF, |
|
1552 0x00F0, 0x0146, 0x014D, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x0169, |
|
1553 0x00F8, 0x0173, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x0138} }, |
|
1554 { "ISO 8859-13", 109, |
|
1555 { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, |
|
1556 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F, |
|
1557 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, |
|
1558 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F, |
|
1559 0x00A0, 0x201D, 0x00A2, 0x00A3, 0x00A4, 0x201E, 0x00A6, 0x00A7, |
|
1560 0x00D8, 0x00A9, 0x0156, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00C6, |
|
1561 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x201C, 0x00B5, 0x00B6, 0x00B7, |
|
1562 0x00F8, 0x00B9, 0x0157, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00E6, |
|
1563 0x0104, 0x012E, 0x0100, 0x0106, 0x00C4, 0x00C5, 0x0118, 0x0112, |
|
1564 0x010C, 0x00C9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012A, 0x013B, |
|
1565 0x0160, 0x0143, 0x0145, 0x00D3, 0x014C, 0x00D5, 0x00D6, 0x00D7, |
|
1566 0x0172, 0x0141, 0x015A, 0x016A, 0x00DC, 0x017B, 0x017D, 0x00DF, |
|
1567 0x0105, 0x012F, 0x0101, 0x0107, 0x00E4, 0x00E5, 0x0119, 0x0113, |
|
1568 0x010D, 0x00E9, 0x017A, 0x0117, 0x0123, 0x0137, 0x012B, 0x013C, |
|
1569 0x0161, 0x0144, 0x0146, 0x00F3, 0x014D, 0x00F5, 0x00F6, 0x00F7, |
|
1570 0x0173, 0x0142, 0x015B, 0x016B, 0x00FC, 0x017C, 0x017E, 0x2019} }, |
|
1571 { "ISO 8859-14", 110, |
|
1572 { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, |
|
1573 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F, |
|
1574 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, |
|
1575 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F, |
|
1576 0x00A0, 0x1E02, 0x1E03, 0x00A3, 0x010A, 0x010B, 0x1E0A, 0x00A7, |
|
1577 0x1E80, 0x00A9, 0x1E82, 0x1E0B, 0x1EF2, 0x00AD, 0x00AE, 0x0178, |
|
1578 0x1E1E, 0x1E1F, 0x0120, 0x0121, 0x1E40, 0x1E41, 0x00B6, 0x1E56, |
|
1579 0x1E81, 0x1E57, 0x1E83, 0x1E60, 0x1EF3, 0x1E84, 0x1E85, 0x1E61, |
|
1580 0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7, |
|
1581 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF, |
|
1582 0x0174, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x1E6A, |
|
1583 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x0176, 0x00DF, |
|
1584 0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7, |
|
1585 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF, |
|
1586 0x0175, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x1E6B, |
|
1587 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x0177, 0x00FF} }, |
|
1588 { "ISO 8859-15", 111, |
|
1589 { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, |
|
1590 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F, |
|
1591 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, |
|
1592 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F, |
|
1593 0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x20AC, 0x00A5, 0x0160, 0x00A7, |
|
1594 0x0161, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF, |
|
1595 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x017D, 0x00B5, 0x00B6, 0x00B7, |
|
1596 0x017E, 0x00B9, 0x00BA, 0x00BB, 0x0152, 0x0153, 0x0178, 0x00BF, |
|
1597 0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7, |
|
1598 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF, |
|
1599 0x00D0, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7, |
|
1600 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF, |
|
1601 0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7, |
|
1602 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF, |
|
1603 0x00F0, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7, |
|
1604 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x00FF} }, |
|
1605 |
|
1606 // next bits generated again from tables on the Unicode 3.0 CD. |
|
1607 |
|
1608 // $ for a in CP* ; do ( awk '/^0x[89ABCDEF]/{ print $1, $2 }' < $a ) | sort | sed -e 's/#UNDEF.*$/0xFFFD/' | cut -c6- | paste '-d ' - - - - - - - - | sed -e 's/ /, /g' -e 's/$/,/' -e '$ s/,$/} },/' -e '1 s/^/{ /' > ~/tmp/$a ; done |
|
1609 |
|
1610 { "CP 874", 0, //### what is the mib? |
|
1611 { 0x20AC, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x2026, 0xFFFD, 0xFFFD, |
|
1612 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, |
|
1613 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, |
|
1614 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, |
|
1615 0x00A0, 0x0E01, 0x0E02, 0x0E03, 0x0E04, 0x0E05, 0x0E06, 0x0E07, |
|
1616 0x0E08, 0x0E09, 0x0E0A, 0x0E0B, 0x0E0C, 0x0E0D, 0x0E0E, 0x0E0F, |
|
1617 0x0E10, 0x0E11, 0x0E12, 0x0E13, 0x0E14, 0x0E15, 0x0E16, 0x0E17, |
|
1618 0x0E18, 0x0E19, 0x0E1A, 0x0E1B, 0x0E1C, 0x0E1D, 0x0E1E, 0x0E1F, |
|
1619 0x0E20, 0x0E21, 0x0E22, 0x0E23, 0x0E24, 0x0E25, 0x0E26, 0x0E27, |
|
1620 0x0E28, 0x0E29, 0x0E2A, 0x0E2B, 0x0E2C, 0x0E2D, 0x0E2E, 0x0E2F, |
|
1621 0x0E30, 0x0E31, 0x0E32, 0x0E33, 0x0E34, 0x0E35, 0x0E36, 0x0E37, |
|
1622 0x0E38, 0x0E39, 0x0E3A, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x0E3F, |
|
1623 0x0E40, 0x0E41, 0x0E42, 0x0E43, 0x0E44, 0x0E45, 0x0E46, 0x0E47, |
|
1624 0x0E48, 0x0E49, 0x0E4A, 0x0E4B, 0x0E4C, 0x0E4D, 0x0E4E, 0x0E4F, |
|
1625 0x0E50, 0x0E51, 0x0E52, 0x0E53, 0x0E54, 0x0E55, 0x0E56, 0x0E57, |
|
1626 0x0E58, 0x0E59, 0x0E5A, 0x0E5B, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD} }, |
|
1627 { "CP 1250", 2250, |
|
1628 { 0x20AC, 0xFFFD, 0x201A, 0xFFFD, 0x201E, 0x2026, 0x2020, 0x2021, |
|
1629 0xFFFD, 0x2030, 0x0160, 0x2039, 0x015A, 0x0164, 0x017D, 0x0179, |
|
1630 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, |
|
1631 0xFFFD, 0x2122, 0x0161, 0x203A, 0x015B, 0x0165, 0x017E, 0x017A, |
|
1632 0x00A0, 0x02C7, 0x02D8, 0x0141, 0x00A4, 0x0104, 0x00A6, 0x00A7, |
|
1633 0x00A8, 0x00A9, 0x015E, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x017B, |
|
1634 0x00B0, 0x00B1, 0x02DB, 0x0142, 0x00B4, 0x00B5, 0x00B6, 0x00B7, |
|
1635 0x00B8, 0x0105, 0x015F, 0x00BB, 0x013D, 0x02DD, 0x013E, 0x017C, |
|
1636 0x0154, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x0139, 0x0106, 0x00C7, |
|
1637 0x010C, 0x00C9, 0x0118, 0x00CB, 0x011A, 0x00CD, 0x00CE, 0x010E, |
|
1638 0x0110, 0x0143, 0x0147, 0x00D3, 0x00D4, 0x0150, 0x00D6, 0x00D7, |
|
1639 0x0158, 0x016E, 0x00DA, 0x0170, 0x00DC, 0x00DD, 0x0162, 0x00DF, |
|
1640 0x0155, 0x00E1, 0x00E2, 0x0103, 0x00E4, 0x013A, 0x0107, 0x00E7, |
|
1641 0x010D, 0x00E9, 0x0119, 0x00EB, 0x011B, 0x00ED, 0x00EE, 0x010F, |
|
1642 0x0111, 0x0144, 0x0148, 0x00F3, 0x00F4, 0x0151, 0x00F6, 0x00F7, |
|
1643 0x0159, 0x016F, 0x00FA, 0x0171, 0x00FC, 0x00FD, 0x0163, 0x02D9} }, |
|
1644 { "CP 1251", 2251, |
|
1645 { 0x0402, 0x0403, 0x201A, 0x0453, 0x201E, 0x2026, 0x2020, 0x2021, |
|
1646 0x20AC, 0x2030, 0x0409, 0x2039, 0x040A, 0x040C, 0x040B, 0x040F, |
|
1647 0x0452, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, |
|
1648 0xFFFD, 0x2122, 0x0459, 0x203A, 0x045A, 0x045C, 0x045B, 0x045F, |
|
1649 0x00A0, 0x040E, 0x045E, 0x0408, 0x00A4, 0x0490, 0x00A6, 0x00A7, |
|
1650 0x0401, 0x00A9, 0x0404, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x0407, |
|
1651 0x00B0, 0x00B1, 0x0406, 0x0456, 0x0491, 0x00B5, 0x00B6, 0x00B7, |
|
1652 0x0451, 0x2116, 0x0454, 0x00BB, 0x0458, 0x0405, 0x0455, 0x0457, |
|
1653 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417, |
|
1654 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, 0x041F, |
|
1655 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427, |
|
1656 0x0428, 0x0429, 0x042A, 0x042B, 0x042C, 0x042D, 0x042E, 0x042F, |
|
1657 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437, |
|
1658 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, 0x043F, |
|
1659 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447, |
|
1660 0x0448, 0x0449, 0x044A, 0x044B, 0x044C, 0x044D, 0x044E, 0x044F} }, |
|
1661 { "CP 1252", 2252, |
|
1662 { 0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, |
|
1663 0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0xFFFD, 0x017D, 0xFFFD, |
|
1664 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, |
|
1665 0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0xFFFD, 0x017E, 0x0178, |
|
1666 0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7, |
|
1667 0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF, |
|
1668 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7, |
|
1669 0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF, |
|
1670 0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7, |
|
1671 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF, |
|
1672 0x00D0, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7, |
|
1673 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF, |
|
1674 0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7, |
|
1675 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF, |
|
1676 0x00F0, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7, |
|
1677 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x00FF} }, |
|
1678 { "CP 1253", 2253, |
|
1679 { 0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, |
|
1680 0xFFFD, 0x2030, 0xFFFD, 0x2039, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, |
|
1681 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, |
|
1682 0xFFFD, 0x2122, 0xFFFD, 0x203A, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, |
|
1683 0x00A0, 0x0385, 0x0386, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7, |
|
1684 0x00A8, 0x00A9, 0xFFFD, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x2015, |
|
1685 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x0384, 0x00B5, 0x00B6, 0x00B7, |
|
1686 0x0388, 0x0389, 0x038A, 0x00BB, 0x038C, 0x00BD, 0x038E, 0x038F, |
|
1687 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, |
|
1688 0x0398, 0x0399, 0x039A, 0x039B, 0x039C, 0x039D, 0x039E, 0x039F, |
|
1689 0x03A0, 0x03A1, 0xFFFD, 0x03A3, 0x03A4, 0x03A5, 0x03A6, 0x03A7, |
|
1690 0x03A8, 0x03A9, 0x03AA, 0x03AB, 0x03AC, 0x03AD, 0x03AE, 0x03AF, |
|
1691 0x03B0, 0x03B1, 0x03B2, 0x03B3, 0x03B4, 0x03B5, 0x03B6, 0x03B7, |
|
1692 0x03B8, 0x03B9, 0x03BA, 0x03BB, 0x03BC, 0x03BD, 0x03BE, 0x03BF, |
|
1693 0x03C0, 0x03C1, 0x03C2, 0x03C3, 0x03C4, 0x03C5, 0x03C6, 0x03C7, |
|
1694 0x03C8, 0x03C9, 0x03CA, 0x03CB, 0x03CC, 0x03CD, 0x03CE, 0xFFFD} }, |
|
1695 { "CP 1254", 2254, |
|
1696 { 0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, |
|
1697 0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0xFFFD, 0xFFFD, 0xFFFD, |
|
1698 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, |
|
1699 0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0xFFFD, 0xFFFD, 0x0178, |
|
1700 0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7, |
|
1701 0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF, |
|
1702 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7, |
|
1703 0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF, |
|
1704 0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7, |
|
1705 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF, |
|
1706 0x011E, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7, |
|
1707 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x0130, 0x015E, 0x00DF, |
|
1708 0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7, |
|
1709 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF, |
|
1710 0x011F, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7, |
|
1711 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x0131, 0x015F, 0x00FF} }, |
|
1712 { "CP 1255", 2255, |
|
1713 { 0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, |
|
1714 0x02C6, 0x2030, 0xFFFD, 0x2039, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, |
|
1715 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, |
|
1716 0x02DC, 0x2122, 0xFFFD, 0x203A, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, |
|
1717 0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x20AA, 0x00A5, 0x00A6, 0x00A7, |
|
1718 0x00A8, 0x00A9, 0x00D7, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF, |
|
1719 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7, |
|
1720 0x00B8, 0x00B9, 0x00F7, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF, |
|
1721 0x05B0, 0x05B1, 0x05B2, 0x05B3, 0x05B4, 0x05B5, 0x05B6, 0x05B7, |
|
1722 0x05B8, 0x05B9, 0xFFFD, 0x05BB, 0x05BC, 0x05BD, 0x05BE, 0x05BF, |
|
1723 0x05C0, 0x05C1, 0x05C2, 0x05C3, 0x05F0, 0x05F1, 0x05F2, 0x05F3, |
|
1724 0x05F4, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, |
|
1725 0x05D0, 0x05D1, 0x05D2, 0x05D3, 0x05D4, 0x05D5, 0x05D6, 0x05D7, |
|
1726 0x05D8, 0x05D9, 0x05DA, 0x05DB, 0x05DC, 0x05DD, 0x05DE, 0x05DF, |
|
1727 0x05E0, 0x05E1, 0x05E2, 0x05E3, 0x05E4, 0x05E5, 0x05E6, 0x05E7, |
|
1728 0x05E8, 0x05E9, 0x05EA, 0xFFFD, 0xFFFD, 0x200E, 0x200F, 0xFFFD} }, |
|
1729 { "CP 1256", 2256, |
|
1730 { 0x20AC, 0x067E, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, |
|
1731 0x02C6, 0x2030, 0x0679, 0x2039, 0x0152, 0x0686, 0x0698, 0x0688, |
|
1732 0x06AF, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, |
|
1733 0x06A9, 0x2122, 0x0691, 0x203A, 0x0153, 0x200C, 0x200D, 0x06BA, |
|
1734 0x00A0, 0x060C, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7, |
|
1735 0x00A8, 0x00A9, 0x06BE, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF, |
|
1736 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7, |
|
1737 0x00B8, 0x00B9, 0x061B, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x061F, |
|
1738 0x06C1, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627, |
|
1739 0x0628, 0x0629, 0x062A, 0x062B, 0x062C, 0x062D, 0x062E, 0x062F, |
|
1740 0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x00D7, |
|
1741 0x0637, 0x0638, 0x0639, 0x063A, 0x0640, 0x0641, 0x0642, 0x0643, |
|
1742 0x00E0, 0x0644, 0x00E2, 0x0645, 0x0646, 0x0647, 0x0648, 0x00E7, |
|
1743 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x0649, 0x064A, 0x00EE, 0x00EF, |
|
1744 0x064B, 0x064C, 0x064D, 0x064E, 0x00F4, 0x064F, 0x0650, 0x00F7, |
|
1745 0x0651, 0x00F9, 0x0652, 0x00FB, 0x00FC, 0x200E, 0x200F, 0x06D2} }, |
|
1746 { "CP 1257", 2257, |
|
1747 { 0x20AC, 0xFFFD, 0x201A, 0xFFFD, 0x201E, 0x2026, 0x2020, 0x2021, |
|
1748 0xFFFD, 0x2030, 0xFFFD, 0x2039, 0xFFFD, 0x00A8, 0x02C7, 0x00B8, |
|
1749 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, |
|
1750 0xFFFD, 0x2122, 0xFFFD, 0x203A, 0xFFFD, 0x00AF, 0x02DB, 0xFFFD, |
|
1751 0x00A0, 0xFFFD, 0x00A2, 0x00A3, 0x00A4, 0xFFFD, 0x00A6, 0x00A7, |
|
1752 0x00D8, 0x00A9, 0x0156, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00C6, |
|
1753 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7, |
|
1754 0x00F8, 0x00B9, 0x0157, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00E6, |
|
1755 0x0104, 0x012E, 0x0100, 0x0106, 0x00C4, 0x00C5, 0x0118, 0x0112, |
|
1756 0x010C, 0x00C9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012A, 0x013B, |
|
1757 0x0160, 0x0143, 0x0145, 0x00D3, 0x014C, 0x00D5, 0x00D6, 0x00D7, |
|
1758 0x0172, 0x0141, 0x015A, 0x016A, 0x00DC, 0x017B, 0x017D, 0x00DF, |
|
1759 0x0105, 0x012F, 0x0101, 0x0107, 0x00E4, 0x00E5, 0x0119, 0x0113, |
|
1760 0x010D, 0x00E9, 0x017A, 0x0117, 0x0123, 0x0137, 0x012B, 0x013C, |
|
1761 0x0161, 0x0144, 0x0146, 0x00F3, 0x014D, 0x00F5, 0x00F6, 0x00F7, |
|
1762 0x0173, 0x0142, 0x015B, 0x016B, 0x00FC, 0x017C, 0x017E, 0x02D9} }, |
|
1763 { "CP 1258", 2258, |
|
1764 { 0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, |
|
1765 0x02C6, 0x2030, 0xFFFD, 0x2039, 0x0152, 0xFFFD, 0xFFFD, 0xFFFD, |
|
1766 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, |
|
1767 0x02DC, 0x2122, 0xFFFD, 0x203A, 0x0153, 0xFFFD, 0xFFFD, 0x0178, |
|
1768 0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7, |
|
1769 0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF, |
|
1770 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7, |
|
1771 0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF, |
|
1772 0x00C0, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x00C5, 0x00C6, 0x00C7, |
|
1773 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x0300, 0x00CD, 0x00CE, 0x00CF, |
|
1774 0x0110, 0x00D1, 0x0309, 0x00D3, 0x00D4, 0x01A0, 0x00D6, 0x00D7, |
|
1775 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x01AF, 0x0303, 0x00DF, |
|
1776 0x00E0, 0x00E1, 0x00E2, 0x0103, 0x00E4, 0x00E5, 0x00E6, 0x00E7, |
|
1777 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x0301, 0x00ED, 0x00EE, 0x00EF, |
|
1778 0x0111, 0x00F1, 0x0323, 0x00F3, 0x00F4, 0x01A1, 0x00F6, 0x00F7, |
|
1779 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x01B0, 0x20AB, 0x00FF} }, |
|
1780 |
|
1781 // this one is generated from the charmap file located in /usr/share/i18n/charmaps |
|
1782 // on most Linux distributions. The thai character set tis620 is byte by byte equivalent |
|
1783 // to iso8859-11, so we name it 8859-11 here, but recognise the name tis620 too. |
|
1784 |
|
1785 // $ for A in 8 9 A B C D E F ; do for B in 0 1 2 3 4 5 6 7 8 9 A B C D E F ; do echo x${A}${B} 0xFFFD ; done ; done > /tmp/digits ; ( cut -c25- < TIS-620 ; cat /tmp/digits ) | awk '/^x[89ABCDEF]/{ print $1, $2 }' | sed -e 's/<U/0x/' -e 's/>//' | sort | uniq -w4 | cut -c5- | paste '-d ' - - - - - - - - | sed -e 's/ /, /g' -e 's/$/,/' -e '$ s/,$/} },/' -e '1 s/^/{ /' > ~/tmp/tis-620 |
|
1786 { "ISO 8859-11", 2259, // Thai character set mib enum taken from tis620 (which is byte by byte equivalent) |
|
1787 { 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, |
|
1788 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, |
|
1789 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, |
|
1790 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, |
|
1791 0xFFFD, 0x0E01, 0x0E02, 0x0E03, 0x0E04, 0x0E05, 0x0E06, 0x0E07, |
|
1792 0x0E08, 0x0E09, 0x0E0A, 0x0E0B, 0x0E0C, 0x0E0D, 0x0E0E, 0x0E0F, |
|
1793 0x0E10, 0x0E11, 0x0E12, 0x0E13, 0x0E14, 0x0E15, 0x0E16, 0x0E17, |
|
1794 0x0E18, 0x0E19, 0x0E1A, 0x0E1B, 0x0E1C, 0x0E1D, 0x0E1E, 0x0E1F, |
|
1795 0x0E20, 0x0E21, 0x0E22, 0x0E23, 0x0E24, 0x0E25, 0x0E26, 0x0E27, |
|
1796 0x0E28, 0x0E29, 0x0E2A, 0x0E2B, 0x0E2C, 0x0E2D, 0x0E2E, 0x0E2F, |
|
1797 0x0E30, 0x0E31, 0x0E32, 0x0E33, 0x0E34, 0x0E35, 0x0E36, 0x0E37, |
|
1798 0x0E38, 0x0E39, 0x0E3A, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x0E3F, |
|
1799 0x0E40, 0x0E41, 0x0E42, 0x0E43, 0x0E44, 0x0E45, 0x0E46, 0x0E47, |
|
1800 0x0E48, 0x0E49, 0x0E4A, 0x0E4B, 0x0E4C, 0x0E4D, 0x0E4E, 0x0E4F, |
|
1801 0x0E50, 0x0E51, 0x0E52, 0x0E53, 0x0E54, 0x0E55, 0x0E56, 0x0E57, |
|
1802 0x0E58, 0x0E59, 0x0E5A, 0x0E5B, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD} }, |
|
1803 |
|
1804 // change LAST_MIB if you add more, and edit unicodevalues in |
|
1805 // kernel/qpsprinter.cpp too. |
|
1806 }; |
|
1807 |
|
1808 |
|
1809 static const QSimpleTextCodec * reverseOwner = 0; |
|
1810 static QArray<char> * reverseMap = 0; |
|
1811 |
|
1812 |
|
1813 QSimpleTextCodec::QSimpleTextCodec( int i ) |
|
1814 : QTextCodec(), forwardIndex( i ) |
|
1815 { |
|
1816 } |
|
1817 |
|
1818 |
|
1819 QSimpleTextCodec::~QSimpleTextCodec() |
|
1820 { |
|
1821 if ( reverseOwner == this ) { |
|
1822 delete reverseMap; |
|
1823 reverseMap = 0; |
|
1824 reverseOwner = 0; |
|
1825 } |
|
1826 } |
|
1827 |
|
1828 // what happens if strlen(chars)<len? what happens if !chars? if len<1? |
|
1829 QString QSimpleTextCodec::toUnicode(const char* chars, int len) const |
|
1830 { |
|
1831 if(len <= 0) |
|
1832 return QString::null; |
|
1833 |
|
1834 int clen = qstrlen(chars); |
|
1835 len = QMIN(len, clen); // Note: NUL ends string |
|
1836 |
|
1837 QString r; |
|
1838 r.setUnicode(0, len); |
|
1839 QChar* uc = (QChar*)r.unicode(); // const_cast |
|
1840 const unsigned char * c = (const unsigned char *)chars; |
|
1841 for( int i=0; i<len; i++ ) { |
|
1842 if ( c[i] > 127 ) |
|
1843 uc[i] = unicodevalues[forwardIndex].values[c[i]-128]; |
|
1844 else |
|
1845 uc[i] = c[i]; |
|
1846 } |
|
1847 return r; |
|
1848 } |
|
1849 |
|
1850 |
|
1851 QCString QSimpleTextCodec::fromUnicode(const QString& uc, int& len ) const |
|
1852 { |
|
1853 if ( reverseOwner != this ) { |
|
1854 int m = 0; |
|
1855 int i = 0; |
|
1856 while( i < 128 ) { |
|
1857 if ( unicodevalues[forwardIndex].values[i] > m && |
|
1858 unicodevalues[forwardIndex].values[i] < 0xfffd ) |
|
1859 m = unicodevalues[forwardIndex].values[i]; |
|
1860 i++; |
|
1861 } |
|
1862 m++; |
|
1863 if ( !reverseMap ) |
|
1864 reverseMap = new QArray<char>( m ); |
|
1865 if ( m > (int)(reverseMap->size()) ) |
|
1866 reverseMap->resize( m ); |
|
1867 for( i = 0; i < 128 && i < m; i++ ) |
|
1868 (*reverseMap)[i] = (char)i; |
|
1869 for( ;i < m; i++ ) |
|
1870 (*reverseMap)[i] = '?'; |
|
1871 for( i=128; i<256; i++ ) { |
|
1872 int u = unicodevalues[forwardIndex].values[i-128]; |
|
1873 if ( u < m ) |
|
1874 (*reverseMap)[u] = (char)(unsigned char)(i); |
|
1875 } |
|
1876 reverseOwner = this; |
|
1877 } |
|
1878 if ( len <0 || len > (int)uc.length() ) |
|
1879 len = uc.length(); |
|
1880 QCString r( len+1 ); |
|
1881 int i = len; |
|
1882 int u; |
|
1883 const QChar* ucp = uc.unicode(); |
|
1884 char* rp = r.data(); |
|
1885 char* rmp = reverseMap->data(); |
|
1886 int rmsize = (int) reverseMap->size(); |
|
1887 while( i-- ) |
|
1888 { |
|
1889 u = ucp->unicode(); |
|
1890 *rp++ = u < 128 ? u : (( u < rmsize ) ? (*(rmp+u)) : '?' ); |
|
1891 ucp++; |
|
1892 } |
|
1893 r[len] = 0; |
|
1894 return r; |
|
1895 } |
|
1896 |
|
1897 |
|
1898 const char* QSimpleTextCodec::name() const |
|
1899 { |
|
1900 return unicodevalues[forwardIndex].cs; |
|
1901 } |
|
1902 |
|
1903 |
|
1904 int QSimpleTextCodec::mibEnum() const |
|
1905 { |
|
1906 return unicodevalues[forwardIndex].mib; |
|
1907 } |
|
1908 |
|
1909 int QSimpleTextCodec::heuristicNameMatch(const char* hint) const |
|
1910 { |
|
1911 if ( hint[0]=='k' ) { |
|
1912 // Help people with messy fonts |
|
1913 if ( QCString(hint) == "koi8-1" ) |
|
1914 return QTextCodec::heuristicNameMatch("koi8-r")-1; |
|
1915 if ( QCString(hint) == "koi8-ru" ) |
|
1916 return QTextCodec::heuristicNameMatch("koi8-r")-1; |
|
1917 } else if ( hint[0] == 't' && QCString(name()) == "ISO 8859-11" ) { |
|
1918 // 8859-11 and tis620 are byte by bute equivalent |
|
1919 int i = simpleHeuristicNameMatch("tis620-0", hint); |
|
1920 if( !i ) |
|
1921 i = simpleHeuristicNameMatch("tis-620", hint); |
|
1922 if( i ) return i; |
|
1923 } |
|
1924 return QTextCodec::heuristicNameMatch(hint); |
|
1925 } |
|
1926 |
|
1927 int QSimpleTextCodec::heuristicContentMatch(const char* chars, int len) const |
|
1928 { |
|
1929 if ( len<1 || !chars ) |
|
1930 return -1; |
|
1931 int i = 0; |
|
1932 const uchar * c = (const unsigned char *)chars; |
|
1933 int r = 0; |
|
1934 while( i<len && c && *c ) { |
|
1935 if ( *c >= 128 ) { |
|
1936 if ( unicodevalues[forwardIndex].values[(*c)-128] == 0xfffd ) |
|
1937 return -1; |
|
1938 } |
|
1939 if ( (*c >= ' ' && *c < 127) || |
|
1940 *c == '\n' || *c == '\t' || *c == '\r' ) |
|
1941 r++; |
|
1942 i++; |
|
1943 c++; |
|
1944 } |
|
1945 if ( mibEnum()==4 ) |
|
1946 r+=1; |
|
1947 return r; |
|
1948 } |
|
1949 |
|
1950 |
|
1951 #endif // QT_NO_CODECS |
|
1952 |
|
1953 class QLatin1Codec: public QTextCodec |
|
1954 { |
|
1955 public: |
|
1956 QLatin1Codec(); |
|
1957 ~QLatin1Codec(); |
|
1958 |
|
1959 QString toUnicode(const char* chars, int len) const; |
|
1960 QCString fromUnicode(const QString& uc, int& lenInOut ) const; |
|
1961 |
|
1962 const char* name() const; |
|
1963 int mibEnum() const; |
|
1964 |
|
1965 int heuristicContentMatch(const char* chars, int len) const; |
|
1966 |
|
1967 int heuristicNameMatch(const char* hint) const; |
|
1968 |
|
1969 private: |
|
1970 int forwardIndex; |
|
1971 }; |
|
1972 |
|
1973 |
|
1974 QLatin1Codec::QLatin1Codec() |
|
1975 : QTextCodec() |
|
1976 { |
|
1977 } |
|
1978 |
|
1979 |
|
1980 QLatin1Codec::~QLatin1Codec() |
|
1981 { |
|
1982 } |
|
1983 |
|
1984 // what happens if strlen(chars)<len? what happens if !chars? if len<1? |
|
1985 QString QLatin1Codec::toUnicode(const char* chars, int len) const |
|
1986 { |
|
1987 if(len <= 0) |
|
1988 return QString::null; |
|
1989 |
|
1990 return QString::fromLatin1(chars, len); |
|
1991 } |
|
1992 |
|
1993 |
|
1994 QCString QLatin1Codec::fromUnicode(const QString& uc, int& len ) const |
|
1995 { |
|
1996 if ( len <0 || len > (int)uc.length() ) |
|
1997 len = uc.length(); |
|
1998 QCString r( len+1 ); |
|
1999 int i = 0; |
|
2000 const QChar *ch = uc.unicode(); |
|
2001 while ( i < len ) { |
|
2002 r[i] = ch->row() ? '?' : ch->cell(); |
|
2003 i++; |
|
2004 ch++; |
|
2005 } |
|
2006 r[len] = 0; |
|
2007 return r; |
|
2008 } |
|
2009 |
|
2010 |
|
2011 const char* QLatin1Codec::name() const |
|
2012 { |
|
2013 return "ISO 8859-1"; |
|
2014 } |
|
2015 |
|
2016 |
|
2017 int QLatin1Codec::mibEnum() const |
|
2018 { |
|
2019 return 4; |
|
2020 } |
|
2021 |
|
2022 int QLatin1Codec::heuristicNameMatch(const char* hint) const |
|
2023 { |
|
2024 return QTextCodec::heuristicNameMatch(hint); |
|
2025 } |
|
2026 |
|
2027 int QLatin1Codec::heuristicContentMatch(const char* chars, int len) const |
|
2028 { |
|
2029 if ( len<1 || !chars ) |
|
2030 return -1; |
|
2031 int i = 0; |
|
2032 const uchar * c = (const unsigned char *)chars; |
|
2033 int r = 0; |
|
2034 while( i<len && c && *c ) { |
|
2035 if ( *c >= 0x80 && *c < 0xa0 ) |
|
2036 return -1; |
|
2037 if ( (*c >= ' ' && *c < 127) || |
|
2038 *c == '\n' || *c == '\t' || *c == '\r' ) |
|
2039 r++; |
|
2040 i++; |
|
2041 c++; |
|
2042 } |
|
2043 return r; |
|
2044 } |
|
2045 |
|
2046 |
|
2047 static void setupBuiltinCodecs() |
|
2048 { |
|
2049 (void)new QLatin1Codec; |
|
2050 |
|
2051 #ifndef QT_NO_CODECS |
|
2052 int i = 0; |
|
2053 do { |
|
2054 (void)new QSimpleTextCodec( i ); |
|
2055 } while( unicodevalues[i++].mib != LAST_MIB ); |
|
2056 |
|
2057 (void)new QEucJpCodec; |
|
2058 (void)new QSjisCodec; |
|
2059 (void)new QJisCodec; |
|
2060 (void)new QEucKrCodec; |
|
2061 (void)new QGbkCodec; |
|
2062 (void)new QBig5Codec; |
|
2063 (void)new QUtf8Codec; |
|
2064 (void)new QUtf16Codec; |
|
2065 (void)new QHebrewCodec; |
|
2066 (void)new QArabicCodec; |
|
2067 (void)new QTsciiCodec; |
|
2068 #endif // QT_NO_CODECS |
|
2069 } |
|
2070 |
|
2071 #endif // QT_NO_TEXTCODEC |