Orb/Doxygen/qtools/qtextcodec.cpp
changeset 0 42188c7ea2d9
equal deleted inserted replaced
-1:000000000000 0:42188c7ea2d9
       
     1 /****************************************************************************
       
     2 ** 
       
     3 **
       
     4 ** Implementation of QTextCodec class
       
     5 **
       
     6 ** Created : 981015
       
     7 **
       
     8 ** Copyright (C)1998-2000 Trolltech AS.  All rights reserved.
       
     9 **
       
    10 ** This file is part of the tools module of the Qt GUI Toolkit.
       
    11 **
       
    12 ** This file may be distributed under the terms of the Q Public License
       
    13 ** as defined by Trolltech AS of Norway and appearing in the file
       
    14 ** LICENSE.QPL included in the packaging of this file.
       
    15 **
       
    16 ** This file may be distributed and/or modified under the terms of the
       
    17 ** GNU General Public License version 2 as published by the Free Software
       
    18 ** Foundation and appearing in the file LICENSE.GPL included in the
       
    19 ** packaging of this file.
       
    20 **
       
    21 ** Licensees holding valid Qt Enterprise Edition or Qt Professional Edition
       
    22 ** licenses may use this file in accordance with the Qt Commercial License
       
    23 ** Agreement provided with the Software.
       
    24 **
       
    25 ** This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
       
    26 ** WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
       
    27 **
       
    28 ** See http://www.trolltech.com/pricing.html or email sales@trolltech.com for
       
    29 **   information about Qt Commercial License Agreements.
       
    30 ** See http://www.trolltech.com/qpl/ for QPL licensing information.
       
    31 ** See http://www.trolltech.com/gpl/ for GPL licensing information.
       
    32 **
       
    33 ** Contact info@trolltech.com if any conditions of this licensing are
       
    34 ** not clear to you.
       
    35 **
       
    36 **********************************************************************/
       
    37 
       
    38 #include "qtextcodec.h"
       
    39 #ifndef QT_NO_TEXTCODEC
       
    40 
       
    41 #include "qlist.h"
       
    42 #ifndef QT_NO_CODECS
       
    43 #include "qutfcodec.h"
       
    44 #include "qgbkcodec.h"
       
    45 #include "qeucjpcodec.h"
       
    46 #include "qjiscodec.h"
       
    47 #include "qsjiscodec.h"
       
    48 #include "qeuckrcodec.h"
       
    49 #include "qbig5codec.h"
       
    50 #include "qrtlcodec.h"
       
    51 #include "qtsciicodec.h"
       
    52 #endif
       
    53 
       
    54 #include "qfile.h"
       
    55 #include "qstrlist.h"
       
    56 #include "qstring.h"
       
    57 
       
    58 #include <stdlib.h>
       
    59 #include <ctype.h>
       
    60 #include <locale.h>
       
    61 
       
    62 
       
    63 static QList<QTextCodec> * all = 0;
       
    64 static bool destroying_is_ok; // starts out as 0
       
    65 
       
    66 /*!  Deletes all the created codecs.
       
    67 
       
    68   \warning Do not call this function.
       
    69 
       
    70   QApplication calls this just before exiting, to delete any
       
    71   QTextCodec objects that may be lying around.  Since various other
       
    72   classes hold pointers to QTextCodec objects, it is not safe to call
       
    73   this function earlier.
       
    74 
       
    75   If you are using the utility classes (like QString) but not using
       
    76   QApplication, calling this function at the very end of your
       
     77   application can be helpful for chasing down memory leaks, as
       
    78   QTextCodec objects will not show up.
       
    79 */
       
    80 
       
    81 void QTextCodec::deleteAllCodecs()
       
    82 {
       
    83     if ( !all )
       
    84         return;
       
    85 
       
    86     destroying_is_ok = TRUE;
       
    87     QList<QTextCodec> * ball = all;
       
    88     all = 0;
       
    89     ball->clear();
       
    90     delete ball;
       
    91     destroying_is_ok = FALSE;
       
    92 }
       
    93 
       
    94 
       
    95 static void setupBuiltinCodecs();
       
    96 
       
    97 
       
    98 static void realSetup()
       
    99 {
       
   100 #if defined(CHECK_STATE)
       
   101     if ( destroying_is_ok )
       
   102         qWarning( "creating new codec during codec cleanup" );
       
   103 #endif
       
   104     all = new QList<QTextCodec>;
       
   105     all->setAutoDelete( TRUE );
       
   106     setupBuiltinCodecs();
       
   107 }
       
   108 
       
   109 
       
   110 static inline void setup()
       
   111 {
       
   112     if ( !all )
       
   113         realSetup();
       
   114 }
       
   115 
       
   116 
       
   117 class QTextStatelessEncoder: public QTextEncoder {
       
   118     const QTextCodec* codec;
       
   119 public:
       
   120     QTextStatelessEncoder(const QTextCodec*);
       
   121     QCString fromUnicode(const QString& uc, int& lenInOut);
       
   122 };
       
   123 
       
   124 
       
   125 class QTextStatelessDecoder : public QTextDecoder {
       
   126     const QTextCodec* codec;
       
   127 public:
       
   128     QTextStatelessDecoder(const QTextCodec*);
       
   129     QString toUnicode(const char* chars, int len);
       
   130 };
       
   131 
       
   132 QTextStatelessEncoder::QTextStatelessEncoder(const QTextCodec* c) :
       
   133     codec(c)
       
   134 {
       
   135 }
       
   136 
       
   137 
       
   138 QCString QTextStatelessEncoder::fromUnicode(const QString& uc, int& lenInOut)
       
   139 {
       
   140     return codec->fromUnicode(uc,lenInOut);
       
   141 }
       
   142 
       
   143 
       
   144 QTextStatelessDecoder::QTextStatelessDecoder(const QTextCodec* c) :
       
   145     codec(c)
       
   146 {
       
   147 }
       
   148 
       
   149 
       
   150 QString QTextStatelessDecoder::toUnicode(const char* chars, int len)
       
   151 {
       
   152     return codec->toUnicode(chars,len);
       
   153 }
       
   154 
       
   155 
       
   156 
       
   157 // NOT REVISED
       
   158 /*!
       
   159   \class QTextCodec qtextcodec.h
       
   160   \brief Provides conversion between text encodings.
       
   161 
       
   162   By making objects of subclasses of QTextCodec, support for
       
   163   new text encodings can be added to Qt.
       
   164 
       
   165   The abstract virtual functions describe the encoder to the
       
   166   system and the coder is used as required in the different
       
    167   text file formats supported by QTextStream and, under X11, for the
       
   168   locale-specific character input and output (under Windows NT
       
   169   codecs are not needed for GUI I/O since the system works
       
   170   with Unicode already, and Windows 95/98 has built-in convertors
       
   171   for the 8-bit local encoding).
       
   172 
       
   173   More recently created QTextCodec objects take precedence
       
   174   over earlier ones.
       
   175 
       
   176   To add support for another 8-bit encoding to Qt, make a subclass
       
    177   of QTextCodec and implement at least the following methods:
       
   178   <dl>
       
   179    <dt>\c const char* name() const
       
   180     <dd>Return the official name for the encoding.
       
   181    <dt>\c int mibEnum() const
       
   182     <dd>Return the MIB enum for the encoding if it is listed in the
       
   183       <a href=ftp://ftp.isi.edu/in-notes/iana/assignments/character-sets>
       
   184       IANA character-sets encoding file</a>.
       
   185   </dl>
       
   186   If the encoding is multi-byte then it will have "state"; that is,
       
   187   the interpretation of some bytes will be dependent on some preceding
       
   188   bytes.  For such an encoding, you will need to implement
       
   189   <dl>
       
   190    <dt> \c QTextDecoder* makeDecoder() const
       
   191     <dd>Return a QTextDecoder that remembers incomplete multibyte
       
   192         sequence prefixes or other required state.
       
   193   </dl>
       
   194   If the encoding does \e not require state, you should implement:
       
   195   <dl>
       
   196    <dt> \c QString toUnicode(const char* chars, int len) const
       
   197     <dd>Converts \e len characters from \e chars to Unicode.
       
   198   </dl>
       
   199   The base QTextCodec class has default implementations of the above
       
   200   two functions, <i>but they are mutually recursive</i>, so you must
       
   201   re-implement at least one of them, or both for improved efficiency.
       
   202 
       
   203   For conversion from Unicode to 8-bit encodings, it is rarely necessary
       
   204   to maintain state.  However, two functions similar to the two above
       
   205   are used for encoding:
       
   206   <dl>
       
   207    <dt> \c QTextEncoder* makeEncoder() const
       
    208     <dd>Return a QTextEncoder.
       
   209    <dt> \c QCString fromUnicode(const QString& uc, int& lenInOut ) const;
       
   210     <dd>Converts \e lenInOut characters (of type QChar) from the start
       
   211         of the string \a uc, returning a QCString result, and also returning
       
   212         the \link QCString::length() length\endlink
       
   213         of the result in lenInOut.
       
   214   </dl>
       
   215   Again, these are mutually recursive so only one needs to be implemented,
       
   216   or both if better efficiency is possible.
       
   217 
       
   218   Finally, you must implement:
       
   219   <dl>
       
   220    <dt> \c int heuristicContentMatch(const char* chars, int len) const
       
   221     <dd>Gives a value indicating how likely it is that \e len characters
       
   222         from \e chars are in the encoding.
       
   223   </dl>
       
   224   A good model for this function is the
       
   225   QWindowsLocalCodec::heuristicContentMatch function found in the Qt sources.
       
   226 
       
   227   A QTextCodec subclass might have improved performance if you also
       
   228   re-implement:
       
   229   <dl>
       
   230    <dt> \c bool canEncode( QChar ) const
       
   231     <dd>Test if a Unicode character can be encoded.
       
   232    <dt> \c bool canEncode( const QString& ) const
       
   233     <dd>Test if a string of Unicode characters can be encoded.
       
   234    <dt> \c int heuristicNameMatch(const char* hint) const
       
   235     <dd>Test if a possibly non-standard name is referring to the codec.
       
   236   </dl>
       
   237 */
       
   238 
       
   239 
       
   240 /*!
       
   241   Constructs a QTextCodec, making it of highest precedence.
       
   242   The QTextCodec should always be constructed on the heap
       
   243   (with new), and once constructed it becomes the responsibility
       
   244   of Qt to delete it (which is done at QApplication destruction).
       
   245 */
       
   246 QTextCodec::QTextCodec()
       
   247 {
       
   248     setup();
       
   249     all->insert(0,this);
       
   250 }
       
   251 
       
   252 
       
   253 /*!
       
   254   Destructs the QTextCodec.  Note that you should not delete
       
   255   codecs yourself - once created they become the responsibility
       
   256   of Qt to delete.
       
   257 */
       
   258 QTextCodec::~QTextCodec()
       
   259 {
       
   260     if ( !destroying_is_ok )
       
   261         qWarning("QTextCodec::~QTextCodec() called by application");
       
   262     if ( all )
       
   263         all->remove( this );
       
   264 }
       
   265 
       
   266 
       
   267 /*!
       
   268   Returns a value indicating how likely this decoder is
       
   269   for decoding some format that has the given name.
       
   270 
       
   271   A good match returns a positive number around
       
   272   the length of the string.  A bad match is negative.
       
   273 
       
   274   The default implementation calls simpleHeuristicNameMatch()
       
   275   with the name of the codec.
       
   276 */
       
   277 int QTextCodec::heuristicNameMatch(const char* hint) const
       
   278 {
       
   279     return simpleHeuristicNameMatch(name(),hint);
       
   280 }
       
   281 
       
   282 
       
   283 // returns a string cotnaining the letters and numbers from input,
       
   284 // with a space separating run of a character class.  e.g. "iso8859-1"
       
   285 // becomes "iso 8859 1"
       
   286 static QString lettersAndNumbers( const char * input )
       
   287 {
       
   288     QString result;
       
   289     QChar c;
       
   290 
       
   291     while( input && *input ) {
       
   292         c = *input;
       
   293         if ( c.isLetter() || c.isNumber() )
       
   294             result += c.lower();
       
   295         if ( input[1] ) {
       
   296             // add space at character class transition, except
       
   297             // transition from upper-case to lower-case letter
       
   298             QChar n( input[1] );
       
   299             if ( c.isLetter() && n.isLetter() ) {
       
   300                 if ( c == c.lower() && n == n.upper() )
       
   301                     result += ' ';
       
   302             } else if ( c.category() != n.category() ) {
       
   303                 result += ' ';
       
   304             }
       
   305         }
       
   306         input++;
       
   307     }
       
   308     return result.simplifyWhiteSpace();
       
   309 }
       
   310 
       
   311 /*!
       
   312   A simple utility function for heuristicNameMatch() - it
       
   313   does some very minor character-skipping
       
   314   so that almost-exact matches score high.
       
   315 */
       
   316 int QTextCodec::simpleHeuristicNameMatch(const char* name, const char* hint)
       
   317 {
       
   318     // if they're the same, return a perfect score.
       
   319     if ( name && hint && qstrcmp( name, hint ) == 0 )
       
   320         return qstrlen( hint );
       
   321 
       
   322     // if the letters and numbers are the same, we have an "almost"
       
   323     // perfect match.
       
   324     QString h( lettersAndNumbers( hint ) );
       
   325     QString n( lettersAndNumbers( name ) );
       
   326     if ( h == n )
       
   327         return qstrlen( hint )-1;
       
   328 
       
   329     if ( h.stripWhiteSpace() == n.stripWhiteSpace() )
       
   330         return qstrlen( hint )-2;
       
   331 
       
   332     // could do some more here, but I don't think it's worth it
       
   333 
       
   334     return 0;
       
   335 }
       
   336 
       
   337 
       
   338 /*!
       
    339   Returns the QTextCodec \a i places from the most recently
       
   340   inserted, or NULL if there is no such QTextCodec.  Thus,
       
   341   codecForIndex(0) returns the most recently created QTextCodec.
       
   342 */
       
   343 QTextCodec* QTextCodec::codecForIndex(int i)
       
   344 {
       
   345     setup();
       
   346     return (uint)i >= all->count() ? 0 : all->at(i);
       
   347 }
       
   348 
       
   349 
       
   350 /*!
       
   351   Returns the QTextCodec which matches the
       
   352   \link QTextCodec::mibEnum() MIBenum\endlink \a mib.
       
   353 */
       
   354 QTextCodec* QTextCodec::codecForMib(int mib)
       
   355 {
       
   356     setup();
       
   357     QListIterator<QTextCodec> i(*all);
       
   358     QTextCodec* result;
       
   359     for ( ; (result=i); ++i ) {
       
   360         if ( result->mibEnum()==mib )
       
   361             break;
       
   362     }
       
   363     return result;
       
   364 }
       
   365 
       
   366 
       
   367 
       
   368 
       
   369 
       
   370 #ifdef _OS_WIN32_
       
   371 class QWindowsLocalCodec: public QTextCodec
       
   372 {
       
   373 public:
       
   374     QWindowsLocalCodec();
       
   375     ~QWindowsLocalCodec();
       
   376 
       
   377     QString toUnicode(const char* chars, int len) const;
       
   378     QCString fromUnicode(const QString& uc, int& lenInOut ) const;
       
   379 
       
   380     const char* name() const;
       
   381     int mibEnum() const;
       
   382 
       
   383     int heuristicContentMatch(const char* chars, int len) const;
       
   384 };
       
   385 
       
   386 QWindowsLocalCodec::QWindowsLocalCodec()
       
   387 {
       
   388 }
       
   389 
       
   390 QWindowsLocalCodec::~QWindowsLocalCodec()
       
   391 {
       
   392 }
       
   393 
       
   394 
       
   395 QString QWindowsLocalCodec::toUnicode(const char* chars, int len) const
       
   396 {
       
   397     if ( len == 1 && chars ) {          // Optimization; avoids allocation
       
   398         char c[2];
       
   399         c[0] = *chars;
       
   400         c[1] = 0;
       
   401         return qt_winMB2QString( c, 2 );
       
   402     }
       
   403     if ( len < 0 )
       
   404         return qt_winMB2QString( chars );
       
   405     QCString s(chars,len+1);
       
   406     return qt_winMB2QString(s);
       
   407 }
       
   408 
       
   409 QCString QWindowsLocalCodec::fromUnicode(const QString& uc, int& lenInOut ) const
       
   410 {
       
   411     QCString r = qt_winQString2MB( uc, lenInOut );
       
   412     lenInOut = r.length();
       
   413     return r;
       
   414 }
       
   415 
       
   416 
       
   417 const char* QWindowsLocalCodec::name() const
       
   418 {
       
   419     return "System";
       
   420 }
       
   421 
       
   422 int QWindowsLocalCodec::mibEnum() const
       
   423 {
       
   424     return 0;
       
   425 }
       
   426 
       
   427 
       
   428 int QWindowsLocalCodec::heuristicContentMatch(const char* chars, int len) const
       
   429 {
       
   430     // ### Not a bad default implementation?
       
   431     QString t = toUnicode(chars,len);
       
   432     int l = t.length();
       
   433     QCString mb = fromUnicode(t,l);
       
   434     int i=0;
       
   435     while ( i < len )
       
   436         if ( chars[i] == mb[i] )
       
   437             i++;
       
   438     return i;
       
   439 }
       
   440 
       
   441 #else
       
   442 
       
   443 /* locale names mostly copied from XFree86 */
       
   444 static const char * const iso8859_2locales[] = {
       
   445     "croatian", "cs", "cs_CS", "cs_CZ","cz", "cz_CZ", "czech", "hr",
       
   446     "hr_HR", "hu", "hu_HU", "hungarian", "pl", "pl_PL", "polish", "ro",
       
   447     "ro_RO", "rumanian", "serbocroatian", "sh", "sh_SP", "sh_YU", "sk",
       
   448     "sk_SK", "sl", "sl_CS", "sl_SI", "slovak", "slovene", "sr_SP", 0 };
       
   449 
       
   450 static const char * const iso8859_3locales[] = {
       
   451     "eo", 0 };
       
   452 
       
   453 static const char * const iso8859_4locales[] = {
       
   454     "ee", "ee_EE", "lt", "lt_LT", "lv", "lv_LV", 0 };
       
   455 
       
   456 static const char * const iso8859_5locales[] = {
       
   457     "bg", "bg_BG", "bulgarian", "mk", "mk_MK",
       
   458     "sp", "sp_YU", 0 };
       
   459 
       
   460 static const char * const iso8859_6locales[] = {
       
   461     "ar_AA", "ar_SA", "arabic", 0 };
       
   462 
       
   463 static const char * const iso8859_7locales[] = {
       
   464     "el", "el_GR", "greek", 0 };
       
   465 
       
   466 static const char * const iso8859_8locales[] = {
       
   467     "hebrew", "he", "he_IL", "iw", "iw_IL", 0 };
       
   468 
       
   469 static const char * const iso8859_9locales[] = {
       
   470     "tr", "tr_TR", "turkish", 0 };
       
   471 
       
   472 static const char * const iso8859_15locales[] = {
       
   473     "fr", "fi", "french", "finnish", "et", "et_EE", 0 };
       
   474 
       
   475 static const char * const koi8_ulocales[] = {
       
   476     "uk", "uk_UA", "ru_UA", "ukrainian", 0 };
       
   477 
       
   478 static const char * const tis_620locales[] = {
       
   479     "th", "th_TH", "thai", 0 };
       
   480 
       
   481 
       
   482 static bool try_locale_list( const char * const locale[], const char * lang )
       
   483 {
       
   484     int i;
       
   485     for( i=0; locale[i] && qstrcmp(locale[i], lang); i++ )
       
   486     { }
       
   487     return locale[i] != 0;
       
   488 }
       
   489 
       
   490 // For the probably_koi8_locales we have to look. the standard says
       
    491 // these are 8859-5, but almost all Russian users use KOI8-R and
       
   492 // incorrectly set $LANG to ru_RU. We'll check tolower() to see what
       
   493 // tolower() thinks ru_RU means.
       
   494 
       
   495 // If you read the history, it seems that many Russians blame ISO and
       
    496 // Perestroika for the confusion.
       
   497 //
       
   498 // The real bug is that some programs break if the user specifies
       
   499 // ru_RU.KOI8-R.
       
   500 
       
   501 static const char * const probably_koi8_rlocales[] = {
       
   502     "ru", "ru_SU", "ru_RU", "russian", 0 };
       
   503 
       
   504 // this means ANY of these locale aliases. if they're aliases for
       
   505 // different locales, the code breaks.
       
   506 static QTextCodec * ru_RU_codec = 0;
       
   507 
       
   508 static QTextCodec * ru_RU_hack( const char * i ) {
       
   509     if ( ! ru_RU_codec ) {
       
   510         QCString origlocale = setlocale( LC_CTYPE, i );
       
   511         // unicode   koi8r   latin5   name
       
   512         // 0x044E    0xC0    0xEE     CYRILLIC SMALL LETTER YU
       
   513         // 0x042E    0xE0    0xCE     CYRILLIC CAPITAL LETTER YU
       
   514         int latin5 = tolower( 0xCE );
       
   515         int koi8r = tolower( 0xE0 );
       
   516         if ( koi8r == 0xC0 && latin5 != 0xEE ) {
       
   517             ru_RU_codec = QTextCodec::codecForName( "KOI8-R" );
       
   518         } else if ( koi8r != 0xC0 && latin5 == 0xEE ) {
       
   519             ru_RU_codec = QTextCodec::codecForName( "ISO 8859-5" );
       
   520         } else {
       
   521             // something else again... let's assume... *throws dice*
       
   522             ru_RU_codec = QTextCodec::codecForName( "KOI8-R" );
       
   523             qWarning( "QTextCodec: using KOI8-R, probe failed (%02x %02x %s)",
       
   524                       koi8r, latin5, i );
       
   525         }
       
   526         setlocale( LC_CTYPE, origlocale.data() );
       
   527     }
       
   528     return ru_RU_codec;
       
   529 }
       
   530 
       
   531 #endif
       
   532 
       
   533 static QTextCodec * localeMapper = 0;
       
   534 
       
   535 void qt_set_locale_codec( QTextCodec *codec )
       
   536 {
       
   537     localeMapper = codec;
       
   538 }
       
   539 
       
   540 /*!  Returns a pointer to the codec most suitable for this locale. */
       
   541 
       
   542 QTextCodec* QTextCodec::codecForLocale()
       
   543 {
       
   544     if ( localeMapper )
       
   545         return localeMapper;
       
   546 
       
   547     setup();
       
   548 
       
   549 #ifdef _OS_WIN32_
       
   550     localeMapper = new QWindowsLocalCodec;
       
   551 #else
       
   552     // Very poorly defined and followed standards causes lots of code
       
   553     // to try to get all the cases...
       
   554 
       
   555     char * lang = qstrdup( getenv("LANG") );
       
   556 
       
   557     char * p = lang ? strchr( lang, '.' ) : 0;
       
   558     if ( !p || *p != '.' ) {
       
   559         // Some versions of setlocale return encoding, others not.
       
   560         char *ctype = qstrdup( setlocale( LC_CTYPE, 0 ) );
       
   561         // Some Linux distributions have broken locales which will return
       
   562         // "C" for LC_CTYPE
       
   563         if ( qstrcmp( ctype, "C" ) == 0 ) {
       
   564             delete [] ctype;
       
   565         } else {
       
   566             if ( lang )
       
   567                 delete [] lang;
       
   568             lang = ctype;
       
   569             p = lang ? strchr( lang, '.' ) : 0;
       
   570         }
       
   571     }
       
   572 
       
   573     if( p && *p == '.' ) {
       
   574         // if there is an encoding and we don't know it, we return 0
       
   575         // User knows what they are doing.  Codecs will believe them.
       
   576         localeMapper = codecForName( lang );
       
   577         if ( !localeMapper ) {
       
   578             // Use or codec disagree.
       
   579             localeMapper = codecForName( p+1 );
       
   580         }
       
   581     }
       
   582     if ( !localeMapper || !(p && *p == '.') ) {
       
   583         // if there is none, we default to 8859-1
       
   584         // We could perhaps default to 8859-15.
       
   585         if ( try_locale_list( iso8859_2locales, lang ) )
       
   586             localeMapper = codecForName( "ISO 8859-2" );
       
   587         else if ( try_locale_list( iso8859_3locales, lang ) )
       
   588             localeMapper = codecForName( "ISO 8859-3" );
       
   589         else if ( try_locale_list( iso8859_4locales, lang ) )
       
   590             localeMapper = codecForName( "ISO 8859-4" );
       
   591         else if ( try_locale_list( iso8859_5locales, lang ) )
       
   592             localeMapper = codecForName( "ISO 8859-5" );
       
   593         else if ( try_locale_list( iso8859_6locales, lang ) )
       
   594             localeMapper = codecForName( "ISO 8859-6-I" );
       
   595         else if ( try_locale_list( iso8859_7locales, lang ) )
       
   596             localeMapper = codecForName( "ISO 8859-7" );
       
   597         else if ( try_locale_list( iso8859_8locales, lang ) )
       
   598             localeMapper = codecForName( "ISO 8859-8-I" );
       
   599         else if ( try_locale_list( iso8859_9locales, lang ) )
       
   600             localeMapper = codecForName( "ISO 8859-9" );
       
   601         else if ( try_locale_list( iso8859_15locales, lang ) )
       
   602             localeMapper = codecForName( "ISO 8859-15" );
       
   603         else if ( try_locale_list( tis_620locales, lang ) )
       
   604             localeMapper = codecForName( "ISO 8859-11" );
       
   605         else if ( try_locale_list( koi8_ulocales, lang ) )
       
   606             localeMapper = codecForName( "KOI8-U" );
       
   607          else if ( try_locale_list( probably_koi8_rlocales, lang ) )
       
   608             localeMapper = ru_RU_hack( lang );
       
   609         else if (!lang || !(localeMapper = codecForName(lang) ))
       
   610             localeMapper = codecForName( "ISO 8859-1" );
       
   611     }
       
   612     delete[] lang;
       
   613 #endif
       
   614 
       
   615     return localeMapper;
       
   616 }
       
   617 
       
   618 
       
   619 /*!
       
   620   Searches all installed QTextCodec objects, returning the one
       
   621   which best matches given name.  Returns NULL if no codec has
       
   622   a match closeness above \a accuracy.
       
   623 
       
   624   \sa heuristicNameMatch()
       
   625 */
       
   626 QTextCodec* QTextCodec::codecForName(const char* hint, int accuracy)
       
   627 {
       
   628     setup();
       
   629     QListIterator<QTextCodec> i(*all);
       
   630     QTextCodec* result = 0;
       
   631     int best=accuracy;
       
   632     for ( QTextCodec* cursor; (cursor=i); ++i ) {
       
   633         int s = cursor->heuristicNameMatch(hint);
       
   634         if ( s > best ) {
       
   635             best = s;
       
   636             result = cursor;
       
   637         }
       
   638     }
       
   639     return result;
       
   640 }
       
   641 
       
   642 
       
   643 /*!
       
   644   Searches all installed QTextCodec objects, returning the one
       
   645   which most recognizes the given content.  May return 0.
       
   646 
       
   647   Note that this is often a poor choice, since character
       
   648   encodings often use most of the available character sequences,
       
   649   and so only by linguistic analysis could a true match be made.
       
   650 
       
   651   \sa heuristicContentMatch()
       
   652 */
       
   653 QTextCodec* QTextCodec::codecForContent(const char* chars, int len)
       
   654 {
       
   655     setup();
       
   656     QListIterator<QTextCodec> i(*all);
       
   657     QTextCodec* result = 0;
       
   658     int best=0;
       
   659     for ( QTextCodec* cursor; (cursor=i); ++i ) {
       
   660         int s = cursor->heuristicContentMatch(chars,len);
       
   661         if ( s > best ) {
       
   662             best = s;
       
   663             result = cursor;
       
   664         }
       
   665     }
       
   666     return result;
       
   667 }
       
   668 
       
   669 
       
   670 /*!
       
   671   \fn const char* QTextCodec::name() const
       
   672   Subclasses of QTextCodec must reimplement this function.  It returns
       
   673   the name of the encoding supported by the subclass.  When choosing
       
   674   a name for an encoding, consider these points:
       
   675   <ul>
       
   676     <li>On X11, heuristicNameMatch( const char * hint )
       
    677         is used to test if the QTextCodec
       
   678         can convert between Unicode and the encoding of a font
       
   679         with encoding \e hint, such as "iso8859-1" for Latin-1 fonts,
       
   680         "koi8-r" for Russian KOI8 fonts.
       
   681         The default algorithm of heuristicNameMatch() uses name().
       
   682     <li>Some applications may use this function to present
       
   683         encodings to the end user.
       
   684   </ul>
       
   685 */
       
   686 
       
   687 /*!
       
   688   \fn int QTextCodec::mibEnum() const
       
   689 
       
   690   Subclasses of QTextCodec must reimplement this function.  It returns the
       
   691   MIBenum (see
       
   692   <a href="ftp://ftp.isi.edu/in-notes/iana/assignments/character-sets">
       
   693   the IANA character-sets encoding file</a> for more information).
       
   694   It is important that each QTextCodec subclass return the correct unique
       
   695   value for this function.
       
   696 */
       
   697 
       
   698 
       
   699 /*!
       
   700   \fn int QTextCodec::heuristicContentMatch(const char* chars, int len) const
       
   701 
       
   702   Subclasses of QTextCodec must reimplement this function.  It examines
       
   703   the first \a len bytes of \a chars and returns a value indicating how
       
   704   likely it is that the string is a prefix of text encoded in the
       
   705   encoding of the subclass.  Any negative return value indicates that the text
       
   706   is detectably not in the encoding (eg. it contains undefined characters).
       
   707   A return value of 0 indicates that the text should be decoded with this
       
   708   codec rather than as ASCII, but there
       
   709   is no particular evidence.  The value should range up to \a len.  Thus,
       
   710   most decoders will return -1, 0, or \a len.
       
   711 
       
   712   The characters are not null terminated.
       
   713 
       
   714   \sa codecForContent().
       
   715 */
       
   716 
       
   717 
       
   718 /*!
       
   719   Creates a QTextDecoder which stores enough state to decode chunks
       
   720   of char* data to create chunks of Unicode data.  The default implementation
       
   721   creates a stateless decoder, which is sufficient for only the simplest
       
   722   encodings where each byte corresponds to exactly one Unicode character.
       
   723 
       
   724   The caller is responsible for deleting the returned object.
       
   725 */
       
   726 QTextDecoder* QTextCodec::makeDecoder() const
       
   727 {
       
   728     return new QTextStatelessDecoder(this);
       
   729 }
       
   730 
       
   731 
       
   732 /*!
       
   733   Creates a QTextEncoder which stores enough state to encode chunks
       
   734   of Unicode data as char* data.  The default implementation
       
   735   creates a stateless encoder, which is sufficient for only the simplest
       
   736   encodings where each Unicode character corresponds to exactly one char.
       
   737 
       
   738   The caller is responsible for deleting the returned object.
       
   739 */
       
   740 QTextEncoder* QTextCodec::makeEncoder() const
       
   741 {
       
   742     return new QTextStatelessEncoder(this);
       
   743 }
       
   744 
       
   745 
       
   746 /*!
       
   747   Subclasses of QTextCodec must reimplement this function or
       
   748   makeDecoder().  It converts the first \a len characters of \a chars
       
   749   to Unicode.
       
   750 
       
   751   The default implementation makes a decoder with makeDecoder() and
       
   752   converts the input with that.  Note that the default makeDecoder()
       
   753   implementation makes a decoder that simply calls
       
   754   this function, hence subclasses \e must reimplement one function or
       
   755   the other to avoid infinite recursion.
       
   756 */
       
   757 QString QTextCodec::toUnicode(const char* chars, int len) const
       
   758 {
       
   759     QTextDecoder* i = makeDecoder();
       
   760     QString result = i->toUnicode(chars,len);
       
   761     delete i;
       
   762     return result;
       
   763 }
       
   764 
       
   765 
       
   766 /*!
       
   767   Subclasses of QTextCodec must reimplement either this function or
       
   768   makeEncoder().  It converts the first \a lenInOut characters of \a
       
   769   uc from Unicode to the encoding of the subclass.  If \a lenInOut
       
   770   is negative or too large, the length of \a uc is used instead.
       
   771 
       
   772   The value returned is the property of the caller, which is
       
   773   responsible for deleting it with "delete []".  The length of the
       
   774   resulting Unicode character sequence is returned in \a lenInOut.
       
   775 
       
   776   The default implementation makes an encoder with makeEncoder() and
       
   777   converts the input with that.  Note that the default makeEncoder()
       
   778   implementation makes an encoder that simply calls
       
   779   this function, hence subclasses \e must reimplement one function or
       
   780   the other to avoid infinite recursion.
       
   781 */
       
   782 
       
   783 QCString QTextCodec::fromUnicode(const QString& uc, int& lenInOut) const
       
   784 {
       
   785     QTextEncoder* i = makeEncoder();
       
   786     QCString result = i->fromUnicode(uc, lenInOut);
       
   787     delete i;
       
   788     return result;
       
   789 }
       
   790 
       
   791 /*!
       
   792   \overload QCString QTextCodec::fromUnicode(const QString& uc) const
       
   793 */
       
   794 QCString QTextCodec::fromUnicode(const QString& uc) const
       
   795 {
       
   796     int l = uc.length();
       
   797     return fromUnicode(uc,l);
       
   798 }
       
   799 
       
   800 /*!
       
   801   \overload QString QTextCodec::toUnicode(const QByteArray& a, int len) const
       
   802 */
       
   803 QString QTextCodec::toUnicode(const QByteArray& a, int len) const
       
   804 {
       
   805     int l = a.size();
       
   806     if( l > 0 && a.data()[l - 1] == '\0' ) l--;
       
   807     l = QMIN( l, len );
       
   808     return toUnicode( a.data(), l );
       
   809 }
       
   810 
       
   811 /*!
       
   812   \overload QString QTextCodec::toUnicode(const QByteArray& a) const
       
   813 */
       
   814 QString QTextCodec::toUnicode(const QByteArray& a) const
       
   815 {
       
   816     int l = a.size();
       
   817     if( l > 0 && a.data()[l - 1] == '\0' ) l--;
       
   818     return toUnicode( a.data(), l );
       
   819 }
       
   820 
       
   821 /*!
       
   822   \overload QString QTextCodec::toUnicode(const char* chars) const
       
   823 */
       
   824 QString QTextCodec::toUnicode(const char* chars) const
       
   825 {
       
   826     return toUnicode(chars,qstrlen(chars));
       
   827 }
       
   828 
       
   829 /*!
       
   830   Returns TRUE if the unicode character \a ch can be fully encoded
       
   831   with this codec.  The default implementation tests if the result of
       
   832   toUnicode(fromUnicode(ch)) is the original \a ch. Subclasses may be
       
   833   able to improve the efficiency.
       
   834 */
       
   835 bool QTextCodec::canEncode( QChar ch ) const
       
   836 {
       
   837     return toUnicode(fromUnicode(ch)) == ch;
       
   838 }
       
   839 
       
   840 /*!
       
   841   Returns TRUE if the unicode string \a s can be fully encoded
       
   842   with this codec.  The default implementation tests if the result of
       
   843   toUnicode(fromUnicode(s)) is the original \a s. Subclasses may be
       
   844   able to improve the efficiency.
       
   845 */
       
   846 bool QTextCodec::canEncode( const QString& s ) const
       
   847 {
       
   848     return toUnicode(fromUnicode(s)) == s;
       
   849 }
       
   850 
       
   851 
       
   852 
       
   853 /*!
       
   854   \class QTextEncoder qtextcodec.h
       
   855   \brief State-based encoder
       
   856 
       
   857   A QTextEncoder converts Unicode into another format, remembering
       
   858   any state that is required between calls.
       
   859 
       
   860   \sa QTextCodec::makeEncoder()
       
   861 */
       
   862 
       
   863 /*!
       
   864   Destructs the encoder.
       
   865 */
       
   866 QTextEncoder::~QTextEncoder()
       
   867 {
       
   868 }
       
   869 /*!
       
   870   \fn QCString QTextEncoder::fromUnicode(const QString& uc, int& lenInOut)
       
   871 
       
   872   Converts \a lenInOut characters (not bytes) from \a uc, producing
       
   873   a QCString.  \a lenInOut will also be set to the
       
   874   \link QCString::length() length\endlink of the result (in bytes).
       
   875 
       
   876   The encoder is free to record state to use when subsequent calls are
       
   877   made to this function (for example, it might change modes with escape
       
   878   sequences if needed during the encoding of one string, then assume that
       
   879   mode applies when a subsequent call begins).
       
   880 */
       
   881 
       
   882 /*!
       
   883   \class QTextDecoder qtextcodec.h
       
   884   \brief State-based decoder
       
   885 
       
   886   A QTextDecoder converts a text format into Unicode, remembering
       
   887   any state that is required between calls.
       
   888 
       
   889   \sa QTextCodec::makeDecoder()
       
   890 */
       
   891 
       
   892 
       
   893 /*!
       
   894   Destructs the decoder.
       
   895 */
       
   896 QTextDecoder::~QTextDecoder()
       
   897 {
       
   898 }
       
   899 
       
   900 /*!
       
   901   \fn QString QTextDecoder::toUnicode(const char* chars, int len)
       
   902 
       
   903   Converts the first \a len bytes at \a chars to Unicode, returning the
       
   904   result.
       
   905 
       
   906   If not all characters are used (eg. only part of a multi-byte
       
   907   encoding is at the end of the characters), the decoder remembers
       
   908   enough state to continue with the next call to this function.
       
   909 */
       
   910 
       
   911 #define CHAINED 0xffff
       
   912 
       
   913 struct QMultiByteUnicodeTable {
       
   914     // If multibyte, ignore unicode and index into multibyte
       
   915     //  with the next character.
       
   916     QMultiByteUnicodeTable() : unicode(0xfffd), multibyte(0) { }
       
   917 
       
   918     ~QMultiByteUnicodeTable()
       
   919     {
       
   920         if ( multibyte )
       
   921             delete [] multibyte;
       
   922     }
       
   923 
       
   924     ushort unicode;
       
   925     QMultiByteUnicodeTable* multibyte;
       
   926 };
       
   927 
       
   928 #ifndef QT_NO_CODECS
       
   929 static int getByte(char* &cursor)
       
   930 {
       
   931     int byte = 0;
       
   932     if ( *cursor ) {
       
   933         if ( cursor[1] == 'x' )
       
   934             byte = strtol(cursor+2,&cursor,16);
       
   935         else if ( cursor[1] == 'd' )
       
   936             byte = strtol(cursor+2,&cursor,10);
       
   937         else
       
   938             byte = strtol(cursor+2,&cursor,8);
       
   939     }
       
   940     return byte&0xff;
       
   941 }
       
   942 
       
   943 class QTextCodecFromIOD;
       
   944 
       
   945 class QTextCodecFromIODDecoder : public QTextDecoder {
       
   946     const QTextCodecFromIOD* codec;
       
   947     QMultiByteUnicodeTable* mb;
       
   948 public:
       
   949     QTextCodecFromIODDecoder(const QTextCodecFromIOD* c);
       
   950     QString toUnicode(const char* chars, int len);
       
   951 };
       
   952 
       
   953 class QTextCodecFromIOD : public QTextCodec {
       
   954     friend class QTextCodecFromIODDecoder;
       
   955 
       
   956     QCString n;
       
   957 
       
   958     // If from_unicode_page[row()][cell()] is 0 and from_unicode_page_multibyte,
       
   959     //  use from_unicode_page_multibyte[row()][cell()] as string.
       
   960     char** from_unicode_page;
       
   961     char*** from_unicode_page_multibyte;
       
   962     char unkn;
       
   963 
       
   964     // Only one of these is used
       
   965     ushort* to_unicode;
       
   966     QMultiByteUnicodeTable* to_unicode_multibyte;
       
   967     int max_bytes_per_char;
       
   968     QStrList aliases;
       
   969 
       
   970     bool stateless() const { return !to_unicode_multibyte; }
       
   971 
       
   972 public:
       
   973     QTextCodecFromIOD(QIODevice* iod)
       
   974     {
       
   975         from_unicode_page = 0;
       
   976         to_unicode_multibyte = 0;
       
   977         to_unicode = 0;
       
   978         from_unicode_page_multibyte = 0;
       
   979         max_bytes_per_char = 1;
       
   980 
       
   981         const int maxlen=100;
       
   982         char line[maxlen];
       
   983         char esc='\\';
       
   984         char comm='%';
       
   985         bool incmap = FALSE;
       
   986         while (iod->readLine(line,maxlen) > 0) {
       
   987             if (0==qstrnicmp(line,"<code_set_name>",15))
       
   988                 n = line+15;
       
   989             else if (0==qstrnicmp(line,"<escape_char> ",14))
       
   990                 esc = line[14];
       
   991             else if (0==qstrnicmp(line,"<comment_char> ",15))
       
   992                 comm = line[15];
       
   993             else if (line[0]==comm && 0==qstrnicmp(line+1," alias ",7)) {
       
   994                 aliases.append(line+8);
       
   995             } else if (0==qstrnicmp(line,"CHARMAP",7)) {
       
   996                 if (!from_unicode_page) {
       
   997                     from_unicode_page = new char*[256];
       
   998                     for (int i=0; i<256; i++)
       
   999                         from_unicode_page[i]=0;
       
  1000                 }
       
  1001                 if (!to_unicode) {
       
  1002                     to_unicode = new ushort[256];
       
  1003                 }
       
  1004                 incmap = TRUE;
       
  1005             } else if (0==qstrnicmp(line,"END CHARMAP",11))
       
  1006                 break;
       
  1007             else if (incmap) {
       
  1008                 char* cursor = line;
       
  1009                 int byte,unicode=-1;
       
  1010                 ushort* mb_unicode=0;
       
  1011                 const int maxmb=8; // more -> we'll need to improve datastructures
       
  1012                 char mb[maxmb+1];
       
  1013                 int nmb=0;
       
  1014 
       
  1015                 while (*cursor) {
       
  1016                     if (cursor[0]=='<' && cursor[1]=='U' &&
       
  1017                         cursor[2]>='0' && cursor[2]<='9' &&
       
  1018                         cursor[3]>='0' && cursor[3]<='9') {
       
  1019 
       
  1020                         unicode = strtol(cursor+2,&cursor,16);
       
  1021 
       
  1022                     } else if (*cursor==esc) {
       
  1023 
       
  1024                         byte = getByte(cursor);
       
  1025 
       
  1026                         if ( *cursor == esc ) {
       
  1027                             if ( !to_unicode_multibyte ) {
       
  1028                                 to_unicode_multibyte =
       
  1029                                     new QMultiByteUnicodeTable[256];
       
  1030                                 for (int i=0; i<256; i++) {
       
  1031                                     to_unicode_multibyte[i].unicode =
       
  1032                                         to_unicode[i];
       
  1033                                     to_unicode_multibyte[i].multibyte = 0;
       
  1034                                 }
       
  1035                                 delete [] to_unicode;
       
  1036                                 to_unicode = 0;
       
  1037                             }
       
  1038                             QMultiByteUnicodeTable* mbut =
       
  1039                                 to_unicode_multibyte+byte;
       
  1040                             mb[nmb++] = byte;
       
  1041                             while ( nmb < maxmb && *cursor == esc ) {
       
  1042                                 // Always at least once
       
  1043 
       
  1044                                 mbut->unicode = CHAINED;
       
  1045                                 byte = getByte(cursor);
       
  1046                                 mb[nmb++] = byte;
       
  1047                                 if (!mbut->multibyte) {
       
  1048                                     mbut->multibyte =
       
  1049                                         new QMultiByteUnicodeTable[256];
       
  1050                                 }
       
  1051                                 mbut = mbut->multibyte+byte;
       
  1052                                 mb_unicode = & mbut->unicode;
       
  1053                             }
       
  1054 
       
  1055                             if ( nmb > max_bytes_per_char )
       
  1056                                 max_bytes_per_char = nmb;
       
  1057                         }
       
  1058                     } else {
       
  1059                         cursor++;
       
  1060                     }
       
  1061                 }
       
  1062 
       
  1063                 if (unicode >= 0 && unicode <= 0xffff)
       
  1064                 {
       
  1065                     QChar ch((ushort)unicode);
       
  1066                     if (!from_unicode_page[ch.row()]) {
       
  1067                         from_unicode_page[ch.row()] = new char[256];
       
  1068                         for (int i=0; i<256; i++)
       
  1069                             from_unicode_page[ch.row()][i]=0;
       
  1070                     }
       
  1071                     if ( mb_unicode ) {
       
  1072                         from_unicode_page[ch.row()][ch.cell()] = 0;
       
  1073                         if (!from_unicode_page_multibyte) {
       
  1074                             from_unicode_page_multibyte = new char**[256];
       
  1075                             for (int i=0; i<256; i++)
       
  1076                                 from_unicode_page_multibyte[i]=0;
       
  1077                         }
       
  1078                         if (!from_unicode_page_multibyte[ch.row()]) {
       
  1079                             from_unicode_page_multibyte[ch.row()] = new char*[256];
       
  1080                             for (int i=0; i<256; i++)
       
  1081                                 from_unicode_page_multibyte[ch.row()][i] = 0;
       
  1082                         }
       
  1083                         mb[nmb++] = 0;
       
  1084                         from_unicode_page_multibyte[ch.row()][ch.cell()]
       
  1085                             = qstrdup(mb);
       
  1086                         *mb_unicode = unicode;
       
  1087                     } else {
       
  1088                         from_unicode_page[ch.row()][ch.cell()] = (char)byte;
       
  1089                         if ( to_unicode )
       
  1090                             to_unicode[byte] = unicode;
       
  1091                         else
       
  1092                             to_unicode_multibyte[byte].unicode = unicode;
       
  1093                     }
       
  1094                 } else {
       
  1095                 }
       
  1096             }
       
  1097         }
       
  1098         n = n.stripWhiteSpace();
       
  1099 
       
  1100         unkn = '?'; // ##### Might be a bad choice.
       
  1101     }
       
  1102 
       
  1103     ~QTextCodecFromIOD()
       
  1104     {
       
  1105         if ( from_unicode_page ) {
       
  1106             for (int i=0; i<256; i++)
       
  1107                 if (from_unicode_page[i])
       
  1108                     delete [] from_unicode_page[i];
       
  1109         }
       
  1110         if ( from_unicode_page_multibyte ) {
       
  1111             for (int i=0; i<256; i++)
       
  1112                 if (from_unicode_page_multibyte[i])
       
  1113                     for (int j=0; j<256; j++)
       
  1114                         if (from_unicode_page_multibyte[i][j])
       
  1115                             delete [] from_unicode_page_multibyte[i][j];
       
  1116         }
       
  1117         if ( to_unicode )
       
  1118             delete [] to_unicode;
       
  1119         if ( to_unicode_multibyte )
       
  1120             delete [] to_unicode_multibyte;
       
  1121     }
       
  1122 
       
  1123     bool ok() const
       
  1124     {
       
  1125         return !!from_unicode_page;
       
  1126     }
       
  1127 
       
  1128     QTextDecoder* makeDecoder() const
       
  1129     {
       
  1130         if ( stateless() )
       
  1131             return QTextCodec::makeDecoder();
       
  1132         else
       
  1133             return new QTextCodecFromIODDecoder(this);
       
  1134     }
       
  1135 
       
  1136     const char* name() const
       
  1137     {
       
  1138         return n;
       
  1139     }
       
  1140 
       
  1141     int mibEnum() const
       
  1142     {
       
  1143         return 0; // #### Unknown.
       
  1144     }
       
  1145 
       
  1146     int heuristicContentMatch(const char*, int) const
       
  1147     {
       
  1148         return 0;
       
  1149     }
       
  1150 
       
  1151     int heuristicNameMatch(const char* hint) const
       
  1152     {
       
  1153         int bestr = QTextCodec::heuristicNameMatch(hint);
       
  1154         QStrListIterator it(aliases);
       
  1155         char* a;
       
  1156         while ((a=it.current())) {
       
  1157             ++it;
       
  1158             int r = simpleHeuristicNameMatch(a,hint);
       
  1159             if (r > bestr)
       
  1160                 bestr = r;
       
  1161         }
       
  1162         return bestr;
       
  1163     }
       
  1164 
       
  1165     QString toUnicode(const char* chars, int len) const
       
  1166     {
       
  1167         const uchar* uchars = (const uchar*)chars;
       
  1168         QString result;
       
  1169         QMultiByteUnicodeTable* multibyte=to_unicode_multibyte;
       
  1170         if ( multibyte ) {
       
  1171             while (len--) {
       
  1172                 QMultiByteUnicodeTable& mb = multibyte[*uchars];
       
  1173                 if ( mb.multibyte ) {
       
  1174                     // Chained multi-byte
       
  1175                     multibyte = mb.multibyte;
       
  1176                 } else {
       
  1177                     result += QChar(mb.unicode);
       
  1178                     multibyte=to_unicode_multibyte;
       
  1179                 }
       
  1180                 uchars++;
       
  1181             }
       
  1182         } else {
       
  1183             while (len--)
       
  1184                 result += QChar(to_unicode[*uchars++]);
       
  1185         }
       
  1186         return result;
       
  1187     }
       
  1188 
       
  1189     QCString fromUnicode(const QString& uc, int& lenInOut) const
       
  1190     {
       
  1191         if (lenInOut > (int)uc.length())
       
  1192             lenInOut = uc.length();
       
  1193         int rlen = lenInOut*max_bytes_per_char;
       
  1194         QCString rstr(rlen);
       
  1195         char* cursor = rstr.data();
       
  1196         char* s=0;
       
  1197         int l = lenInOut;
       
  1198         int lout = 0;
       
  1199         for (int i=0; i<l; i++) {
       
  1200             QChar ch = uc[i];
       
  1201             if ( ch == QChar::null ) {
       
  1202                 // special
       
  1203                 *cursor++ = 0;
       
  1204             } else if ( from_unicode_page[ch.row()] &&
       
  1205                 from_unicode_page[ch.row()][ch.cell()] )
       
  1206             {
       
  1207                 *cursor++ = from_unicode_page[ch.row()][ch.cell()];
       
  1208                 lout++;
       
  1209             } else if ( from_unicode_page_multibyte &&
       
  1210                       from_unicode_page_multibyte[ch.row()] &&
       
  1211                       (s=from_unicode_page_multibyte[ch.row()][ch.cell()]) )
       
  1212             {
       
  1213                 while (*s) {
       
  1214                     *cursor++ = *s++;
       
  1215                     lout++;
       
  1216                 }
       
  1217             } else {
       
  1218                 *cursor++ = unkn;
       
  1219                 lout++;
       
  1220             }
       
  1221         }
       
  1222         *cursor = 0;
       
  1223         lenInOut = lout;
       
  1224         return rstr;
       
  1225     }
       
  1226 };
       
  1227 
       
  1228 QTextCodecFromIODDecoder::QTextCodecFromIODDecoder(const QTextCodecFromIOD* c) :
       
  1229     codec(c)
       
  1230 {
       
  1231     mb = codec->to_unicode_multibyte;
       
  1232 }
       
  1233 
       
  1234 QString QTextCodecFromIODDecoder::toUnicode(const char* chars, int len)
       
  1235 {
       
  1236     const uchar* uchars = (const uchar*)chars;
       
  1237     QString result;
       
  1238     while (len--) {
       
  1239         QMultiByteUnicodeTable& t = mb[*uchars];
       
  1240         if ( t.multibyte ) {
       
  1241             // Chained multi-byte
       
  1242             mb = t.multibyte;
       
  1243         } else {
       
  1244             if ( t.unicode )
       
  1245                 result += QChar(t.unicode);
       
  1246             mb=codec->to_unicode_multibyte;
       
  1247         }
       
  1248         uchars++;
       
  1249     }
       
  1250     return result;
       
  1251 }
       
  1252 
       
  1253 /*!
       
  1254   Reads a POSIX2 charmap definition from \a iod.
       
  1255   The parser recognizes the following lines:
       
  1256 <pre>
       
  1257    &lt;code_set_name&gt; <i>name</i>
       
  1258    &lt;escape_char&gt; <i>character</i>
       
  1259    % alias <i>alias</i>
       
  1260    CHARMAP
       
  1261    &lt;<i>token</i>&gt; /x<i>hexbyte</i> &lt;U<i>unicode</i>&gt; ...
       
  1262    &lt;<i>token</i>&gt; /d<i>decbyte</i> &lt;U<i>unicode</i>&gt; ...
       
  1263    &lt;<i>token</i>&gt; /<i>octbyte</i> &lt;U<i>unicode</i>&gt; ...
       
  1264    &lt;<i>token</i>&gt; /<i>any</i>/<i>any</i>... &lt;U<i>unicode</i>&gt; ...
       
  1265    END CHARMAP
       
  1266 </pre>
       
  1267 
       
  1268   The resulting QTextCodec is returned (and also added to the
       
  1269   global list of codecs).  The name() of the result is taken
       
  1270   from the code_set_name.
       
  1271 
       
  1272   Note that a codec constructed in this way uses much more memory
       
  1273   and is slower than a hand-written QTextCodec subclass, since
       
  1274   tables in code are in memory shared by all applications simultaneously
       
  1275   using Qt.
       
  1276 
       
  1277   \sa loadCharmapFile()
       
  1278 */
       
  1279 QTextCodec* QTextCodec::loadCharmap(QIODevice* iod)
       
  1280 {
       
  1281     QTextCodecFromIOD* r = new QTextCodecFromIOD(iod);
       
  1282     if ( !r->ok() ) {
       
  1283         delete r;
       
  1284         r = 0;
       
  1285     }
       
  1286     return r;
       
  1287 }
       
  1288 
       
  1289 /*!
       
  1290   A convenience function for loadCharmap().
       
  1291 */
       
  1292 QTextCodec* QTextCodec::loadCharmapFile(QString filename)
       
  1293 {
       
  1294     QFile f(filename);
       
  1295     if (f.open(IO_ReadOnly)) {
       
  1296         QTextCodecFromIOD* r = new QTextCodecFromIOD(&f);
       
  1297         if ( !r->ok() )
       
  1298             delete r;
       
  1299         else
       
  1300             return r;
       
  1301     }
       
  1302     return 0;
       
  1303 }
       
  1304 #endif //QT_NO_CODECS
       
  1305 
       
  1306 
       
  1307 /*!
       
  1308   Returns a string representing the current language.
       
  1309 */
       
  1310 
       
  1311 const char* QTextCodec::locale()
       
  1312 {
       
  1313     static QCString lang;
       
  1314     if ( lang.isEmpty() ) {
       
  1315         lang = getenv( "LANG" ); //########Windows??
       
  1316         if ( lang.isEmpty() )
       
  1317             lang = "C";
       
  1318     }
       
  1319     return lang;
       
  1320 }
       
  1321 
       
  1322 
       
  1323 
       
  1324 #ifndef QT_NO_CODECS
       
  1325 
       
  1326 class QSimpleTextCodec: public QTextCodec
       
  1327 {
       
  1328 public:
       
  1329     QSimpleTextCodec( int );
       
  1330     ~QSimpleTextCodec();
       
  1331 
       
  1332     QString toUnicode(const char* chars, int len) const;
       
  1333     QCString fromUnicode(const QString& uc, int& lenInOut ) const;
       
  1334 
       
  1335     const char* name() const;
       
  1336     int mibEnum() const;
       
  1337 
       
  1338     int heuristicContentMatch(const char* chars, int len) const;
       
  1339 
       
  1340     int heuristicNameMatch(const char* hint) const;
       
  1341 
       
  1342 private:
       
  1343     int forwardIndex;
       
  1344 };
       
  1345 
       
  1346 
       
  1347 #define LAST_MIB 2259
       
  1348 
       
  1349 static struct {
       
  1350     const char * cs;
       
  1351     int mib;
       
  1352     Q_UINT16 values[128];
       
  1353 } unicodevalues[] = {
       
  1354     // from RFC 1489, ftp://ftp.isi.edu/in-notes/rfc1489.txt
       
  1355     { "KOI8-R", 2084,
       
  1356       { 0x2500, 0x2502, 0x250C, 0x2510, 0x2514, 0x2518, 0x251C, 0x2524,
       
  1357         0x252C, 0x2534, 0x253C, 0x2580, 0x2584, 0x2588, 0x258C, 0x2590,
       
  1358         0x2591, 0x2592, 0x2593, 0x2320, 0x25A0, 0x2219/**/, 0x221A, 0x2248,
       
  1359         0x2264, 0x2265, 0x00A0, 0x2321, 0x00B0, 0x00B2, 0x00B7, 0x00F7,
       
  1360         0x2550, 0x2551, 0x2552, 0x0451, 0x2553, 0x2554, 0x2555, 0x2556,
       
  1361         0x2557, 0x2558, 0x2559, 0x255A, 0x255B, 0x255C, 0x255D, 0x255E,
       
  1362         0x255F, 0x2560, 0x2561, 0x0401, 0x2562, 0x2563, 0x2564, 0x2565,
       
  1363         0x2566, 0x2567, 0x2568, 0x2569, 0x256A, 0x256B, 0x256C, 0x00A9,
       
  1364         0x044E, 0x0430, 0x0431, 0x0446, 0x0434, 0x0435, 0x0444, 0x0433,
       
  1365         0x0445, 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E,
       
  1366         0x043F, 0x044F, 0x0440, 0x0441, 0x0442, 0x0443, 0x0436, 0x0432,
       
  1367         0x044C, 0x044B, 0x0437, 0x0448, 0x044D, 0x0449, 0x0447, 0x044A,
       
  1368         0x042E, 0x0410, 0x0411, 0x0426, 0x0414, 0x0415, 0x0424, 0x0413,
       
  1369         0x0425, 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E,
       
  1370         0x041F, 0x042F, 0x0420, 0x0421, 0x0422, 0x0423, 0x0416, 0x0412,
       
  1371         0x042C, 0x042B, 0x0417, 0x0428, 0x042D, 0x0429, 0x0427, 0x042A } },
       
  1372     // /**/  - The BULLET OPERATOR is confused.  Some people think
       
  1373     //          it should be 0x2022 (BULLET).
       
  1374 
       
  1375     // from RFC 2319, ftp://ftp.isi.edu/in-notes/rfc2319.txt
       
  1376     { "KOI8-U", 2088,
       
  1377       { 0x2500, 0x2502, 0x250C, 0x2510, 0x2514, 0x2518, 0x251C, 0x2524,
       
  1378 	0x252C, 0x2534, 0x253C, 0x2580, 0x2584, 0x2588, 0x258C, 0x2590,
       
  1379 	0x2591, 0x2592, 0x2593, 0x2320, 0x25A0, 0x2219, 0x221A, 0x2248,
       
  1380 	0x2264, 0x2265, 0x00A0, 0x2321, 0x00B0, 0x00B2, 0x00B7, 0x00F7,
       
  1381 	0x2550, 0x2551, 0x2552, 0x0451, 0x0454, 0x2554, 0x0456, 0x0457,
       
  1382 	0x2557, 0x2558, 0x2559, 0x255A, 0x255B, 0x0491, 0x255D, 0x255E,
       
  1383 	0x255F, 0x2560, 0x2561, 0x0401, 0x0404, 0x2563, 0x0406, 0x0407,
       
  1384 	0x2566, 0x2567, 0x2568, 0x2569, 0x256A, 0x0490, 0x256C, 0x00A9,
       
  1385 	0x044E, 0x0430, 0x0431, 0x0446, 0x0434, 0x0435, 0x0444, 0x0433,
       
  1386 	0x0445, 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E,
       
  1387 	0x043F, 0x044F, 0x0440, 0x0441, 0x0442, 0x0443, 0x0436, 0x0432,
       
  1388 	0x044C, 0x044B, 0x0437, 0x0448, 0x044D, 0x0449, 0x0447, 0x044A,
       
  1389 	0x042E, 0x0410, 0x0411, 0x0426, 0x0414, 0x0415, 0x0424, 0x0413,
       
  1390 	0x0425, 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E,
       
  1391 	0x041F, 0x042F, 0x0420, 0x0421, 0x0422, 0x0423, 0x0416, 0x0412,
       
  1392 	0x042C, 0x042B, 0x0417, 0x0428, 0x042D, 0x0429, 0x0427, 0x042A } },
       
  1393 
       
  1394     // The next entries were generated from tables on the Unicode 2.0 CD.
       
  1395     // We can use these tables since this is part of the transition to
       
  1396     // using Unicode everywhere in Qt.
       
  1397 
       
  1398     // $ for A in 8 9 A B C D E F ; do for B in 0 1 2 3 4 5 6 7 8 9 A B C D E F ; do echo 0x${A}${B} 0xFFFD ; done ; done > /tmp/digits ; for a in 8859-* ; do ( awk '/^0x[89ABCDEF]/{ print $1, $2 }' < $a ; cat /tmp/digits ) | sort | uniq -w4 | cut -c6- | paste '-d ' - - - - - - - - | sed -e 's/ /, /g' -e 's/$/,/' -e '$ s/,$/} },/' -e '1 s/^/{ /' > ~/tmp/$a ; done
       
  1399 
       
  1400     // The generated files were then inserted here manually.
       
  1401     { "ISO 8859-2", 5,
       
  1402         { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
       
  1403               0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
       
  1404               0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
       
  1405               0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
       
  1406               0x00A0, 0x0104, 0x02D8, 0x0141, 0x00A4, 0x013D, 0x015A, 0x00A7,
       
  1407               0x00A8, 0x0160, 0x015E, 0x0164, 0x0179, 0x00AD, 0x017D, 0x017B,
       
  1408               0x00B0, 0x0105, 0x02DB, 0x0142, 0x00B4, 0x013E, 0x015B, 0x02C7,
       
  1409               0x00B8, 0x0161, 0x015F, 0x0165, 0x017A, 0x02DD, 0x017E, 0x017C,
       
  1410               0x0154, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x0139, 0x0106, 0x00C7,
       
  1411               0x010C, 0x00C9, 0x0118, 0x00CB, 0x011A, 0x00CD, 0x00CE, 0x010E,
       
  1412               0x0110, 0x0143, 0x0147, 0x00D3, 0x00D4, 0x0150, 0x00D6, 0x00D7,
       
  1413               0x0158, 0x016E, 0x00DA, 0x0170, 0x00DC, 0x00DD, 0x0162, 0x00DF,
       
  1414               0x0155, 0x00E1, 0x00E2, 0x0103, 0x00E4, 0x013A, 0x0107, 0x00E7,
       
  1415               0x010D, 0x00E9, 0x0119, 0x00EB, 0x011B, 0x00ED, 0x00EE, 0x010F,
       
  1416               0x0111, 0x0144, 0x0148, 0x00F3, 0x00F4, 0x0151, 0x00F6, 0x00F7,
       
  1417               0x0159, 0x016F, 0x00FA, 0x0171, 0x00FC, 0x00FD, 0x0163, 0x02D9} },
       
  1418     { "ISO 8859-3", 6,
       
  1419         { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
       
  1420               0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
       
  1421               0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
       
  1422               0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
       
  1423               0x00A0, 0x0126, 0x02D8, 0x00A3, 0x00A4, 0xFFFD, 0x0124, 0x00A7,
       
  1424               0x00A8, 0x0130, 0x015E, 0x011E, 0x0134, 0x00AD, 0xFFFD, 0x017B,
       
  1425               0x00B0, 0x0127, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x0125, 0x00B7,
       
  1426               0x00B8, 0x0131, 0x015F, 0x011F, 0x0135, 0x00BD, 0xFFFD, 0x017C,
       
  1427               0x00C0, 0x00C1, 0x00C2, 0xFFFD, 0x00C4, 0x010A, 0x0108, 0x00C7,
       
  1428               0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
       
  1429               0xFFFD, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x0120, 0x00D6, 0x00D7,
       
  1430               0x011C, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x016C, 0x015C, 0x00DF,
       
  1431               0x00E0, 0x00E1, 0x00E2, 0xFFFD, 0x00E4, 0x010B, 0x0109, 0x00E7,
       
  1432               0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
       
  1433               0xFFFD, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x0121, 0x00F6, 0x00F7,
       
  1434               0x011D, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x016D, 0x015D, 0x02D9} },
       
  1435     { "ISO 8859-4", 7,
       
  1436         { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
       
  1437               0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
       
  1438               0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
       
  1439               0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
       
  1440               0x00A0, 0x0104, 0x0138, 0x0156, 0x00A4, 0x0128, 0x013B, 0x00A7,
       
  1441               0x00A8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00AD, 0x017D, 0x00AF,
       
  1442               0x00B0, 0x0105, 0x02DB, 0x0157, 0x00B4, 0x0129, 0x013C, 0x02C7,
       
  1443               0x00B8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014A, 0x017E, 0x014B,
       
  1444               0x0100, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x012E,
       
  1445               0x010C, 0x00C9, 0x0118, 0x00CB, 0x0116, 0x00CD, 0x00CE, 0x012A,
       
  1446               0x0110, 0x0145, 0x014C, 0x0136, 0x00D4, 0x00D5, 0x00D6, 0x00D7,
       
  1447               0x00D8, 0x0172, 0x00DA, 0x00DB, 0x00DC, 0x0168, 0x016A, 0x00DF,
       
  1448               0x0101, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x012F,
       
  1449               0x010D, 0x00E9, 0x0119, 0x00EB, 0x0117, 0x00ED, 0x00EE, 0x012B,
       
  1450               0x0111, 0x0146, 0x014D, 0x0137, 0x00F4, 0x00F5, 0x00F6, 0x00F7,
       
  1451               0x00F8, 0x0173, 0x00FA, 0x00FB, 0x00FC, 0x0169, 0x016B, 0x02D9} },
       
  1452     { "ISO 8859-5", 8,
       
  1453         { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
       
  1454               0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
       
  1455               0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
       
  1456               0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
       
  1457               0x00A0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407,
       
  1458               0x0408, 0x0409, 0x040A, 0x040B, 0x040C, 0x00AD, 0x040E, 0x040F,
       
  1459               0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
       
  1460               0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, 0x041F,
       
  1461               0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
       
  1462               0x0428, 0x0429, 0x042A, 0x042B, 0x042C, 0x042D, 0x042E, 0x042F,
       
  1463               0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
       
  1464               0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, 0x043F,
       
  1465               0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
       
  1466               0x0448, 0x0449, 0x044A, 0x044B, 0x044C, 0x044D, 0x044E, 0x044F,
       
  1467               0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457,
       
  1468               0x0458, 0x0459, 0x045A, 0x045B, 0x045C, 0x00A7, 0x045E, 0x045F} },
       
  1469     { "ISO 8859-6-I", 82,
       
  1470         { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
       
  1471               0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
       
  1472               0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
       
  1473               0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
       
  1474               0x00A0, 0xFFFD, 0xFFFD, 0xFFFD, 0x00A4, 0xFFFD, 0xFFFD, 0xFFFD,
       
  1475               0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x060C, 0x00AD, 0xFFFD, 0xFFFD,
       
  1476               0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
       
  1477               0xFFFD, 0xFFFD, 0xFFFD, 0x061B, 0xFFFD, 0xFFFD, 0xFFFD, 0x061F,
       
  1478               0xFFFD, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
       
  1479               0x0628, 0x0629, 0x062A, 0x062B, 0x062C, 0x062D, 0x062E, 0x062F,
       
  1480               0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637,
       
  1481               0x0638, 0x0639, 0x063A, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
       
  1482               0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647,
       
  1483               0x0648, 0x0649, 0x064A, 0x064B, 0x064C, 0x064D, 0x064E, 0x064F,
       
  1484               0x0650, 0x0651, 0x0652, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
       
  1485               0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD} },
       
  1486     { "ISO 8859-7", 10,
       
  1487         { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
       
  1488               0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
       
  1489               0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
       
  1490               0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
       
  1491               0x00A0, 0x2018, 0x2019, 0x00A3, 0xFFFD, 0xFFFD, 0x00A6, 0x00A7,
       
  1492               0x00A8, 0x00A9, 0xFFFD, 0x00AB, 0x00AC, 0x00AD, 0xFFFD, 0x2015,
       
  1493               0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x0384, 0x0385, 0x0386, 0x00B7,
       
  1494               0x0388, 0x0389, 0x038A, 0x00BB, 0x038C, 0x00BD, 0x038E, 0x038F,
       
  1495               0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
       
  1496               0x0398, 0x0399, 0x039A, 0x039B, 0x039C, 0x039D, 0x039E, 0x039F,
       
  1497               0x03A0, 0x03A1, 0xFFFD, 0x03A3, 0x03A4, 0x03A5, 0x03A6, 0x03A7,
       
  1498               0x03A8, 0x03A9, 0x03AA, 0x03AB, 0x03AC, 0x03AD, 0x03AE, 0x03AF,
       
  1499               0x03B0, 0x03B1, 0x03B2, 0x03B3, 0x03B4, 0x03B5, 0x03B6, 0x03B7,
       
  1500               0x03B8, 0x03B9, 0x03BA, 0x03BB, 0x03BC, 0x03BD, 0x03BE, 0x03BF,
       
  1501               0x03C0, 0x03C1, 0x03C2, 0x03C3, 0x03C4, 0x03C5, 0x03C6, 0x03C7,
       
  1502               0x03C8, 0x03C9, 0x03CA, 0x03CB, 0x03CC, 0x03CD, 0x03CE, 0xFFFD} },
       
  1503     { "ISO 8859-8-I", 85,
       
  1504         { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
       
  1505               0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
       
  1506               0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
       
  1507               0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
       
  1508               0x00A0, 0xFFFD, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,
       
  1509               0x00A8, 0x00A9, 0x00D7, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x203E,
       
  1510               0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
       
  1511               0x00B8, 0x00B9, 0x00F7, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0xFFFD,
       
  1512               0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
       
  1513               0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
       
  1514               0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
       
  1515               0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x2017,
       
  1516               0x05D0, 0x05D1, 0x05D2, 0x05D3, 0x05D4, 0x05D5, 0x05D6, 0x05D7,
       
  1517               0x05D8, 0x05D9, 0x05DA, 0x05DB, 0x05DC, 0x05DD, 0x05DE, 0x05DF,
       
  1518               0x05E0, 0x05E1, 0x05E2, 0x05E3, 0x05E4, 0x05E5, 0x05E6, 0x05E7,
       
  1519               0x05E8, 0x05E9, 0x05EA, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD} },
       
  1520     { "ISO 8859-9", 12,
       
  1521         { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
       
  1522               0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
       
  1523               0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
       
  1524               0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
       
  1525               0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,
       
  1526               0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
       
  1527               0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
       
  1528               0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF,
       
  1529               0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7,
       
  1530               0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
       
  1531               0x011E, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7,
       
  1532               0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x0130, 0x015E, 0x00DF,
       
  1533               0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7,
       
  1534               0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
       
  1535               0x011F, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7,
       
  1536               0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x0131, 0x015F, 0x00FF} },
       
  1537     { "ISO 8859-10", 13,
       
  1538         { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
       
  1539               0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
       
  1540               0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
       
  1541               0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
       
  1542               0x00A0, 0x0104, 0x0112, 0x0122, 0x012A, 0x0128, 0x0136, 0x00A7,
       
  1543               0x013B, 0x0110, 0x0160, 0x0166, 0x017D, 0x00AD, 0x016A, 0x014A,
       
  1544               0x00B0, 0x0105, 0x0113, 0x0123, 0x012B, 0x0129, 0x0137, 0x00B7,
       
  1545               0x013C, 0x0111, 0x0161, 0x0167, 0x017E, 0x2015, 0x016B, 0x014B,
       
  1546               0x0100, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x012E,
       
  1547               0x010C, 0x00C9, 0x0118, 0x00CB, 0x0116, 0x00CD, 0x00CE, 0x00CF,
       
  1548               0x00D0, 0x0145, 0x014C, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x0168,
       
  1549               0x00D8, 0x0172, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF,
       
  1550               0x0101, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x012F,
       
  1551               0x010D, 0x00E9, 0x0119, 0x00EB, 0x0117, 0x00ED, 0x00EE, 0x00EF,
       
  1552               0x00F0, 0x0146, 0x014D, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x0169,
       
  1553               0x00F8, 0x0173, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x0138} },
       
  1554     { "ISO 8859-13", 109,
       
  1555         { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
       
  1556               0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
       
  1557               0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
       
  1558               0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
       
  1559               0x00A0, 0x201D, 0x00A2, 0x00A3, 0x00A4, 0x201E, 0x00A6, 0x00A7,
       
  1560               0x00D8, 0x00A9, 0x0156, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00C6,
       
  1561               0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x201C, 0x00B5, 0x00B6, 0x00B7,
       
  1562               0x00F8, 0x00B9, 0x0157, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00E6,
       
  1563               0x0104, 0x012E, 0x0100, 0x0106, 0x00C4, 0x00C5, 0x0118, 0x0112,
       
  1564               0x010C, 0x00C9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012A, 0x013B,
       
  1565               0x0160, 0x0143, 0x0145, 0x00D3, 0x014C, 0x00D5, 0x00D6, 0x00D7,
       
  1566               0x0172, 0x0141, 0x015A, 0x016A, 0x00DC, 0x017B, 0x017D, 0x00DF,
       
  1567               0x0105, 0x012F, 0x0101, 0x0107, 0x00E4, 0x00E5, 0x0119, 0x0113,
       
  1568               0x010D, 0x00E9, 0x017A, 0x0117, 0x0123, 0x0137, 0x012B, 0x013C,
       
  1569               0x0161, 0x0144, 0x0146, 0x00F3, 0x014D, 0x00F5, 0x00F6, 0x00F7,
       
  1570               0x0173, 0x0142, 0x015B, 0x016B, 0x00FC, 0x017C, 0x017E, 0x2019} },
       
  1571     { "ISO 8859-14", 110,
       
  1572         { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
       
  1573               0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
       
  1574               0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
       
  1575               0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
       
  1576               0x00A0, 0x1E02, 0x1E03, 0x00A3, 0x010A, 0x010B, 0x1E0A, 0x00A7,
       
  1577               0x1E80, 0x00A9, 0x1E82, 0x1E0B, 0x1EF2, 0x00AD, 0x00AE, 0x0178,
       
  1578               0x1E1E, 0x1E1F, 0x0120, 0x0121, 0x1E40, 0x1E41, 0x00B6, 0x1E56,
       
  1579               0x1E81, 0x1E57, 0x1E83, 0x1E60, 0x1EF3, 0x1E84, 0x1E85, 0x1E61,
       
  1580               0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7,
       
  1581               0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
       
  1582               0x0174, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x1E6A,
       
  1583               0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x0176, 0x00DF,
       
  1584               0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7,
       
  1585               0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
       
  1586               0x0175, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x1E6B,
       
  1587               0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x0177, 0x00FF} },
       
  1588     { "ISO 8859-15", 111,
       
  1589         { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
       
  1590               0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
       
  1591               0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
       
  1592               0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
       
  1593               0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x20AC, 0x00A5, 0x0160, 0x00A7,
       
  1594               0x0161, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
       
  1595               0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x017D, 0x00B5, 0x00B6, 0x00B7,
       
  1596               0x017E, 0x00B9, 0x00BA, 0x00BB, 0x0152, 0x0153, 0x0178, 0x00BF,
       
  1597               0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7,
       
  1598               0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
       
  1599               0x00D0, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7,
       
  1600               0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF,
       
  1601               0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7,
       
  1602               0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
       
  1603               0x00F0, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7,
       
  1604               0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x00FF} },
       
  1605 
       
  1606     // The next entries were generated, again, from tables on the Unicode 3.0 CD.
       
  1607 
       
  1608     // $ for a in CP* ; do ( awk '/^0x[89ABCDEF]/{ print $1, $2 }' < $a ) | sort | sed -e 's/#UNDEF.*$/0xFFFD/' | cut -c6- | paste '-d ' - - - - - - - - | sed -e 's/ /, /g' -e 's/$/,/' -e '$ s/,$/} },/' -e '1 s/^/{ /' > ~/tmp/$a ; done
       
  1609 
       
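  1610     { "CP 874", 0, //### what is the mib?  NOTE(review): IANA appears to register windows-874 as MIBenum 2109 -- confirm before replacing the 0.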
       
  1611         { 0x20AC, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x2026, 0xFFFD, 0xFFFD,
       
  1612               0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
       
  1613               0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
       
  1614               0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
       
  1615               0x00A0, 0x0E01, 0x0E02, 0x0E03, 0x0E04, 0x0E05, 0x0E06, 0x0E07,
       
  1616               0x0E08, 0x0E09, 0x0E0A, 0x0E0B, 0x0E0C, 0x0E0D, 0x0E0E, 0x0E0F,
       
  1617               0x0E10, 0x0E11, 0x0E12, 0x0E13, 0x0E14, 0x0E15, 0x0E16, 0x0E17,
       
  1618               0x0E18, 0x0E19, 0x0E1A, 0x0E1B, 0x0E1C, 0x0E1D, 0x0E1E, 0x0E1F,
       
  1619               0x0E20, 0x0E21, 0x0E22, 0x0E23, 0x0E24, 0x0E25, 0x0E26, 0x0E27,
       
  1620               0x0E28, 0x0E29, 0x0E2A, 0x0E2B, 0x0E2C, 0x0E2D, 0x0E2E, 0x0E2F,
       
  1621               0x0E30, 0x0E31, 0x0E32, 0x0E33, 0x0E34, 0x0E35, 0x0E36, 0x0E37,
       
  1622               0x0E38, 0x0E39, 0x0E3A, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x0E3F,
       
  1623               0x0E40, 0x0E41, 0x0E42, 0x0E43, 0x0E44, 0x0E45, 0x0E46, 0x0E47,
       
  1624               0x0E48, 0x0E49, 0x0E4A, 0x0E4B, 0x0E4C, 0x0E4D, 0x0E4E, 0x0E4F,
       
  1625               0x0E50, 0x0E51, 0x0E52, 0x0E53, 0x0E54, 0x0E55, 0x0E56, 0x0E57,
       
  1626               0x0E58, 0x0E59, 0x0E5A, 0x0E5B, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD} },
       
  1627     { "CP 1250", 2250,
       
  1628         { 0x20AC, 0xFFFD, 0x201A, 0xFFFD, 0x201E, 0x2026, 0x2020, 0x2021,
       
  1629               0xFFFD, 0x2030, 0x0160, 0x2039, 0x015A, 0x0164, 0x017D, 0x0179,
       
  1630               0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
       
  1631               0xFFFD, 0x2122, 0x0161, 0x203A, 0x015B, 0x0165, 0x017E, 0x017A,
       
  1632               0x00A0, 0x02C7, 0x02D8, 0x0141, 0x00A4, 0x0104, 0x00A6, 0x00A7,
       
  1633               0x00A8, 0x00A9, 0x015E, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x017B,
       
  1634               0x00B0, 0x00B1, 0x02DB, 0x0142, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
       
  1635               0x00B8, 0x0105, 0x015F, 0x00BB, 0x013D, 0x02DD, 0x013E, 0x017C,
       
  1636               0x0154, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x0139, 0x0106, 0x00C7,
       
  1637               0x010C, 0x00C9, 0x0118, 0x00CB, 0x011A, 0x00CD, 0x00CE, 0x010E,
       
  1638               0x0110, 0x0143, 0x0147, 0x00D3, 0x00D4, 0x0150, 0x00D6, 0x00D7,
       
  1639               0x0158, 0x016E, 0x00DA, 0x0170, 0x00DC, 0x00DD, 0x0162, 0x00DF,
       
  1640               0x0155, 0x00E1, 0x00E2, 0x0103, 0x00E4, 0x013A, 0x0107, 0x00E7,
       
  1641               0x010D, 0x00E9, 0x0119, 0x00EB, 0x011B, 0x00ED, 0x00EE, 0x010F,
       
  1642               0x0111, 0x0144, 0x0148, 0x00F3, 0x00F4, 0x0151, 0x00F6, 0x00F7,
       
  1643               0x0159, 0x016F, 0x00FA, 0x0171, 0x00FC, 0x00FD, 0x0163, 0x02D9} },
       
  1644     { "CP 1251", 2251,
       
  1645         { 0x0402, 0x0403, 0x201A, 0x0453, 0x201E, 0x2026, 0x2020, 0x2021,
       
  1646               0x20AC, 0x2030, 0x0409, 0x2039, 0x040A, 0x040C, 0x040B, 0x040F,
       
  1647               0x0452, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
       
  1648               0xFFFD, 0x2122, 0x0459, 0x203A, 0x045A, 0x045C, 0x045B, 0x045F,
       
  1649               0x00A0, 0x040E, 0x045E, 0x0408, 0x00A4, 0x0490, 0x00A6, 0x00A7,
       
  1650               0x0401, 0x00A9, 0x0404, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x0407,
       
  1651               0x00B0, 0x00B1, 0x0406, 0x0456, 0x0491, 0x00B5, 0x00B6, 0x00B7,
       
  1652               0x0451, 0x2116, 0x0454, 0x00BB, 0x0458, 0x0405, 0x0455, 0x0457,
       
  1653               0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
       
  1654               0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, 0x041F,
       
  1655               0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
       
  1656               0x0428, 0x0429, 0x042A, 0x042B, 0x042C, 0x042D, 0x042E, 0x042F,
       
  1657               0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
       
  1658               0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, 0x043F,
       
  1659               0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
       
  1660               0x0448, 0x0449, 0x044A, 0x044B, 0x044C, 0x044D, 0x044E, 0x044F} },
       
  1661     { "CP 1252", 2252,
       
  1662         { 0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
       
  1663               0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0xFFFD, 0x017D, 0xFFFD,
       
  1664               0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
       
  1665               0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0xFFFD, 0x017E, 0x0178,
       
  1666               0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,
       
  1667               0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
       
  1668               0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
       
  1669               0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF,
       
  1670               0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7,
       
  1671               0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
       
  1672               0x00D0, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7,
       
  1673               0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF,
       
  1674               0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7,
       
  1675               0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
       
  1676               0x00F0, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7,
       
  1677               0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x00FF} },
       
  1678     { "CP 1253", 2253,
       
  1679         { 0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
       
  1680               0xFFFD, 0x2030, 0xFFFD, 0x2039, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
       
  1681               0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
       
  1682               0xFFFD, 0x2122, 0xFFFD, 0x203A, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
       
  1683               0x00A0, 0x0385, 0x0386, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,
       
  1684               0x00A8, 0x00A9, 0xFFFD, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x2015,
       
  1685               0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x0384, 0x00B5, 0x00B6, 0x00B7,
       
  1686               0x0388, 0x0389, 0x038A, 0x00BB, 0x038C, 0x00BD, 0x038E, 0x038F,
       
  1687               0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
       
  1688               0x0398, 0x0399, 0x039A, 0x039B, 0x039C, 0x039D, 0x039E, 0x039F,
       
  1689               0x03A0, 0x03A1, 0xFFFD, 0x03A3, 0x03A4, 0x03A5, 0x03A6, 0x03A7,
       
  1690               0x03A8, 0x03A9, 0x03AA, 0x03AB, 0x03AC, 0x03AD, 0x03AE, 0x03AF,
       
  1691               0x03B0, 0x03B1, 0x03B2, 0x03B3, 0x03B4, 0x03B5, 0x03B6, 0x03B7,
       
  1692               0x03B8, 0x03B9, 0x03BA, 0x03BB, 0x03BC, 0x03BD, 0x03BE, 0x03BF,
       
  1693               0x03C0, 0x03C1, 0x03C2, 0x03C3, 0x03C4, 0x03C5, 0x03C6, 0x03C7,
       
  1694               0x03C8, 0x03C9, 0x03CA, 0x03CB, 0x03CC, 0x03CD, 0x03CE, 0xFFFD} },
       
  1695     { "CP 1254", 2254,
       
  1696         { 0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
       
  1697               0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0xFFFD, 0xFFFD, 0xFFFD,
       
  1698               0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
       
  1699               0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0xFFFD, 0xFFFD, 0x0178,
       
  1700               0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,
       
  1701               0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
       
  1702               0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
       
  1703               0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF,
       
  1704               0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7,
       
  1705               0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
       
  1706               0x011E, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7,
       
  1707               0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x0130, 0x015E, 0x00DF,
       
  1708               0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7,
       
  1709               0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
       
  1710               0x011F, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7,
       
  1711               0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x0131, 0x015F, 0x00FF} },
       
  1712     { "CP 1255", 2255,
       
  1713         { 0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
       
  1714               0x02C6, 0x2030, 0xFFFD, 0x2039, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
       
  1715               0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
       
  1716               0x02DC, 0x2122, 0xFFFD, 0x203A, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
       
  1717               0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x20AA, 0x00A5, 0x00A6, 0x00A7,
       
  1718               0x00A8, 0x00A9, 0x00D7, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
       
  1719               0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
       
  1720               0x00B8, 0x00B9, 0x00F7, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF,
       
  1721               0x05B0, 0x05B1, 0x05B2, 0x05B3, 0x05B4, 0x05B5, 0x05B6, 0x05B7,
       
  1722               0x05B8, 0x05B9, 0xFFFD, 0x05BB, 0x05BC, 0x05BD, 0x05BE, 0x05BF,
       
  1723               0x05C0, 0x05C1, 0x05C2, 0x05C3, 0x05F0, 0x05F1, 0x05F2, 0x05F3,
       
  1724               0x05F4, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
       
  1725               0x05D0, 0x05D1, 0x05D2, 0x05D3, 0x05D4, 0x05D5, 0x05D6, 0x05D7,
       
  1726               0x05D8, 0x05D9, 0x05DA, 0x05DB, 0x05DC, 0x05DD, 0x05DE, 0x05DF,
       
  1727               0x05E0, 0x05E1, 0x05E2, 0x05E3, 0x05E4, 0x05E5, 0x05E6, 0x05E7,
       
  1728               0x05E8, 0x05E9, 0x05EA, 0xFFFD, 0xFFFD, 0x200E, 0x200F, 0xFFFD} },
       
  1729     { "CP 1256", 2256,
       
  1730         { 0x20AC, 0x067E, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
       
  1731               0x02C6, 0x2030, 0x0679, 0x2039, 0x0152, 0x0686, 0x0698, 0x0688,
       
  1732               0x06AF, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
       
  1733               0x06A9, 0x2122, 0x0691, 0x203A, 0x0153, 0x200C, 0x200D, 0x06BA,
       
  1734               0x00A0, 0x060C, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,
       
  1735               0x00A8, 0x00A9, 0x06BE, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
       
  1736               0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
       
  1737               0x00B8, 0x00B9, 0x061B, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x061F,
       
  1738               0x06C1, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
       
  1739               0x0628, 0x0629, 0x062A, 0x062B, 0x062C, 0x062D, 0x062E, 0x062F,
       
  1740               0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x00D7,
       
  1741               0x0637, 0x0638, 0x0639, 0x063A, 0x0640, 0x0641, 0x0642, 0x0643,
       
  1742               0x00E0, 0x0644, 0x00E2, 0x0645, 0x0646, 0x0647, 0x0648, 0x00E7,
       
  1743               0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x0649, 0x064A, 0x00EE, 0x00EF,
       
  1744               0x064B, 0x064C, 0x064D, 0x064E, 0x00F4, 0x064F, 0x0650, 0x00F7,
       
  1745               0x0651, 0x00F9, 0x0652, 0x00FB, 0x00FC, 0x200E, 0x200F, 0x06D2} },
       
  1746     { "CP 1257", 2257,
       
  1747         { 0x20AC, 0xFFFD, 0x201A, 0xFFFD, 0x201E, 0x2026, 0x2020, 0x2021,
       
  1748               0xFFFD, 0x2030, 0xFFFD, 0x2039, 0xFFFD, 0x00A8, 0x02C7, 0x00B8,
       
  1749               0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
       
  1750               0xFFFD, 0x2122, 0xFFFD, 0x203A, 0xFFFD, 0x00AF, 0x02DB, 0xFFFD,
       
  1751               0x00A0, 0xFFFD, 0x00A2, 0x00A3, 0x00A4, 0xFFFD, 0x00A6, 0x00A7,
       
  1752               0x00D8, 0x00A9, 0x0156, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00C6,
       
  1753               0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
       
  1754               0x00F8, 0x00B9, 0x0157, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00E6,
       
  1755               0x0104, 0x012E, 0x0100, 0x0106, 0x00C4, 0x00C5, 0x0118, 0x0112,
       
  1756               0x010C, 0x00C9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012A, 0x013B,
       
  1757               0x0160, 0x0143, 0x0145, 0x00D3, 0x014C, 0x00D5, 0x00D6, 0x00D7,
       
  1758               0x0172, 0x0141, 0x015A, 0x016A, 0x00DC, 0x017B, 0x017D, 0x00DF,
       
  1759               0x0105, 0x012F, 0x0101, 0x0107, 0x00E4, 0x00E5, 0x0119, 0x0113,
       
  1760               0x010D, 0x00E9, 0x017A, 0x0117, 0x0123, 0x0137, 0x012B, 0x013C,
       
  1761               0x0161, 0x0144, 0x0146, 0x00F3, 0x014D, 0x00F5, 0x00F6, 0x00F7,
       
  1762               0x0173, 0x0142, 0x015B, 0x016B, 0x00FC, 0x017C, 0x017E, 0x02D9} },
       
  1763     { "CP 1258", 2258,
       
  1764         { 0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
       
  1765               0x02C6, 0x2030, 0xFFFD, 0x2039, 0x0152, 0xFFFD, 0xFFFD, 0xFFFD,
       
  1766               0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
       
  1767               0x02DC, 0x2122, 0xFFFD, 0x203A, 0x0153, 0xFFFD, 0xFFFD, 0x0178,
       
  1768               0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,
       
  1769               0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
       
  1770               0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
       
  1771               0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF,
       
  1772               0x00C0, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x00C5, 0x00C6, 0x00C7,
       
  1773               0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x0300, 0x00CD, 0x00CE, 0x00CF,
       
  1774               0x0110, 0x00D1, 0x0309, 0x00D3, 0x00D4, 0x01A0, 0x00D6, 0x00D7,
       
  1775               0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x01AF, 0x0303, 0x00DF,
       
  1776               0x00E0, 0x00E1, 0x00E2, 0x0103, 0x00E4, 0x00E5, 0x00E6, 0x00E7,
       
  1777               0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x0301, 0x00ED, 0x00EE, 0x00EF,
       
  1778               0x0111, 0x00F1, 0x0323, 0x00F3, 0x00F4, 0x01A1, 0x00F6, 0x00F7,
       
  1779               0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x01B0, 0x20AB, 0x00FF} },
       
  1780 
       
  1781     // this one is generated from the charmap file located in /usr/share/i18n/charmaps
       
  1782     // on most Linux distributions. The thai character set tis620 is byte by byte equivalent
       
  1783     // to iso8859-11, so we name it 8859-11 here, but recognise the name tis620 too.
       
  1784 
       
  1785     // $ for A in 8 9 A B C D E F ; do for B in 0 1 2 3 4 5 6 7 8 9 A B C D E F ; do echo x${A}${B} 0xFFFD ; done ; done > /tmp/digits ; ( cut -c25- < TIS-620 ; cat /tmp/digits ) | awk '/^x[89ABCDEF]/{ print $1, $2 }' | sed -e 's/<U/0x/' -e 's/>//' | sort | uniq -w4 | cut -c5- | paste '-d ' - - - - - - - - | sed -e 's/ /, /g' -e 's/$/,/' -e '$ s/,$/} },/' -e '1 s/^/{ /' > ~/tmp/tis-620
       
  1786     { "ISO 8859-11", 2259, // Thai character set mib enum taken from tis620 (which is byte by byte equivalent)
       
  1787         { 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
       
  1788               0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
       
  1789               0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
       
  1790               0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
       
  1791               0xFFFD, 0x0E01, 0x0E02, 0x0E03, 0x0E04, 0x0E05, 0x0E06, 0x0E07,
       
  1792               0x0E08, 0x0E09, 0x0E0A, 0x0E0B, 0x0E0C, 0x0E0D, 0x0E0E, 0x0E0F,
       
  1793               0x0E10, 0x0E11, 0x0E12, 0x0E13, 0x0E14, 0x0E15, 0x0E16, 0x0E17,
       
  1794               0x0E18, 0x0E19, 0x0E1A, 0x0E1B, 0x0E1C, 0x0E1D, 0x0E1E, 0x0E1F,
       
  1795               0x0E20, 0x0E21, 0x0E22, 0x0E23, 0x0E24, 0x0E25, 0x0E26, 0x0E27,
       
  1796               0x0E28, 0x0E29, 0x0E2A, 0x0E2B, 0x0E2C, 0x0E2D, 0x0E2E, 0x0E2F,
       
  1797               0x0E30, 0x0E31, 0x0E32, 0x0E33, 0x0E34, 0x0E35, 0x0E36, 0x0E37,
       
  1798               0x0E38, 0x0E39, 0x0E3A, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x0E3F,
       
  1799               0x0E40, 0x0E41, 0x0E42, 0x0E43, 0x0E44, 0x0E45, 0x0E46, 0x0E47,
       
  1800               0x0E48, 0x0E49, 0x0E4A, 0x0E4B, 0x0E4C, 0x0E4D, 0x0E4E, 0x0E4F,
       
  1801               0x0E50, 0x0E51, 0x0E52, 0x0E53, 0x0E54, 0x0E55, 0x0E56, 0x0E57,
       
  1802               0x0E58, 0x0E59, 0x0E5A, 0x0E5B, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD} },
       
  1803 
       
  1804     // change LAST_MIB if you add more, and edit unicodevalues in
       
  1805     // kernel/qpsprinter.cpp too.
       
  1806 };
       
  1807 
       
  1808 
       
// One reverse lookup table (Unicode code point -> 8-bit character) is shared
// by all QSimpleTextCodec instances.  reverseOwner records the codec whose
// mapping the table currently holds; QSimpleTextCodec::fromUnicode() rebuilds
// the table when called on a different codec, and the destructor of the
// owning codec deletes it.
static const QSimpleTextCodec * reverseOwner = 0;
static QArray<char> * reverseMap = 0;
       
  1811 
       
  1812 
       
  1813 QSimpleTextCodec::QSimpleTextCodec( int i )
       
  1814     : QTextCodec(), forwardIndex( i )
       
  1815 {
       
  1816 }
       
  1817 
       
  1818 
       
  1819 QSimpleTextCodec::~QSimpleTextCodec()
       
  1820 {
       
  1821     if ( reverseOwner == this ) {
       
  1822         delete reverseMap;
       
  1823         reverseMap = 0;
       
  1824         reverseOwner = 0;
       
  1825     }
       
  1826 }
       
  1827 
       
  1828 // what happens if strlen(chars)<len?  what happens if !chars?  if len<1?
       
  1829 QString QSimpleTextCodec::toUnicode(const char* chars, int len) const
       
  1830 {
       
  1831     if(len <= 0)
       
  1832         return QString::null;
       
  1833 
       
  1834     int clen = qstrlen(chars);
       
  1835     len = QMIN(len, clen); // Note: NUL ends string
       
  1836 
       
  1837     QString r;
       
  1838     r.setUnicode(0, len);
       
  1839     QChar* uc = (QChar*)r.unicode(); // const_cast
       
  1840     const unsigned char * c = (const unsigned char *)chars;
       
  1841     for( int i=0; i<len; i++ ) {
       
  1842         if ( c[i] > 127 )
       
  1843             uc[i] = unicodevalues[forwardIndex].values[c[i]-128];
       
  1844         else
       
  1845             uc[i] = c[i];
       
  1846     }
       
  1847     return r;
       
  1848 }
       
  1849 
       
  1850 
       
  1851 QCString QSimpleTextCodec::fromUnicode(const QString& uc, int& len ) const
       
  1852 {
       
  1853     if ( reverseOwner != this ) {
       
  1854         int m = 0;
       
  1855         int i = 0;
       
  1856         while( i < 128 ) {
       
  1857             if ( unicodevalues[forwardIndex].values[i] > m &&
       
  1858                  unicodevalues[forwardIndex].values[i] < 0xfffd )
       
  1859                 m = unicodevalues[forwardIndex].values[i];
       
  1860             i++;
       
  1861         }
       
  1862         m++;
       
  1863         if ( !reverseMap )
       
  1864             reverseMap = new QArray<char>( m );
       
  1865         if ( m > (int)(reverseMap->size()) )
       
  1866             reverseMap->resize( m );
       
  1867         for( i = 0; i < 128 && i < m; i++ )
       
  1868             (*reverseMap)[i] = (char)i;
       
  1869         for( ;i < m; i++ )
       
  1870             (*reverseMap)[i] = '?';
       
  1871         for( i=128; i<256; i++ ) {
       
  1872             int u = unicodevalues[forwardIndex].values[i-128];
       
  1873             if ( u < m )
       
  1874                 (*reverseMap)[u] = (char)(unsigned char)(i);
       
  1875         }
       
  1876         reverseOwner = this;
       
  1877     }
       
  1878     if ( len <0 || len > (int)uc.length() )
       
  1879         len = uc.length();
       
  1880     QCString r( len+1 );
       
  1881     int i = len;
       
  1882     int u;
       
  1883     const QChar* ucp = uc.unicode();
       
  1884     char* rp = r.data();
       
  1885     char* rmp = reverseMap->data();
       
  1886     int rmsize = (int) reverseMap->size();
       
  1887     while( i-- )
       
  1888     {
       
  1889         u = ucp->unicode();
       
  1890         *rp++ = u < 128 ? u : (( u < rmsize ) ? (*(rmp+u)) : '?' );
       
  1891         ucp++;
       
  1892     }
       
  1893     r[len] = 0;
       
  1894     return r;
       
  1895 }
       
  1896 
       
  1897 
       
  1898 const char* QSimpleTextCodec::name() const
       
  1899 {
       
  1900     return unicodevalues[forwardIndex].cs;
       
  1901 }
       
  1902 
       
  1903 
       
  1904 int QSimpleTextCodec::mibEnum() const
       
  1905 {
       
  1906     return unicodevalues[forwardIndex].mib;
       
  1907 }
       
  1908 
       
  1909 int QSimpleTextCodec::heuristicNameMatch(const char* hint) const
       
  1910 {
       
  1911     if ( hint[0]=='k' ) {
       
  1912         // Help people with messy fonts
       
  1913         if ( QCString(hint) == "koi8-1" )
       
  1914             return QTextCodec::heuristicNameMatch("koi8-r")-1;
       
  1915         if ( QCString(hint) == "koi8-ru" )
       
  1916             return QTextCodec::heuristicNameMatch("koi8-r")-1;
       
  1917     } else if ( hint[0] == 't' && QCString(name()) == "ISO 8859-11" ) {
       
  1918 	// 8859-11 and tis620 are byte by bute equivalent
       
  1919 	int i = simpleHeuristicNameMatch("tis620-0", hint);
       
  1920 	if( !i )
       
  1921 	    i = simpleHeuristicNameMatch("tis-620", hint);
       
  1922 	if( i ) return i;
       
  1923     }
       
  1924     return QTextCodec::heuristicNameMatch(hint);
       
  1925 }
       
  1926 
       
  1927 int QSimpleTextCodec::heuristicContentMatch(const char* chars, int len) const
       
  1928 {
       
  1929     if ( len<1 || !chars )
       
  1930         return -1;
       
  1931     int i = 0;
       
  1932     const uchar * c = (const unsigned char *)chars;
       
  1933     int r = 0;
       
  1934     while( i<len && c && *c ) {
       
  1935         if ( *c >= 128 ) {
       
  1936             if ( unicodevalues[forwardIndex].values[(*c)-128] == 0xfffd )
       
  1937                 return -1;
       
  1938         }
       
  1939         if ( (*c >= ' ' && *c < 127) ||
       
  1940              *c == '\n' || *c == '\t' || *c == '\r' )
       
  1941             r++;
       
  1942         i++;
       
  1943         c++;
       
  1944     }
       
  1945     if ( mibEnum()==4 )
       
  1946         r+=1;
       
  1947     return r;
       
  1948 }
       
  1949 
       
  1950 
       
  1951 #endif // QT_NO_CODECS
       
  1952 
       
  1953 class QLatin1Codec: public QTextCodec
       
  1954 {
       
  1955 public:
       
  1956     QLatin1Codec();
       
  1957     ~QLatin1Codec();
       
  1958 
       
  1959     QString toUnicode(const char* chars, int len) const;
       
  1960     QCString fromUnicode(const QString& uc, int& lenInOut ) const;
       
  1961 
       
  1962     const char* name() const;
       
  1963     int mibEnum() const;
       
  1964 
       
  1965     int heuristicContentMatch(const char* chars, int len) const;
       
  1966 
       
  1967     int heuristicNameMatch(const char* hint) const;
       
  1968 
       
  1969 private:
       
  1970     int forwardIndex;
       
  1971 };
       
  1972 
       
  1973 
       
  1974 QLatin1Codec::QLatin1Codec()
       
  1975     : QTextCodec()
       
  1976 {
       
  1977 }
       
  1978 
       
  1979 
       
// Destroys the codec.  The body is empty: this class releases nothing of
// its own here.
QLatin1Codec::~QLatin1Codec()
{
}
       
  1983 
       
  1984 // what happens if strlen(chars)<len?  what happens if !chars?  if len<1?
       
  1985 QString QLatin1Codec::toUnicode(const char* chars, int len) const
       
  1986 {
       
  1987     if(len <= 0)
       
  1988         return QString::null;
       
  1989 
       
  1990     return QString::fromLatin1(chars, len);
       
  1991 }
       
  1992 
       
  1993 
       
  1994 QCString QLatin1Codec::fromUnicode(const QString& uc, int& len ) const
       
  1995 {
       
  1996     if ( len <0 || len > (int)uc.length() )
       
  1997         len = uc.length();
       
  1998     QCString r( len+1 );
       
  1999     int i = 0;
       
  2000     const QChar *ch = uc.unicode();
       
  2001     while ( i < len ) {
       
  2002 	r[i] = ch->row() ? '?' : ch->cell();
       
  2003 	i++;
       
  2004 	ch++;
       
  2005     }
       
  2006     r[len] = 0;
       
  2007     return r;
       
  2008 }
       
  2009 
       
  2010 
       
// Returns the fixed name of this codec, "ISO 8859-1".
const char* QLatin1Codec::name() const
{
    return "ISO 8859-1";
}
       
  2015 
       
  2016 
       
// Returns the fixed MIB number of this codec, 4.
// NOTE(review): 4 is presumably the IANA MIBenum registered for
// ISO 8859-1 -- confirm against the registry.
int QLatin1Codec::mibEnum() const
{
    return 4;
}
       
  2021 
       
// Scores how well hint names this codec.  No Latin-1 specific aliases are
// handled here; the call is forwarded unchanged to the base class.
int QLatin1Codec::heuristicNameMatch(const char* hint) const
{
    return QTextCodec::heuristicNameMatch(hint);
}
       
  2026 
       
  2027 int QLatin1Codec::heuristicContentMatch(const char* chars, int len) const
       
  2028 {
       
  2029     if ( len<1 || !chars )
       
  2030         return -1;
       
  2031     int i = 0;
       
  2032     const uchar * c = (const unsigned char *)chars;
       
  2033     int r = 0;
       
  2034     while( i<len && c && *c ) {
       
  2035         if ( *c >= 0x80 && *c < 0xa0 )
       
  2036             return -1;
       
  2037         if ( (*c >= ' ' && *c < 127) ||
       
  2038              *c == '\n' || *c == '\t' || *c == '\r' )
       
  2039             r++;
       
  2040         i++;
       
  2041         c++;
       
  2042     }
       
  2043     return r;
       
  2044 }
       
  2045 
       
  2046 
       
  2047 static void setupBuiltinCodecs()
       
  2048 {
       
  2049     (void)new QLatin1Codec;
       
  2050 
       
  2051 #ifndef QT_NO_CODECS
       
  2052     int i = 0;
       
  2053     do {
       
  2054         (void)new QSimpleTextCodec( i );
       
  2055     } while( unicodevalues[i++].mib != LAST_MIB );
       
  2056 
       
  2057     (void)new QEucJpCodec;
       
  2058     (void)new QSjisCodec;
       
  2059     (void)new QJisCodec;
       
  2060     (void)new QEucKrCodec;
       
  2061     (void)new QGbkCodec;
       
  2062     (void)new QBig5Codec;
       
  2063     (void)new QUtf8Codec;
       
  2064     (void)new QUtf16Codec;
       
  2065     (void)new QHebrewCodec;
       
  2066     (void)new QArabicCodec;
       
  2067     (void)new QTsciiCodec;
       
  2068 #endif // QT_NO_CODECS
       
  2069 }
       
  2070 
       
  2071 #endif // QT_NO_TEXTCODEC