src/corelib/codecs/qisciicodec.cpp
changeset 0 1918ee327afb
child 4 3b1da2848fc7
equal deleted inserted replaced
-1:000000000000 0:1918ee327afb
       
     1 /****************************************************************************
       
     2 **
       
     3 ** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies).
       
     4 ** All rights reserved.
       
     5 ** Contact: Nokia Corporation (qt-info@nokia.com)
       
     6 **
       
     7 ** This file is part of the QtCore module of the Qt Toolkit.
       
     8 **
       
     9 ** $QT_BEGIN_LICENSE:LGPL$
       
    10 ** No Commercial Usage
       
    11 ** This file contains pre-release code and may not be distributed.
       
    12 ** You may use this file in accordance with the terms and conditions
       
    13 ** contained in the Technology Preview License Agreement accompanying
       
    14 ** this package.
       
    15 **
       
    16 ** GNU Lesser General Public License Usage
       
    17 ** Alternatively, this file may be used under the terms of the GNU Lesser
       
    18 ** General Public License version 2.1 as published by the Free Software
       
    19 ** Foundation and appearing in the file LICENSE.LGPL included in the
       
    20 ** packaging of this file.  Please review the following information to
       
    21 ** ensure the GNU Lesser General Public License version 2.1 requirements
       
    22 ** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
       
    23 **
       
    24 ** In addition, as a special exception, Nokia gives you certain additional
       
    25 ** rights.  These rights are described in the Nokia Qt LGPL Exception
       
    26 ** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
       
    27 **
       
    28 ** If you have questions regarding the use of this file, please contact
       
    29 ** Nokia at qt-info@nokia.com.
       
    30 **
       
    31 **
       
    32 **
       
    33 **
       
    34 **
       
    35 **
       
    36 **
       
    37 **
       
    38 ** $QT_END_LICENSE$
       
    39 **
       
    40 ****************************************************************************/
       
    41 #include "qisciicodec_p.h"
       
    42 #include "qlist.h"
       
    43 
       
    44 #ifndef QT_NO_CODECS
       
    45 
       
    46 QT_BEGIN_NAMESPACE
       
    47 
       
    48 /*!
       
    49     \class QIsciiCodec
       
    50     \brief The QIsciiCodec class provides conversion to and from the ISCII encoding.
       
    51 
       
    52     \internal
       
    53 */
       
    54 
       
    55 
       
    56 struct Codecs {
       
    57     const char name[10];
       
    58     ushort base;
       
    59 };
       
    60 
       
    61 static const Codecs codecs [] = {
       
    62     { "Iscii-Dev", 0x900 },
       
    63     { "Iscii-Bng", 0x980 },
       
    64     { "Iscii-Pnj", 0xa00 },
       
    65     { "Iscii-Gjr", 0xa80 },
       
    66     { "Iscii-Ori", 0xb00 },
       
    67     { "Iscii-Tml", 0xb80 },
       
    68     { "Iscii-Tlg", 0xc00 },
       
    69     { "Iscii-Knd", 0xc80 },
       
    70     { "Iscii-Mlm", 0xd00 }
       
    71 };
       
    72 
       
    73 QIsciiCodec::~QIsciiCodec()
       
    74 {
       
    75 }
       
    76 
       
    77 QByteArray QIsciiCodec::name() const
       
    78 {
       
    79   return codecs[idx].name;
       
    80 }
       
    81 
       
    82 int QIsciiCodec::mibEnum() const
       
    83 {
       
    84     /* There is no MIBEnum for Iscii */
       
    85     return -3000-idx;
       
    86 }
       
    87 
       
    88 static const uchar inv = 0xFF;
       
    89 
       
    90 /* iscii range from 0xa0 - 0xff */
       
    91 static const uchar iscii_to_uni_table[0x60] = {
       
    92     0x00, 0x01, 0x02, 0x03,
       
    93     0x05, 0x06, 0x07, 0x08,
       
    94     0x09, 0x0a, 0x0b, 0x0e,
       
    95     0x0f, 0x20, 0x0d, 0x12,
       
    96 
       
    97     0x13, 0x14, 0x11, 0x15,
       
    98     0x16, 0x17, 0x18, 0x19,
       
    99     0x1a, 0x1b, 0x1c, 0x1d,
       
   100     0x1e, 0x1f, 0x20, 0x21,
       
   101 
       
   102     0x22, 0x23, 0x24, 0x25,
       
   103     0x26, 0x27, 0x28, 0x29,
       
   104     0x2a, 0x2b, 0x2c, 0x2d,
       
   105     0x2e, 0x2f, 0x5f, 0x30,
       
   106 
       
   107     0x31, 0x32, 0x33, 0x34,
       
   108     0x35, 0x36, 0x37, 0x38,
       
   109     0x39,  inv, 0x3e, 0x3f,
       
   110     0x40, 0x41, 0x42, 0x43,
       
   111 
       
   112     0x46, 0x47, 0x48, 0x45,
       
   113     0x4a, 0x4b, 0x4c, 0x49,
       
   114     0x4d, 0x3c, 0x64, 0x00,
       
   115     0x00, 0x00, 0x00, 0x00,
       
   116 
       
   117     0x00, 0x66, 0x67, 0x68,
       
   118     0x69, 0x6a, 0x6b, 0x6c,
       
   119     0x6d, 0x6e, 0x6f, 0x00,
       
   120     0x00, 0x00, 0x00, 0x00
       
   121 };
       
   122 
       
   123 static const uchar uni_to_iscii_table[0x80] = {
       
   124     0x00, 0xa1, 0xa2, 0xa3,
       
   125     0x00, 0xa4, 0xa5, 0xa6,
       
   126     0xa7, 0xa8, 0xa9, 0xaa,
       
   127     0x00, 0xae, 0xab, 0xac,
       
   128 
       
   129     0xad, 0xb2, 0xaf, 0xb0,
       
   130     0xb1, 0xb3, 0xb4, 0xb5,
       
   131     0xb6, 0xb7, 0xb8, 0xb9,
       
   132     0xba, 0xbb, 0xbc, 0xbd,
       
   133 
       
   134     0xbe, 0xbf, 0xc0, 0xc1,
       
   135     0xc2, 0xc3, 0xc4, 0xc5,
       
   136     0xc6, 0xc7, 0xc8, 0xc9,
       
   137     0xca, 0xcb, 0xcc, 0xcd,
       
   138 
       
   139     0xcf, 0xd0, 0xd1, 0xd2,
       
   140     0xd3, 0xd4, 0xd5, 0xd6,
       
   141     0xd7, 0xd8, 0x00, 0x00,
       
   142     0xe9, 0x00, 0xda, 0xdb,
       
   143 
       
   144     0xdc, 0xdd, 0xde, 0xdf,
       
   145     0x00, 0xe3, 0xe0, 0xe1,
       
   146     0xe2, 0xe7, 0xe4, 0xe5,
       
   147     0xe6, 0xe8, 0x00, 0x00,
       
   148 
       
   149     0x00, 0x00, 0x00, 0x00,
       
   150     0x00, 0x00, 0x00, 0x00,
       
   151     0x01, 0x02, 0x03, 0x04, // decomposable into the uc codes listed here + nukta
       
   152     0x05, 0x06, 0x07, 0xce,
       
   153 
       
   154     0x00, 0x00, 0x00, 0x00,
       
   155     0xea, 0x08, 0xf1, 0xf2,
       
   156     0xf3, 0xf4, 0xf5, 0xf6,
       
   157     0xf7, 0xf8, 0xf9, 0xfa,
       
   158 
       
   159     0x00, 0x00, 0x00, 0x00,
       
   160     0x00, 0x00, 0x00, 0x00,
       
   161     0x00, 0x00, 0x00, 0x00,
       
   162     0x00, 0x00, 0x00, 0x00
       
   163 };
       
   164 
       
   165 static const uchar uni_to_iscii_pairs[] = {
       
   166     0x00, 0x00,
       
   167     0x15, 0x3c, // 0x958
       
   168     0x16, 0x3c, // 0x959
       
   169     0x17, 0x3c, // 0x95a
       
   170     0x1c, 0x3c, // 0x95b
       
   171     0x21, 0x3c, // 0x95c
       
   172     0x22, 0x3c, // 0x95d
       
   173     0x2b, 0x3c, // 0x95e
       
   174     0x64, 0x64  // 0x965
       
   175 };
       
   176 
       
   177 
       
   178 QByteArray QIsciiCodec::convertFromUnicode(const QChar *uc, int len, ConverterState *state) const
       
   179 {
       
   180     char replacement = '?';
       
   181     bool halant = false;
       
   182     if (state) {
       
   183         if (state->flags & ConvertInvalidToNull)
       
   184             replacement = 0;
       
   185         halant = state->state_data[0];
       
   186     }
       
   187     int invalid = 0;
       
   188 
       
   189     QByteArray result(2 * len, Qt::Uninitialized); //worst case
       
   190 
       
   191     uchar *ch = reinterpret_cast<uchar *>(result.data());
       
   192 
       
   193     const int base = codecs[idx].base;
       
   194 
       
   195     for (int i =0; i < len; ++i) {
       
   196         const ushort codePoint = uc[i].unicode();
       
   197 
       
   198         /* The low 7 bits of ISCII is plain ASCII. However, we go all the
       
   199          * way up to 0xA0 such that we can roundtrip with convertToUnicode()'s
       
   200          * behavior. */
       
   201         if(codePoint < 0xA0) {
       
   202             *ch++ = static_cast<uchar>(codePoint);
       
   203             continue;
       
   204         }
       
   205 
       
   206         const int pos = codePoint - base;
       
   207         if (pos > 0 && pos < 0x80) {
       
   208             uchar iscii = uni_to_iscii_table[pos];
       
   209             if (iscii > 0x80) {
       
   210                 *ch++ = iscii;
       
   211             } else if (iscii) {
       
   212                 const uchar *pair = uni_to_iscii_pairs + 2*iscii;
       
   213                 *ch++ = *pair++;
       
   214                 *ch++ = *pair++;
       
   215             } else {
       
   216                 *ch++ = replacement;
       
   217                 ++invalid;
       
   218             }
       
   219         } else {
       
   220             if (uc[i].unicode() == 0x200c) { // ZWNJ
       
   221                 if (halant)
       
   222                     // Consonant Halant ZWNJ -> Consonant Halant Halant
       
   223                     *ch++ = 0xe8;
       
   224             } else if (uc[i].unicode() == 0x200d) { // ZWJ
       
   225                 if (halant)
       
   226                     // Consonant Halant ZWJ -> Consonant Halant Nukta
       
   227                     *ch++ = 0xe9;
       
   228             } else {
       
   229                 *ch++ = replacement;
       
   230                 ++invalid;
       
   231             }
       
   232         }
       
   233         halant = (pos == 0x4d);
       
   234     }
       
   235     result.truncate(ch - (uchar *)result.data());
       
   236 
       
   237     if (state) {
       
   238         state->invalidChars += invalid;
       
   239         state->state_data[0] = halant;
       
   240     }
       
   241     return result;
       
   242 }
       
   243 
       
   244 QString QIsciiCodec::convertToUnicode(const char* chars, int len, ConverterState *state) const
       
   245 {
       
   246     bool halant = false;
       
   247     if (state) {
       
   248         halant = state->state_data[0];
       
   249     }
       
   250 
       
   251     QString result(len, Qt::Uninitialized);
       
   252     QChar *uc = result.data();
       
   253 
       
   254     const int base = codecs[idx].base;
       
   255 
       
   256     for (int i = 0; i < len; ++i) {
       
   257         ushort ch = (uchar) chars[i];
       
   258         if (ch < 0xa0)
       
   259             *uc++ = ch;
       
   260         else {
       
   261             ushort c = iscii_to_uni_table[ch - 0xa0];
       
   262             if (halant && (c == inv || c == 0xe9)) {
       
   263                 // Consonant Halant inv -> Consonant Halant ZWJ
       
   264                 // Consonant Halant Nukta -> Consonant Halant ZWJ
       
   265                 *uc++ = QChar(0x200d);
       
   266             } else if (halant && c == 0xe8) {
       
   267                 // Consonant Halant Halant -> Consonant Halant ZWNJ
       
   268                 *uc++ = QChar(0x200c);
       
   269             } else {
       
   270                 *uc++ = QChar(c+base);
       
   271             }
       
   272         }
       
   273         halant = ((uchar)chars[i] == 0xe8);
       
   274     }
       
   275     result.resize(uc - result.unicode());
       
   276 
       
   277     if (state) {
       
   278         state->state_data[0] = halant;
       
   279     }
       
   280     return result;
       
   281 }
       
   282 
       
   283 QT_END_NAMESPACE
       
   284 
       
   285 #endif // QT_NO_CODECS