|         |      1 /**************************************************************************** | 
|         |      2 ** | 
|         |      3 ** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies). | 
|         |      4 ** All rights reserved. | 
|         |      5 ** Contact: Nokia Corporation (qt-info@nokia.com) | 
|         |      6 ** | 
|         |      7 ** This file is part of the QtCore module of the Qt Toolkit. | 
|         |      8 ** | 
|         |      9 ** $QT_BEGIN_LICENSE:LGPL$ | 
|         |     10 ** No Commercial Usage | 
|         |     11 ** This file contains pre-release code and may not be distributed. | 
|         |     12 ** You may use this file in accordance with the terms and conditions | 
|         |     13 ** contained in the Technology Preview License Agreement accompanying | 
|         |     14 ** this package. | 
|         |     15 ** | 
|         |     16 ** GNU Lesser General Public License Usage | 
|         |     17 ** Alternatively, this file may be used under the terms of the GNU Lesser | 
|         |     18 ** General Public License version 2.1 as published by the Free Software | 
|         |     19 ** Foundation and appearing in the file LICENSE.LGPL included in the | 
|         |     20 ** packaging of this file.  Please review the following information to | 
|         |     21 ** ensure the GNU Lesser General Public License version 2.1 requirements | 
|         |     22 ** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html. | 
|         |     23 ** | 
|         |     24 ** In addition, as a special exception, Nokia gives you certain additional | 
|         |     25 ** rights.  These rights are described in the Nokia Qt LGPL Exception | 
|         |     26 ** version 1.1, included in the file LGPL_EXCEPTION.txt in this package. | 
|         |     27 ** | 
|         |     28 ** If you have questions regarding the use of this file, please contact | 
|         |     29 ** Nokia at qt-info@nokia.com. | 
|         |     30 ** | 
|         |     31 ** | 
|         |     32 ** | 
|         |     33 ** | 
|         |     34 ** | 
|         |     35 ** | 
|         |     36 ** | 
|         |     37 ** | 
|         |     38 ** $QT_END_LICENSE$ | 
|         |     39 ** | 
|         |     40 ****************************************************************************/ | 
|         |     41 #include "qisciicodec_p.h" | 
|         |     42 #include "qlist.h" | 
|         |     43  | 
|         |     44 #ifndef QT_NO_CODECS | 
|         |     45  | 
|         |     46 QT_BEGIN_NAMESPACE | 
|         |     47  | 
|         |     48 /*! | 
|         |     49     \class QIsciiCodec | 
|         |     50     \brief The QIsciiCodec class provides conversion to and from the ISCII encoding. | 
|         |     51  | 
|         |     52     \internal | 
|         |     53 */ | 
|         |     54  | 
|         |     55  | 
|         |     56 struct Codecs { | 
|         |     57     const char name[10]; | 
|         |     58     ushort base; | 
|         |     59 }; | 
|         |     60  | 
|         |     61 static const Codecs codecs [] = { | 
|         |     62     { "Iscii-Dev", 0x900 }, | 
|         |     63     { "Iscii-Bng", 0x980 }, | 
|         |     64     { "Iscii-Pnj", 0xa00 }, | 
|         |     65     { "Iscii-Gjr", 0xa80 }, | 
|         |     66     { "Iscii-Ori", 0xb00 }, | 
|         |     67     { "Iscii-Tml", 0xb80 }, | 
|         |     68     { "Iscii-Tlg", 0xc00 }, | 
|         |     69     { "Iscii-Knd", 0xc80 }, | 
|         |     70     { "Iscii-Mlm", 0xd00 } | 
|         |     71 }; | 
|         |     72  | 
|         |     73 QIsciiCodec::~QIsciiCodec() | 
|         |     74 { | 
|         |     75 } | 
|         |     76  | 
|         |     77 QByteArray QIsciiCodec::name() const | 
|         |     78 { | 
|         |     79   return codecs[idx].name; | 
|         |     80 } | 
|         |     81  | 
|         |     82 int QIsciiCodec::mibEnum() const | 
|         |     83 { | 
|         |     84     /* There is no MIBEnum for Iscii */ | 
|         |     85     return -3000-idx; | 
|         |     86 } | 
|         |     87  | 
|         |     88 static const uchar inv = 0xFF; | 
|         |     89  | 
|         |     90 /* iscii range from 0xa0 - 0xff */ | 
|         |     91 static const uchar iscii_to_uni_table[0x60] = { | 
|         |     92     0x00, 0x01, 0x02, 0x03, | 
|         |     93     0x05, 0x06, 0x07, 0x08, | 
|         |     94     0x09, 0x0a, 0x0b, 0x0e, | 
|         |     95     0x0f, 0x20, 0x0d, 0x12, | 
|         |     96  | 
|         |     97     0x13, 0x14, 0x11, 0x15, | 
|         |     98     0x16, 0x17, 0x18, 0x19, | 
|         |     99     0x1a, 0x1b, 0x1c, 0x1d, | 
|         |    100     0x1e, 0x1f, 0x20, 0x21, | 
|         |    101  | 
|         |    102     0x22, 0x23, 0x24, 0x25, | 
|         |    103     0x26, 0x27, 0x28, 0x29, | 
|         |    104     0x2a, 0x2b, 0x2c, 0x2d, | 
|         |    105     0x2e, 0x2f, 0x5f, 0x30, | 
|         |    106  | 
|         |    107     0x31, 0x32, 0x33, 0x34, | 
|         |    108     0x35, 0x36, 0x37, 0x38, | 
|         |    109     0x39,  inv, 0x3e, 0x3f, | 
|         |    110     0x40, 0x41, 0x42, 0x43, | 
|         |    111  | 
|         |    112     0x46, 0x47, 0x48, 0x45, | 
|         |    113     0x4a, 0x4b, 0x4c, 0x49, | 
|         |    114     0x4d, 0x3c, 0x64, 0x00, | 
|         |    115     0x00, 0x00, 0x00, 0x00, | 
|         |    116  | 
|         |    117     0x00, 0x66, 0x67, 0x68, | 
|         |    118     0x69, 0x6a, 0x6b, 0x6c, | 
|         |    119     0x6d, 0x6e, 0x6f, 0x00, | 
|         |    120     0x00, 0x00, 0x00, 0x00 | 
|         |    121 }; | 
|         |    122  | 
|         |    123 static const uchar uni_to_iscii_table[0x80] = { | 
|         |    124     0x00, 0xa1, 0xa2, 0xa3, | 
|         |    125     0x00, 0xa4, 0xa5, 0xa6, | 
|         |    126     0xa7, 0xa8, 0xa9, 0xaa, | 
|         |    127     0x00, 0xae, 0xab, 0xac, | 
|         |    128  | 
|         |    129     0xad, 0xb2, 0xaf, 0xb0, | 
|         |    130     0xb1, 0xb3, 0xb4, 0xb5, | 
|         |    131     0xb6, 0xb7, 0xb8, 0xb9, | 
|         |    132     0xba, 0xbb, 0xbc, 0xbd, | 
|         |    133  | 
|         |    134     0xbe, 0xbf, 0xc0, 0xc1, | 
|         |    135     0xc2, 0xc3, 0xc4, 0xc5, | 
|         |    136     0xc6, 0xc7, 0xc8, 0xc9, | 
|         |    137     0xca, 0xcb, 0xcc, 0xcd, | 
|         |    138  | 
|         |    139     0xcf, 0xd0, 0xd1, 0xd2, | 
|         |    140     0xd3, 0xd4, 0xd5, 0xd6, | 
|         |    141     0xd7, 0xd8, 0x00, 0x00, | 
|         |    142     0xe9, 0x00, 0xda, 0xdb, | 
|         |    143  | 
|         |    144     0xdc, 0xdd, 0xde, 0xdf, | 
|         |    145     0x00, 0xe3, 0xe0, 0xe1, | 
|         |    146     0xe2, 0xe7, 0xe4, 0xe5, | 
|         |    147     0xe6, 0xe8, 0x00, 0x00, | 
|         |    148  | 
|         |    149     0x00, 0x00, 0x00, 0x00, | 
|         |    150     0x00, 0x00, 0x00, 0x00, | 
|         |    151     0x01, 0x02, 0x03, 0x04, // decomposable into the uc codes listed here + nukta | 
|         |    152     0x05, 0x06, 0x07, 0xce, | 
|         |    153  | 
|         |    154     0x00, 0x00, 0x00, 0x00, | 
|         |    155     0xea, 0x08, 0xf1, 0xf2, | 
|         |    156     0xf3, 0xf4, 0xf5, 0xf6, | 
|         |    157     0xf7, 0xf8, 0xf9, 0xfa, | 
|         |    158  | 
|         |    159     0x00, 0x00, 0x00, 0x00, | 
|         |    160     0x00, 0x00, 0x00, 0x00, | 
|         |    161     0x00, 0x00, 0x00, 0x00, | 
|         |    162     0x00, 0x00, 0x00, 0x00 | 
|         |    163 }; | 
|         |    164  | 
|         |    165 static const uchar uni_to_iscii_pairs[] = { | 
|         |    166     0x00, 0x00, | 
|         |    167     0x15, 0x3c, // 0x958 | 
|         |    168     0x16, 0x3c, // 0x959 | 
|         |    169     0x17, 0x3c, // 0x95a | 
|         |    170     0x1c, 0x3c, // 0x95b | 
|         |    171     0x21, 0x3c, // 0x95c | 
|         |    172     0x22, 0x3c, // 0x95d | 
|         |    173     0x2b, 0x3c, // 0x95e | 
|         |    174     0x64, 0x64  // 0x965 | 
|         |    175 }; | 
|         |    176  | 
|         |    177  | 
|         |    178 QByteArray QIsciiCodec::convertFromUnicode(const QChar *uc, int len, ConverterState *state) const | 
|         |    179 { | 
|         |    180     char replacement = '?'; | 
|         |    181     bool halant = false; | 
|         |    182     if (state) { | 
|         |    183         if (state->flags & ConvertInvalidToNull) | 
|         |    184             replacement = 0; | 
|         |    185         halant = state->state_data[0]; | 
|         |    186     } | 
|         |    187     int invalid = 0; | 
|         |    188  | 
|         |    189     QByteArray result(2 * len, Qt::Uninitialized); //worst case | 
|         |    190  | 
|         |    191     uchar *ch = reinterpret_cast<uchar *>(result.data()); | 
|         |    192  | 
|         |    193     const int base = codecs[idx].base; | 
|         |    194  | 
|         |    195     for (int i =0; i < len; ++i) { | 
|         |    196         const ushort codePoint = uc[i].unicode(); | 
|         |    197  | 
|         |    198         /* The low 7 bits of ISCII is plain ASCII. However, we go all the | 
|         |    199          * way up to 0xA0 such that we can roundtrip with convertToUnicode()'s | 
|         |    200          * behavior. */ | 
|         |    201         if(codePoint < 0xA0) { | 
|         |    202             *ch++ = static_cast<uchar>(codePoint); | 
|         |    203             continue; | 
|         |    204         } | 
|         |    205  | 
|         |    206         const int pos = codePoint - base; | 
|         |    207         if (pos > 0 && pos < 0x80) { | 
|         |    208             uchar iscii = uni_to_iscii_table[pos]; | 
|         |    209             if (iscii > 0x80) { | 
|         |    210                 *ch++ = iscii; | 
|         |    211             } else if (iscii) { | 
|         |    212                 const uchar *pair = uni_to_iscii_pairs + 2*iscii; | 
|         |    213                 *ch++ = *pair++; | 
|         |    214                 *ch++ = *pair++; | 
|         |    215             } else { | 
|         |    216                 *ch++ = replacement; | 
|         |    217                 ++invalid; | 
|         |    218             } | 
|         |    219         } else { | 
|         |    220             if (uc[i].unicode() == 0x200c) { // ZWNJ | 
|         |    221                 if (halant) | 
|         |    222                     // Consonant Halant ZWNJ -> Consonant Halant Halant | 
|         |    223                     *ch++ = 0xe8; | 
|         |    224             } else if (uc[i].unicode() == 0x200d) { // ZWJ | 
|         |    225                 if (halant) | 
|         |    226                     // Consonant Halant ZWJ -> Consonant Halant Nukta | 
|         |    227                     *ch++ = 0xe9; | 
|         |    228             } else { | 
|         |    229                 *ch++ = replacement; | 
|         |    230                 ++invalid; | 
|         |    231             } | 
|         |    232         } | 
|         |    233         halant = (pos == 0x4d); | 
|         |    234     } | 
|         |    235     result.truncate(ch - (uchar *)result.data()); | 
|         |    236  | 
|         |    237     if (state) { | 
|         |    238         state->invalidChars += invalid; | 
|         |    239         state->state_data[0] = halant; | 
|         |    240     } | 
|         |    241     return result; | 
|         |    242 } | 
|         |    243  | 
|         |    244 QString QIsciiCodec::convertToUnicode(const char* chars, int len, ConverterState *state) const | 
|         |    245 { | 
|         |    246     bool halant = false; | 
|         |    247     if (state) { | 
|         |    248         halant = state->state_data[0]; | 
|         |    249     } | 
|         |    250  | 
|         |    251     QString result(len, Qt::Uninitialized); | 
|         |    252     QChar *uc = result.data(); | 
|         |    253  | 
|         |    254     const int base = codecs[idx].base; | 
|         |    255  | 
|         |    256     for (int i = 0; i < len; ++i) { | 
|         |    257         ushort ch = (uchar) chars[i]; | 
|         |    258         if (ch < 0xa0) | 
|         |    259             *uc++ = ch; | 
|         |    260         else { | 
|         |    261             ushort c = iscii_to_uni_table[ch - 0xa0]; | 
|         |    262             if (halant && (c == inv || c == 0xe9)) { | 
|         |    263                 // Consonant Halant inv -> Consonant Halant ZWJ | 
|         |    264                 // Consonant Halant Nukta -> Consonant Halant ZWJ | 
|         |    265                 *uc++ = QChar(0x200d); | 
|         |    266             } else if (halant && c == 0xe8) { | 
|         |    267                 // Consonant Halant Halant -> Consonant Halant ZWNJ | 
|         |    268                 *uc++ = QChar(0x200c); | 
|         |    269             } else { | 
|         |    270                 *uc++ = QChar(c+base); | 
|         |    271             } | 
|         |    272         } | 
|         |    273         halant = ((uchar)chars[i] == 0xe8); | 
|         |    274     } | 
|         |    275     result.resize(uc - result.unicode()); | 
|         |    276  | 
|         |    277     if (state) { | 
|         |    278         state->state_data[0] = halant; | 
|         |    279     } | 
|         |    280     return result; | 
|         |    281 } | 
|         |    282  | 
|         |    283 QT_END_NAMESPACE | 
|         |    284  | 
|         |    285 #endif // QT_NO_CODECS |