src/plugins/codecs/jp/qjiscodec.cpp
changeset 0 1918ee327afb
child 4 3b1da2848fc7
equal deleted inserted replaced
-1:000000000000 0:1918ee327afb
       
     1 /****************************************************************************
       
     2 **
       
     3 ** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies).
       
     4 ** All rights reserved.
       
     5 ** Contact: Nokia Corporation (qt-info@nokia.com)
       
     6 **
       
     7 ** This file is part of the plugins of the Qt Toolkit.
       
     8 **
       
     9 ** $QT_BEGIN_LICENSE:LGPL$
       
    10 ** No Commercial Usage
       
    11 ** This file contains pre-release code and may not be distributed.
       
    12 ** You may use this file in accordance with the terms and conditions
       
    13 ** contained in the Technology Preview License Agreement accompanying
       
    14 ** this package.
       
    15 **
       
    16 ** GNU Lesser General Public License Usage
       
    17 ** Alternatively, this file may be used under the terms of the GNU Lesser
       
    18 ** General Public License version 2.1 as published by the Free Software
       
    19 ** Foundation and appearing in the file LICENSE.LGPL included in the
       
    20 ** packaging of this file.  Please review the following information to
       
    21 ** ensure the GNU Lesser General Public License version 2.1 requirements
       
    22 ** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
       
    23 **
       
    24 ** In addition, as a special exception, Nokia gives you certain additional
       
    25 ** rights.  These rights are described in the Nokia Qt LGPL Exception
       
    26 ** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
       
    27 **
       
    28 ** If you have questions regarding the use of this file, please contact
       
    29 ** Nokia at qt-info@nokia.com.
       
    30 **
       
    31 **
       
    32 **
       
    33 **
       
    34 **
       
    35 **
       
    36 **
       
    37 **
       
    38 ** $QT_END_LICENSE$
       
    39 **
       
    40 ****************************************************************************/
       
    41 
       
    42 // Most of the code here was originally written by Serika Kurusugawa,
       
    43 // a.k.a. Junji Takagi, and is included in Qt with the author's permission
       
    44 // and the grateful thanks of the Qt team.
       
    45 
       
    46 /*! \class QJisCodec
       
    47     \reentrant
       
    48     \internal
       
    49 */
       
    50 
       
    51 #include "qjiscodec.h"
       
    52 #include "qlist.h"
       
    53 
       
    54 QT_BEGIN_NAMESPACE
       
    55 
       
    56 #ifndef QT_NO_TEXTCODEC
       
    57 enum {
       
    58     Esc = 0x1b,
       
    59     So = 0x0e,         // Shift Out
       
    60     Si = 0x0f,         // Shift In
       
    61 
       
    62     ReverseSolidus = 0x5c,
       
    63     YenSign = 0x5c,
       
    64     Tilde = 0x7e,
       
    65     Overline = 0x7e
       
    66 };
       
    67 
       
    68 #define        IsKana(c)        (((c) >= 0xa1) && ((c) <= 0xdf))
       
    69 #define        IsJisChar(c)        (((c) >= 0x21) && ((c) <= 0x7e))
       
    70 
       
    71 #define        QValidChar(u)        ((u) ? QChar((ushort)(u)) : QChar(QChar::ReplacementCharacter))
       
    72 
       
    73 enum Iso2022State{ Ascii, MinState = Ascii,
       
    74                    JISX0201_Latin, JISX0201_Kana,
       
    75                    JISX0208_1978, JISX0208_1983,
       
    76                    JISX0212, MaxState = JISX0212,
       
    77                    UnknownState };
       
    78 
       
    79 static const char Esc_CHARS[] = "()*+-./";
       
    80 
       
    81 static const char Esc_Ascii[]                 = {Esc, '(', 'B', 0 };
       
    82 static const char Esc_JISX0201_Latin[]        = {Esc, '(', 'J', 0 };
       
    83 static const char Esc_JISX0201_Kana[]        = {Esc, '(', 'I', 0 };
       
    84 static const char Esc_JISX0208_1978[]        = {Esc, '$', '@', 0 };
       
    85 static const char Esc_JISX0208_1983[]        = {Esc, '$', 'B', 0 };
       
    86 static const char Esc_JISX0212[]        = {Esc, '$', '(', 'D', 0 };
       
    87 static const char * const Esc_SEQ[] = { Esc_Ascii,
       
    88                                         Esc_JISX0201_Latin,
       
    89                                         Esc_JISX0201_Kana,
       
    90                                         Esc_JISX0208_1978,
       
    91                                         Esc_JISX0208_1983,
       
    92                                         Esc_JISX0212 };
       
    93 
       
    94 /*! \internal */
       
    95 QJisCodec::QJisCodec() : conv(QJpUnicodeConv::newConverter(QJpUnicodeConv::Default))
       
    96 {
       
    97 }
       
    98 
       
    99 
       
   100 /*! \internal */
       
   101 QJisCodec::~QJisCodec()
       
   102 {
       
   103     delete (QJpUnicodeConv*)conv;
       
   104     conv = 0;
       
   105 }
       
   106 
       
   107 QByteArray QJisCodec::convertFromUnicode(const QChar *uc, int len, ConverterState *cs) const
       
   108 {
       
   109     char replacement = '?';
       
   110     if (cs) {
       
   111         if (cs->flags & ConvertInvalidToNull)
       
   112             replacement = 0;
       
   113     }
       
   114     int invalid = 0;
       
   115 
       
   116     QByteArray result;
       
   117     Iso2022State state = Ascii;
       
   118     Iso2022State prev = Ascii;
       
   119     for (int i = 0; i < len; i++) {
       
   120         QChar ch = uc[i];
       
   121         uint j;
       
   122         if (ch.row() == 0x00 && ch.cell() < 0x80) {
       
   123             // Ascii
       
   124             if (state != JISX0201_Latin ||
       
   125                 ch.cell() == ReverseSolidus || ch.cell() == Tilde) {
       
   126                 state = Ascii;
       
   127             }
       
   128             j = ch.cell();
       
   129         } else if ((j = conv->unicodeToJisx0201(ch.row(), ch.cell())) != 0) {
       
   130             if (j < 0x80) {
       
   131                 // JIS X 0201 Latin
       
   132                 if (state != Ascii ||
       
   133                     ch.cell() == YenSign || ch.cell() == Overline) {
       
   134                     state = JISX0201_Latin;
       
   135                 }
       
   136             } else {
       
   137                 // JIS X 0201 Kana
       
   138                 state = JISX0201_Kana;
       
   139                 j &= 0x7f;
       
   140             }
       
   141         } else if ((j = conv->unicodeToJisx0208(ch.row(), ch.cell())) != 0) {
       
   142             // JIS X 0208
       
   143             state = JISX0208_1983;
       
   144         } else if ((j = conv->unicodeToJisx0212(ch.row(), ch.cell())) != 0) {
       
   145             // JIS X 0212
       
   146             state = JISX0212;
       
   147         } else {
       
   148             // Invalid
       
   149             state = UnknownState;
       
   150             j = replacement;
       
   151             ++invalid;
       
   152         }
       
   153         if (state != prev) {
       
   154             if (state == UnknownState) {
       
   155                 result += Esc_Ascii;
       
   156             } else {
       
   157                 result += Esc_SEQ[state - MinState];
       
   158             }
       
   159             prev = state;
       
   160         }
       
   161         if (j < 0x0100) {
       
   162             result += j & 0xff;
       
   163         } else {
       
   164             result += (j >> 8) & 0xff;
       
   165             result += j & 0xff;
       
   166         }
       
   167     }
       
   168     if (prev != Ascii) {
       
   169         result += Esc_Ascii;
       
   170     }
       
   171 
       
   172     if (cs) {
       
   173         cs->invalidChars += invalid;
       
   174     }
       
   175     return result;
       
   176 }
       
   177 
       
   178 QString QJisCodec::convertToUnicode(const char* chars, int len, ConverterState *cs) const
       
   179 {
       
   180     uchar buf[4] = {0, 0, 0, 0};
       
   181     int nbuf = 0;
       
   182     Iso2022State state = Ascii, prev = Ascii;
       
   183     bool esc = false;
       
   184     QChar replacement = QChar::ReplacementCharacter;
       
   185     if (cs) {
       
   186         if (cs->flags & ConvertInvalidToNull)
       
   187             replacement = QChar::Null;
       
   188         nbuf = cs->remainingChars;
       
   189         buf[0] = (cs->state_data[0] >> 24) & 0xff;
       
   190         buf[1] = (cs->state_data[0] >> 16) & 0xff;
       
   191         buf[2] = (cs->state_data[0] >>  8) & 0xff;
       
   192         buf[3] = (cs->state_data[0] >>  0) & 0xff;
       
   193         state = (Iso2022State)((cs->state_data[1] >>  0) & 0xff);
       
   194         prev = (Iso2022State)((cs->state_data[1] >>  8) & 0xff);
       
   195         esc = cs->state_data[2];
       
   196     }
       
   197     int invalid = 0;
       
   198 
       
   199     QString result;
       
   200     for (int i=0; i<len; i++) {
       
   201         uchar ch = chars[i];
       
   202         if (esc) {
       
   203             // Escape sequence
       
   204             state = UnknownState;
       
   205             switch (nbuf) {
       
   206             case 0:
       
   207                 if (ch == '$' || strchr(Esc_CHARS, ch)) {
       
   208                     buf[nbuf++] = ch;
       
   209                 } else {
       
   210                     nbuf = 0;
       
   211                     esc = false;
       
   212                 }
       
   213                 break;
       
   214             case 1:
       
   215                 if (buf[0] == '$') {
       
   216                     if (strchr(Esc_CHARS, ch)) {
       
   217                         buf[nbuf++] = ch;
       
   218                     } else {
       
   219                         switch (ch) {
       
   220                         case '@':
       
   221                             state = JISX0208_1978;        // Esc $ @
       
   222                             break;
       
   223                         case 'B':
       
   224                             state = JISX0208_1983;        // Esc $ B
       
   225                             break;
       
   226                         }
       
   227                         nbuf = 0;
       
   228                         esc = false;
       
   229                     }
       
   230                 } else {
       
   231                     if (buf[0] == '(') {
       
   232                         switch (ch) {
       
   233                         case 'B':
       
   234                             state = Ascii;        // Esc (B
       
   235                             break;
       
   236                         case 'I':
       
   237                             state = JISX0201_Kana;        // Esc (I
       
   238                             break;
       
   239                         case 'J':
       
   240                             state = JISX0201_Latin;        // Esc (J
       
   241                             break;
       
   242                         }
       
   243                     }
       
   244                     nbuf = 0;
       
   245                     esc = false;
       
   246                 }
       
   247                 break;
       
   248             case 2:
       
   249                 if (buf[1] == '(') {
       
   250                     switch (ch) {
       
   251                     case 'D':
       
   252                         state = JISX0212;        // Esc $ (D
       
   253                         break;
       
   254                     }
       
   255                 }
       
   256                 nbuf = 0;
       
   257                 esc = false;
       
   258                 break;
       
   259             }
       
   260         } else {
       
   261             if (ch == Esc) {
       
   262                 // Escape sequence
       
   263                 nbuf = 0;
       
   264                 esc = true;
       
   265             } else if (ch == So) {
       
   266                 // Shift out
       
   267                 prev = state;
       
   268                 state = JISX0201_Kana;
       
   269                 nbuf = 0;
       
   270             } else if (ch == Si) {
       
   271                 // Shift in
       
   272                 if (prev == Ascii || prev == JISX0201_Latin) {
       
   273                     state = prev;
       
   274                 } else {
       
   275                     state = Ascii;
       
   276                 }
       
   277                 nbuf = 0;
       
   278             } else {
       
   279                 uint u;
       
   280                 switch (nbuf) {
       
   281                 case 0:
       
   282                     switch (state) {
       
   283                     case Ascii:
       
   284                         if (ch < 0x80) {
       
   285                             result += QLatin1Char(ch);
       
   286                             break;
       
   287                         }
       
   288                         /* fall through */
       
   289                     case JISX0201_Latin:
       
   290                         u = conv->jisx0201ToUnicode(ch);
       
   291                         result += QValidChar(u);
       
   292                         break;
       
   293                     case JISX0201_Kana:
       
   294                         u = conv->jisx0201ToUnicode(ch | 0x80);
       
   295                         result += QValidChar(u);
       
   296                         break;
       
   297                     case JISX0208_1978:
       
   298                     case JISX0208_1983:
       
   299                     case JISX0212:
       
   300                         buf[nbuf++] = ch;
       
   301                         break;
       
   302                     default:
       
   303                         result += QChar::ReplacementCharacter;
       
   304                         break;
       
   305                     }
       
   306                     break;
       
   307                 case 1:
       
   308                     switch (state) {
       
   309                     case JISX0208_1978:
       
   310                     case JISX0208_1983:
       
   311                         u = conv->jisx0208ToUnicode(buf[0] & 0x7f, ch & 0x7f);
       
   312                         result += QValidChar(u);
       
   313                         break;
       
   314                     case JISX0212:
       
   315                         u = conv->jisx0212ToUnicode(buf[0] & 0x7f, ch & 0x7f);
       
   316                         result += QValidChar(u);
       
   317                         break;
       
   318                     default:
       
   319                         result += replacement;
       
   320                         ++invalid;
       
   321                         break;
       
   322                     }
       
   323                     nbuf = 0;
       
   324                     break;
       
   325                 }
       
   326             }
       
   327         }
       
   328     }
       
   329 
       
   330     if (cs) {
       
   331         cs->remainingChars = nbuf;
       
   332         cs->invalidChars += invalid;
       
   333         cs->state_data[0] = (buf[0] << 24) + (buf[1] << 16) + (buf[2] << 8) + buf[3];
       
   334         cs->state_data[1] = (prev << 8) + state;
       
   335         cs->state_data[2] = esc;
       
   336     }
       
   337 
       
   338     return result;
       
   339 }
       
   340 
       
   341 
       
   342 
       
   343 /*! \internal */
       
   344 int QJisCodec::_mibEnum()
       
   345 {
       
   346     return 39;
       
   347 }
       
   348 
       
   349 /*! \internal */
       
   350 QByteArray QJisCodec::_name()
       
   351 {
       
   352     return "ISO-2022-JP";
       
   353 }
       
   354 
       
   355 /*!
       
   356     Returns the codec's mime name.
       
   357 */
       
   358 QList<QByteArray> QJisCodec::_aliases()
       
   359 {
       
   360     QList<QByteArray> list;
       
   361     list << "JIS7"; // Qt 3 compat
       
   362     return list;
       
   363 }
       
   364 
       
   365 #endif // QT_NO_TEXTCODEC
       
   366 
       
   367 QT_END_NAMESPACE