src/plugins/codecs/jp/qjiscodec.cpp
author Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
Fri, 22 Jan 2010 10:32:13 +0200 (2010-01-22)
changeset 1 ae9c8dab0e3e
parent 0 1918ee327afb
child 4 3b1da2848fc7
permissions -rw-r--r--
Revision: 201001 Kit: 201003
/****************************************************************************
**
** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies).
** All rights reserved.
** Contact: Nokia Corporation (qt-info@nokia.com)
**
** This file is part of the plugins of the Qt Toolkit.
**
** $QT_BEGIN_LICENSE:LGPL$
** No Commercial Usage
** This file contains pre-release code and may not be distributed.
** You may use this file in accordance with the terms and conditions
** contained in the Technology Preview License Agreement accompanying
** this package.
**
** GNU Lesser General Public License Usage
** Alternatively, this file may be used under the terms of the GNU Lesser
** General Public License version 2.1 as published by the Free Software
** Foundation and appearing in the file LICENSE.LGPL included in the
** packaging of this file.  Please review the following information to
** ensure the GNU Lesser General Public License version 2.1 requirements
** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
**
** In addition, as a special exception, Nokia gives you certain additional
** rights.  These rights are described in the Nokia Qt LGPL Exception
** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
**
** If you have questions regarding the use of this file, please contact
** Nokia at qt-info@nokia.com.
**
**
**
**
**
**
**
**
** $QT_END_LICENSE$
**
****************************************************************************/

// Most of the code here was originally written by Serika Kurusugawa,
// a.k.a. Junji Takagi, and is included in Qt with the author's permission
// and the grateful thanks of the Qt team.

/*! \class QJisCodec
    \reentrant
    \internal
*/

#include "qjiscodec.h"
#include "qlist.h"

QT_BEGIN_NAMESPACE

#ifndef QT_NO_TEXTCODEC
// Byte values with a special role in this codec. The last four name the two
// positions (0x5c and 0x7e) that the encoder treats differently depending on
// whether the ASCII or the JIS X 0201 Latin set is designated.
enum {
    // Control characters
    Esc = 0x1b,                         // Escape
    So  = 0x0e,                         // Shift Out
    Si  = 0x0f,                         // Shift In

    // Position 0x5c
    ReverseSolidus = 0x5c,
    YenSign        = ReverseSolidus,

    // Position 0x7e
    Tilde          = 0x7e,
    Overline       = Tilde
};

// Range predicates. Both expand their argument twice, so pass only
// side-effect-free expressions.
#define IsKana(c)       ((c) >= 0xa1 && (c) <= 0xdf)    // 0xa1..0xdf inclusive
#define IsJisChar(c)    ((c) >= 0x21 && (c) <= 0x7e)    // 0x21..0x7e inclusive

// Turns a converter result into a QChar: a non-zero value is used as the
// UTF-16 code unit, zero becomes QChar::ReplacementCharacter.
#define QValidChar(u)   ((u) ? QChar(ushort(u)) : QChar(QChar::ReplacementCharacter))

// Character set currently designated while encoding or decoding.
// The values from MinState to MaxState are used as indices into Esc_SEQ
// (as state - MinState), so their order must match that table.
enum Iso2022State {
    Ascii,
    MinState = Ascii,
    JISX0201_Latin,
    JISX0201_Kana,
    JISX0208_1978,
    JISX0208_1983,
    JISX0212,
    MaxState = JISX0212,
    UnknownState            // no supported set is designated
};

// Bytes the decoder accepts as an intermediate character of an escape
// sequence (in addition to '$' directly after ESC).
static const char Esc_CHARS[] = "()*+-./";

// NUL-terminated escape sequences that designate each supported character set.
static const char Esc_Ascii[]          = { Esc, '(', 'B', '\0' };
static const char Esc_JISX0201_Latin[] = { Esc, '(', 'J', '\0' };
static const char Esc_JISX0201_Kana[]  = { Esc, '(', 'I', '\0' };
static const char Esc_JISX0208_1978[]  = { Esc, '$', '@', '\0' };
static const char Esc_JISX0208_1983[]  = { Esc, '$', 'B', '\0' };
static const char Esc_JISX0212[]       = { Esc, '$', '(', 'D', '\0' };

// Lookup table indexed by (Iso2022State - MinState); keep it in enum order.
static const char * const Esc_SEQ[] = {
    Esc_Ascii,
    Esc_JISX0201_Latin,
    Esc_JISX0201_Kana,
    Esc_JISX0208_1978,
    Esc_JISX0208_1983,
    Esc_JISX0212
};

/*!
    \internal

    Constructs the codec and stores the converter returned by
    QJpUnicodeConv::newConverter(QJpUnicodeConv::Default) in \c conv.
*/
QJisCodec::QJisCodec()
    : conv(QJpUnicodeConv::newConverter(QJpUnicodeConv::Default))
{
}


/*!
    \internal

    Destroys the codec and deletes the converter held in \c conv.
*/
QJisCodec::~QJisCodec()
{
    QJpUnicodeConv *converter = (QJpUnicodeConv*)conv;
    conv = 0;
    delete converter;
}

/*!
    \internal

    Encodes the \a len code units starting at \a uc and returns the bytes.

    Every call starts in the ASCII state and, if another character set is
    active when the input ends, appends the "ESC ( B" sequence so that the
    result ends in ASCII again. A designation sequence is written each time
    the character set needed for the next character differs from the one
    used for the previous character.

    Characters for which none of the converter lookups yields a code are
    written as '?' (or as a NUL byte when \a cs has ConvertInvalidToNull
    set), preceded by "ESC ( B". Their number is added to
    \c{cs->invalidChars} when \a cs is non-null.
*/
QByteArray QJisCodec::convertFromUnicode(const QChar *uc, int len, ConverterState *cs) const
{
    char replacement = '?';
    if (cs) {
        if (cs->flags & ConvertInvalidToNull)
            replacement = 0;
    }
    int invalid = 0;

    QByteArray result;
    Iso2022State state = Ascii;     // set required by the current character
    Iso2022State prev = Ascii;      // set designated in the output so far
    for (int i = 0; i < len; i++) {
        QChar ch = uc[i];
        uint j;
        if (ch.row() == 0x00 && ch.cell() < 0x80) {
            // Ascii
            // Stay in JIS X 0201 Latin if it is already active, except for
            // the two positions where that set differs from ASCII.
            if (state != JISX0201_Latin ||
                ch.cell() == ReverseSolidus || ch.cell() == Tilde) {
                state = Ascii;
            }
            j = ch.cell();
        } else if ((j = conv->unicodeToJisx0201(ch.row(), ch.cell())) != 0) {
            if (j < 0x80) {
                // JIS X 0201 Latin
                // YenSign and Overline are JIS X 0201 code values, so they
                // have to be compared with the converted code j. This branch
                // is only reached for non-ASCII input, whose Unicode cell
                // says nothing about the JIS position. Comparing the cell
                // would let these characters be written in the ASCII state,
                // where 0x5c and 0x7e mean backslash and tilde.
                if (state != Ascii ||
                    j == uint(YenSign) || j == uint(Overline)) {
                    state = JISX0201_Latin;
                }
            } else {
                // JIS X 0201 Kana
                state = JISX0201_Kana;
                j &= 0x7f;      // written as a 7-bit code
            }
        } else if ((j = conv->unicodeToJisx0208(ch.row(), ch.cell())) != 0) {
            // JIS X 0208
            state = JISX0208_1983;
        } else if ((j = conv->unicodeToJisx0212(ch.row(), ch.cell())) != 0) {
            // JIS X 0212
            state = JISX0212;
        } else {
            // Invalid
            state = UnknownState;
            j = replacement;
            ++invalid;
        }
        if (state != prev) {
            if (state == UnknownState) {
                // The replacement byte is written in the ASCII set.
                result += Esc_Ascii;
            } else {
                result += Esc_SEQ[state - MinState];
            }
            prev = state;
        }
        if (j < 0x0100) {
            // Single-byte code
            result += j & 0xff;
        } else {
            // Double-byte code, high byte first
            result += (j >> 8) & 0xff;
            result += j & 0xff;
        }
    }
    if (prev != Ascii) {
        // Leave the output in the ASCII state.
        result += Esc_Ascii;
    }

    if (cs) {
        cs->invalidChars += invalid;
    }
    return result;
}

/*!
    \internal

    Decodes the \a len bytes starting at \a chars and returns the text.

    The decoder tracks the designated character set, whether it is inside
    an escape sequence, and any buffered bytes (escape intermediates or the
    first byte of a double-byte character). When \a cs is non-null that
    state is loaded from it on entry and stored back on exit, so input may
    be split across calls at any byte:

    \list
    \o \c remainingChars holds the number of buffered bytes.
    \o \c state_data[0] holds the four buffer bytes, first byte in the
       most significant position.
    \o \c state_data[1] holds the current set in bits 0-7 and the set that
       was active before the last Shift Out in bits 8-15.
    \o \c state_data[2] is non-zero while inside an escape sequence.
    \endlist

    Bytes that arrive while no supported character set is designated are
    replaced by U+FFFD (or by a null character when \a cs has
    ConvertInvalidToNull set) and are counted in \c{cs->invalidChars}.
    Codes the converter cannot map are output as U+FFFD.
*/
QString QJisCodec::convertToUnicode(const char* chars, int len, ConverterState *cs) const
{
    uchar buf[4] = {0, 0, 0, 0};
    int nbuf = 0;
    Iso2022State state = Ascii, prev = Ascii;
    bool esc = false;
    QChar replacement = QChar::ReplacementCharacter;
    if (cs) {
        if (cs->flags & ConvertInvalidToNull)
            replacement = QChar::Null;
        // Resume from the state saved by the previous call.
        nbuf = cs->remainingChars;
        buf[0] = (cs->state_data[0] >> 24) & 0xff;
        buf[1] = (cs->state_data[0] >> 16) & 0xff;
        buf[2] = (cs->state_data[0] >>  8) & 0xff;
        buf[3] = (cs->state_data[0] >>  0) & 0xff;
        state = (Iso2022State)((cs->state_data[1] >>  0) & 0xff);
        prev = (Iso2022State)((cs->state_data[1] >>  8) & 0xff);
        esc = cs->state_data[2];
    }
    int invalid = 0;

    QString result;
    for (int i=0; i<len; i++) {
        uchar ch = chars[i];
        if (esc) {
            // Escape sequence
            // The set is unknown until the sequence is recognised below; an
            // unrecognised or unsupported sequence leaves it unknown.
            state = UnknownState;
            switch (nbuf) {
            case 0:
                // First byte after ESC. The explicit NUL test is needed
                // because strchr() also matches the string terminator.
                if (ch == '$' || (ch != 0 && strchr(Esc_CHARS, ch))) {
                    buf[nbuf++] = ch;
                } else {
                    nbuf = 0;
                    esc = false;
                }
                break;
            case 1:
                if (buf[0] == '$') {
                    if (ch != 0 && strchr(Esc_CHARS, ch)) {
                        // Three-byte sequence: wait for the final byte.
                        buf[nbuf++] = ch;
                    } else {
                        switch (ch) {
                        case '@':
                            state = JISX0208_1978;        // Esc $ @
                            break;
                        case 'B':
                            state = JISX0208_1983;        // Esc $ B
                            break;
                        }
                        nbuf = 0;
                        esc = false;
                    }
                } else {
                    if (buf[0] == '(') {
                        switch (ch) {
                        case 'B':
                            state = Ascii;        // Esc (B
                            break;
                        case 'I':
                            state = JISX0201_Kana;        // Esc (I
                            break;
                        case 'J':
                            state = JISX0201_Latin;        // Esc (J
                            break;
                        }
                    }
                    nbuf = 0;
                    esc = false;
                }
                break;
            case 2:
                if (buf[1] == '(') {
                    switch (ch) {
                    case 'D':
                        state = JISX0212;        // Esc $ (D
                        break;
                    }
                }
                nbuf = 0;
                esc = false;
                break;
            }
        } else {
            if (ch == Esc) {
                // Escape sequence
                nbuf = 0;
                esc = true;
            } else if (ch == So) {
                // Shift out
                // Remember the current set so that Shift In can restore it.
                prev = state;
                state = JISX0201_Kana;
                nbuf = 0;
            } else if (ch == Si) {
                // Shift in
                // Only a single-byte Latin set is restored; anything else
                // falls back to ASCII.
                if (prev == Ascii || prev == JISX0201_Latin) {
                    state = prev;
                } else {
                    state = Ascii;
                }
                nbuf = 0;
            } else {
                uint u;
                switch (nbuf) {
                case 0:
                    switch (state) {
                    case Ascii:
                        if (ch < 0x80) {
                            result += QLatin1Char(ch);
                            break;
                        }
                        /* fall through */
                    case JISX0201_Latin:
                        u = conv->jisx0201ToUnicode(ch);
                        result += QValidChar(u);
                        break;
                    case JISX0201_Kana:
                        // The converter is given the code with the high bit set.
                        u = conv->jisx0201ToUnicode(ch | 0x80);
                        result += QValidChar(u);
                        break;
                    case JISX0208_1978:
                    case JISX0208_1983:
                    case JISX0212:
                        // First byte of a double-byte character.
                        buf[nbuf++] = ch;
                        break;
                    default:
                        // No supported set is designated: treat the byte as
                        // invalid input, i.e. honour ConvertInvalidToNull and
                        // count it, as the double-byte path below does.
                        result += replacement;
                        ++invalid;
                        break;
                    }
                    break;
                case 1:
                    // Second byte of a double-byte character.
                    switch (state) {
                    case JISX0208_1978:
                    case JISX0208_1983:
                        u = conv->jisx0208ToUnicode(buf[0] & 0x7f, ch & 0x7f);
                        result += QValidChar(u);
                        break;
                    case JISX0212:
                        u = conv->jisx0212ToUnicode(buf[0] & 0x7f, ch & 0x7f);
                        result += QValidChar(u);
                        break;
                    default:
                        result += replacement;
                        ++invalid;
                        break;
                    }
                    nbuf = 0;
                    break;
                }
            }
        }
    }

    if (cs) {
        // Save the state for the next call. The buffer bytes are widened to
        // uint before shifting so that a byte >= 0x80 is never shifted into
        // the sign bit of an int.
        cs->remainingChars = nbuf;
        cs->invalidChars += invalid;
        cs->state_data[0] = (uint(buf[0]) << 24) | (uint(buf[1]) << 16)
                            | (uint(buf[2]) << 8) | uint(buf[3]);
        cs->state_data[1] = (prev << 8) + state;
        cs->state_data[2] = esc;
    }

    return result;
}



/*!
    \internal

    Returns the MIB enum value reported for this codec, which is 39.
*/
int QJisCodec::_mibEnum()
{
    const int mib = 39;
    return mib;
}

/*!
    \internal

    Returns the name reported for this codec, "ISO-2022-JP".
*/
QByteArray QJisCodec::_name()
{
    return QByteArray("ISO-2022-JP");
}

/*!
    Returns the list of alternative names for this codec. It contains the
    single entry "JIS7".
*/
QList<QByteArray> QJisCodec::_aliases()
{
    QList<QByteArray> aliases;
    aliases.append(QByteArray("JIS7")); // Qt 3 compat
    return aliases;
}

#endif // QT_NO_TEXTCODEC

QT_END_NAMESPACE