util/unicode/codecs/big5/main.cpp
changeset 0 1918ee327afb
child 4 3b1da2848fc7
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/util/unicode/codecs/big5/main.cpp	Mon Jan 11 14:00:40 2010 +0000
@@ -0,0 +1,158 @@
+/****************************************************************************
+**
+** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies).
+** All rights reserved.
+** Contact: Nokia Corporation (qt-info@nokia.com)
+**
+** This file is part of the utils of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** No Commercial Usage
+** This file contains pre-release code and may not be distributed.
+** You may use this file in accordance with the terms and conditions
+** contained in the Technology Preview License Agreement accompanying
+** this package.
+**
+** GNU Lesser General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU Lesser
+** General Public License version 2.1 as published by the Free Software
+** Foundation and appearing in the file LICENSE.LGPL included in the
+** packaging of this file.  Please review the following information to
+** ensure the GNU Lesser General Public License version 2.1 requirements
+** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
+**
+** In addition, as a special exception, Nokia gives you certain additional
+** rights.  These rights are described in the Nokia Qt LGPL Exception
+** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
+**
+** If you have questions regarding the use of this file, please contact
+** Nokia at qt-info@nokia.com.
+**
+**
+**
+**
+**
+**
+**
+**
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+#include <qtextcodec.h>
+#include <qbytearray.h>
+#include <qstring.h>
+#include <qdebug.h>
+#include <qfile.h>
+#include <qcoreapplication.h>
+#include <qset.h>
+
+// One entry of the character map: a Unicode code point together with the
+// Big5 code that the reference table pairs it with.
+struct Map
+{
+    Map(uint unicode, uint big5)
+        : uc(unicode),
+          b5(big5)
+    {
+    }
+
+    uint uc;    // Unicode code point
+    uint b5;    // Big5 code (lead byte in the high 8 bits, trail byte in the low 8)
+};
+
+/*
+    Checks Qt's "Big5-hkscs" text codec against a reference character map and
+    prints the disagreements with qDebug() as two tables:
+
+      b5_to_uc_map - lines whose Big5 code the codec decodes to a different
+                     code point than the one listed
+      uc_to_b5_map - lines whose code point the codec encodes to a different
+                     Big5 code than the one listed
+
+    Only two-byte Big5 codes (>= 0x100) and code points up to 0xffff are
+    examined. Returns 1 if the codec or the character map is unavailable.
+*/
+int main(int argc, char **argv)
+{
+    QCoreApplication app(argc, argv);
+    QTextCodec *big5 = QTextCodec::codecForName("Big5-hkscs");
+    if (!big5) {
+        // codecForName() returns 0 for an unknown codec; the toUnicode() call
+        // further down would otherwise dereference a null pointer.
+        qWarning("The Big5-hkscs codec is not available");
+        return 1;
+    }
+
+    // Declared ahead of the #if so that both input variants can see them.
+    QSet<uint> b5_ok;           // Big5 codes that decoded to a code point listed for them
+    QSet<uint> uc_ok;           // code points that encoded to a Big5 code listed for them
+    QList<Map> b5_to_uc_map;    // lines on which decoding disagreed with the table
+    QList<Map> uc_to_b5_map;    // lines on which encoding disagreed with the table
+
+#if 0
+    // Disabled alternative input: a "0x<big5> U+<unicode>" style table read
+    // from a hard-coded path.
+    QFile f("/home/lars/dev/qt-4.0/util/unicode/data/big5-eten.txt");
+    f.open(QFile::ReadOnly);
+
+    while (!f.atEnd()) {
+        QByteArray line = f.readLine();
+        if (line.startsWith("#"))
+            continue;
+        line.replace("0x", "");
+        line.replace("U+", "");
+        line.replace("\t", " ");
+        line = line.simplified();
+        QList<QByteArray> split = line.split(' ');
+        bool ok;
+        int b5 = split.at(0).toInt(&ok, 16);
+        Q_ASSERT(ok);
+        int uc = split.at(1).toInt(&ok, 16);
+        Q_ASSERT(ok);
+        if (b5 < 0x100)
+            continue;
+#else
+    QFile f(":/BIG5");
+    if (!f.open(QFile::ReadOnly)) {
+        // Without this check an unreadable map would silently produce two
+        // empty tables.
+        qWarning("Cannot open the character map :/BIG5");
+        return 1;
+    }
+
+    // Skip everything up to and including the "CHARMAP" line.
+    while (!f.atEnd()) {
+        QByteArray line = f.readLine();
+        if (line.startsWith("CHARMAP"))
+            break;
+    }
+
+    while (!f.atEnd()) {
+        QByteArray line = f.readLine();
+        if (line.startsWith("%"))
+            continue;
+        if (line.startsWith("END CHARMAP"))
+            break;
+
+        // Reduce "<Uxxxx> /xhh/xhh ..." to "xxxx hhhh ...".
+        line.replace("/x", "");
+        line.replace("<U", "");
+        line.replace(">", "");
+        line.replace("\t", " ");
+        line = line.simplified();
+        QList<QByteArray> split = line.split(' ');
+        if (split.size() < 2)
+            continue;   // blank or truncated line: there is no second field to read
+
+        bool ok;
+        int b5 = split.at(1).toInt(&ok, 16);
+        if (!ok) {
+            qWarning("Skipping line with unparsable Big5 code: %s", line.constData());
+            continue;
+        }
+        int uc = split.at(0).toInt(&ok, 16);
+        if (!ok) {
+            qWarning("Skipping line with unparsable code point: %s", line.constData());
+            continue;
+        }
+        if (b5 < 0x100 || uc > 0xffff)
+            continue;
+#endif
+
+//         qDebug() << hex << "testing: '" << b5 << "' - '" << uc << "'";
+        QByteArray ba;
+
+        ba += (char)(uchar)(b5 >> 8);
+        ba += (char)(uchar)(b5 & 0xff);
+
+        // Decode the two Big5 bytes ...
+        QString s = big5->toUnicode(ba);
+        Q_ASSERT(s.length() == 1);
+
+        // ... and encode the listed code point back into a numeric Big5 code.
+        QString s2;
+        s2 = QChar(uc);
+        ba = big5->fromUnicode(s2);
+        Q_ASSERT(ba.length() <= 2);
+        int round;
+        if (ba.length() == 1)
+            round = (int)(uchar)ba[0];
+        else
+            round = ((int)(uchar)ba[0] << 8) + (int)(uchar)ba[1];
+
+        // Each set is keyed by the value that was fed INTO the conversion, so
+        // that the filters below look it up with a key from the same code
+        // space. (Previously the sets were queried with values from the other
+        // code space, which matched only by numeric coincidence.)
+        if (b5 != round)
+            uc_to_b5_map += Map(uc, b5);
+        else
+            uc_ok.insert(uc);
+
+        if (s[0].unicode() != uc)
+            b5_to_uc_map += Map(uc, b5);
+        else
+            b5_ok.insert(b5);
+    }
+
+    // Decoding mismatches, minus Big5 codes that some other line of the map
+    // showed to decode correctly.
+    QList<QByteArray> list;
+    foreach (const Map &m, b5_to_uc_map) {
+        if (!b5_ok.contains(m.b5))
+            list += QByteArray("    { 0x" + QByteArray::number(m.b5, 16) + ", 0x" + QByteArray::number(m.uc, 16) + " }\n");
+    }
+    QByteArray ba;
+    qSort(list);
+    foreach (const QByteArray &a, list)
+        ba += a;
+    qDebug() << "struct B5Map b5_to_uc_map = {\n" << ba + "\n};";
+
+    // Encoding mismatches, minus code points that some other line of the map
+    // showed to encode correctly.
+    list = QList<QByteArray>();
+    foreach (const Map &m, uc_to_b5_map) {
+        if (!uc_ok.contains(m.uc))
+            list += QByteArray("    { 0x" + QByteArray::number(m.uc, 16) + ", 0x" + QByteArray::number(m.b5, 16) + " }\n");
+    }
+    ba = QByteArray();
+    qSort(list);
+    foreach (const QByteArray &a, list)
+        ba += a;
+    qDebug() << "struct B5Map uc_to_b5_map = {\n" << ba + "\n};";
+}