util/unicode/main.cpp
changeset 30 5dc02b23752f
parent 18 2f34d5167611
equal deleted inserted replaced
29:b72c6db6890b 30:5dc02b23752f
    36 **
    36 **
    37 **
    37 **
    38 ** $QT_END_LICENSE$
    38 ** $QT_END_LICENSE$
    39 **
    39 **
    40 ****************************************************************************/
    40 ****************************************************************************/
       
    41 
    41 #include <qlist.h>
    42 #include <qlist.h>
    42 #include <qhash.h>
    43 #include <qhash.h>
    43 #include <qfile.h>
    44 #include <qfile.h>
       
    45 #include <qbytearray.h>
    44 #include <qstring.h>
    46 #include <qstring.h>
    45 #include <qchar.h>
    47 #include <qchar.h>
    46 #include <private/qunicodetables_p.h>
       
    47 #include <qvector.h>
    48 #include <qvector.h>
    48 #include <qdebug.h>
    49 #include <qdebug.h>
    49 
    50 #if 0
    50 
    51 #include <private/qunicodetables_p.h>
    51 static struct AgeMap {
    52 #endif
    52     const char *age;
    53 
    53     const QChar::UnicodeVersion version;
    54 #define DATA_VERSION_S "5.0"
    54 } ageMap [] = {
    55 #define DATA_VERSION_STR "QChar::Unicode_5_0"
    55     { "1.1", QChar::Unicode_1_1 },
    56 
    56     { "2.0", QChar::Unicode_2_0 },
    57 #define LAST_CODEPOINT 0x10ffff
    57     { "2.1", QChar::Unicode_2_1_2 },
    58 #define LAST_CODEPOINT_STR "0x10ffff"
    58     { "3.0", QChar::Unicode_3_0 },
    59 
    59     { "3.1", QChar::Unicode_3_1 },
    60 
    60     { "3.2", QChar::Unicode_3_2 },
    61 static QHash<QByteArray, QChar::UnicodeVersion> age_map;
    61     { "4.0", QChar::Unicode_4_0 },
    62 
    62     { "4.1", QChar::Unicode_4_1 },
    63 static void initAgeMap()
    63     { "5.0", QChar::Unicode_5_0 },
    64 {
    64     { 0, QChar::Unicode_Unassigned }
    65     struct AgeMap {
    65 };
    66         const QChar::UnicodeVersion version;
    66 #define CURRENT_UNICODE_VERSION "QChar::Unicode_5_0"
    67         const char *age;
       
    68     } ageMap[] = {
       
    69         { QChar::Unicode_1_1,   "1.1" },
       
    70         { QChar::Unicode_2_0,   "2.0" },
       
    71         { QChar::Unicode_2_1_2, "2.1" },
       
    72         { QChar::Unicode_3_0,   "3.0" },
       
    73         { QChar::Unicode_3_1,   "3.1" },
       
    74         { QChar::Unicode_3_2,   "3.2" },
       
    75         { QChar::Unicode_4_0,   "4.0" },
       
    76         { QChar::Unicode_4_1,   "4.1" },
       
    77         { QChar::Unicode_5_0,   "5.0" },
       
    78         { QChar::Unicode_Unassigned, 0 }
       
    79     };
       
    80     AgeMap *d = ageMap;
       
    81     while (d->age) {
       
    82         age_map.insert(d->age, d->version);
       
    83         ++d;
       
    84     }
       
    85 }
       
    86 
    67 
    87 
    68 static const char *grapheme_break_string =
    88 static const char *grapheme_break_string =
    69     "    enum GraphemeBreak {\n"
    89     "    enum GraphemeBreak {\n"
    70     "        GraphemeBreakOther, \n"
    90     "        GraphemeBreakOther,\n"
    71     "        GraphemeBreakCR,\n"
    91     "        GraphemeBreakCR,\n"
    72     "        GraphemeBreakLF,\n"
    92     "        GraphemeBreakLF,\n"
    73     "        GraphemeBreakControl,\n"
    93     "        GraphemeBreakControl,\n"
    74     "        GraphemeBreakExtend,\n"
    94     "        GraphemeBreakExtend,\n"
    75     "        GraphemeBreakL,\n"
    95     "        GraphemeBreakL,\n"
    88     GraphemeBreakL,
   108     GraphemeBreakL,
    89     GraphemeBreakV,
   109     GraphemeBreakV,
    90     GraphemeBreakT,
   110     GraphemeBreakT,
    91     GraphemeBreakLV,
   111     GraphemeBreakLV,
    92     GraphemeBreakLVT
   112     GraphemeBreakLVT
       
   113 
       
   114     , GraphemeBreak_Unassigned
    93 };
   115 };
    94 
   116 
    95 QHash<QByteArray, GraphemeBreak> grapheme_break_map;
   117 static QHash<QByteArray, GraphemeBreak> grapheme_break_map;
    96 
   118 
    97 static void initGraphemeBreak()
   119 static void initGraphemeBreak()
    98 {
   120 {
    99     struct GraphemeBreakList {
   121     struct GraphemeBreakList {
   100         GraphemeBreak brk;
   122         GraphemeBreak brk;
   108         { GraphemeBreakL, "L" },
   130         { GraphemeBreakL, "L" },
   109         { GraphemeBreakV, "V" },
   131         { GraphemeBreakV, "V" },
   110         { GraphemeBreakT, "T" },
   132         { GraphemeBreakT, "T" },
   111         { GraphemeBreakLV, "LV" },
   133         { GraphemeBreakLV, "LV" },
   112         { GraphemeBreakLVT, "LVT" },
   134         { GraphemeBreakLVT, "LVT" },
   113         { GraphemeBreakOther, 0 }
   135         { GraphemeBreak_Unassigned, 0 }
   114     };
   136     };
   115     GraphemeBreakList *d = breaks;
   137     GraphemeBreakList *d = breaks;
   116     while (d->name) {
   138     while (d->name) {
   117         grapheme_break_map.insert(d->name, d->brk);
   139         grapheme_break_map.insert(d->name, d->brk);
   118         ++d;
   140         ++d;
   119     }
   141     }
   120 }
   142 }
   121 
   143 
   122 const char *word_break_string =
   144 
       
   145 static const char *word_break_string =
   123     "    enum WordBreak {\n"
   146     "    enum WordBreak {\n"
   124     "        WordBreakOther,\n"
   147     "        WordBreakOther,\n"
   125     "        WordBreakFormat,\n"
   148     "        WordBreakFormat,\n"
   126     "        WordBreakKatakana,\n"
   149     "        WordBreakKatakana,\n"
   127     "        WordBreakALetter,\n"
   150     "        WordBreakALetter,\n"
   138     WordBreakALetter,
   161     WordBreakALetter,
   139     WordBreakMidLetter,
   162     WordBreakMidLetter,
   140     WordBreakMidNum,
   163     WordBreakMidNum,
   141     WordBreakNumeric,
   164     WordBreakNumeric,
   142     WordBreakExtendNumLet
   165     WordBreakExtendNumLet
       
   166 
       
   167     , WordBreak_Unassigned
   143 };
   168 };
   144 
   169 
   145 
   170 static QHash<QByteArray, WordBreak> word_break_map;
   146 QHash<QByteArray, WordBreak> word_break_map;
       
   147 
   171 
   148 static void initWordBreak()
   172 static void initWordBreak()
   149 {
   173 {
   150     struct WordBreakList {
   174     struct WordBreakList {
   151         WordBreak brk;
   175         WordBreak brk;
   157         { WordBreakALetter, "ALetter" },
   181         { WordBreakALetter, "ALetter" },
   158         { WordBreakMidLetter, "MidLetter" },
   182         { WordBreakMidLetter, "MidLetter" },
   159         { WordBreakMidNum, "MidNum" },
   183         { WordBreakMidNum, "MidNum" },
   160         { WordBreakNumeric, "Numeric" },
   184         { WordBreakNumeric, "Numeric" },
   161         { WordBreakExtendNumLet, "ExtendNumLet" },
   185         { WordBreakExtendNumLet, "ExtendNumLet" },
   162         { WordBreakFormat,  0 }
   186         { WordBreak_Unassigned, 0 }
   163     };
   187     };
   164     WordBreakList *d = breaks;
   188     WordBreakList *d = breaks;
   165     while (d->name) {
   189     while (d->name) {
   166         word_break_map.insert(d->name, d->brk);
   190         word_break_map.insert(d->name, d->brk);
   167         ++d;
   191         ++d;
   194     SentenceBreakOLetter,
   218     SentenceBreakOLetter,
   195     SentenceBreakNumeric,
   219     SentenceBreakNumeric,
   196     SentenceBreakATerm,
   220     SentenceBreakATerm,
   197     SentenceBreakSTerm,
   221     SentenceBreakSTerm,
   198     SentenceBreakClose
   222     SentenceBreakClose
       
   223 
       
   224     , SentenceBreak_Unassigned
   199 };
   225 };
   200 
   226 
   201 
   227 static QHash<QByteArray, SentenceBreak> sentence_break_map;
   202 QHash<QByteArray, SentenceBreak> sentence_break_map;
       
   203 
   228 
   204 static void initSentenceBreak()
   229 static void initSentenceBreak()
   205 {
   230 {
   206     struct SentenceBreakList {
   231     struct SentenceBreakList {
   207         SentenceBreak brk;
   232         SentenceBreak brk;
   216         { SentenceBreakOLetter, "OLetter" },
   241         { SentenceBreakOLetter, "OLetter" },
   217         { SentenceBreakNumeric, "Numeric" },
   242         { SentenceBreakNumeric, "Numeric" },
   218         { SentenceBreakATerm, "ATerm" },
   243         { SentenceBreakATerm, "ATerm" },
   219         { SentenceBreakSTerm, "STerm" },
   244         { SentenceBreakSTerm, "STerm" },
   220         { SentenceBreakClose, "Close" },
   245         { SentenceBreakClose, "Close" },
   221         { SentenceBreakOther,  0 }
   246         { SentenceBreak_Unassigned, 0 }
   222     };
   247     };
   223     SentenceBreakList *d = breaks;
   248     SentenceBreakList *d = breaks;
   224     while (d->name) {
   249     while (d->name) {
   225         sentence_break_map.insert(d->name, d->brk);
   250         sentence_break_map.insert(d->name, d->brk);
   226         ++d;
   251         ++d;
   227     }
   252     }
   228 }
   253 }
   229 
   254 
   230 
   255 
   231 // Keep this one in sync with the code in createPropertyInfo
   256 static const char *lineBreakClass =
   232 const char *property_string =
       
   233     "    struct Properties {\n"
       
   234     "        ushort category : 8;\n"
       
   235     "        ushort line_break_class : 8;\n"
       
   236     "        ushort direction : 8;\n"
       
   237     "        ushort combiningClass :8;\n"
       
   238     "        ushort joining : 2;\n"
       
   239     "        signed short digitValue : 6; /* 5 needed */\n"
       
   240     "        ushort unicodeVersion : 4;\n"
       
   241     "        ushort lowerCaseSpecial : 1;\n"
       
   242     "        ushort upperCaseSpecial : 1;\n"
       
   243     "        ushort titleCaseSpecial : 1;\n"
       
   244     "        ushort caseFoldSpecial : 1; /* currently unused */\n"
       
   245     "        signed short mirrorDiff : 16;\n"
       
   246     "        signed short lowerCaseDiff : 16;\n"
       
   247     "        signed short upperCaseDiff : 16;\n"
       
   248     "        signed short titleCaseDiff : 16;\n"
       
   249     "        signed short caseFoldDiff : 16;\n"
       
   250     "        ushort graphemeBreak : 8;\n"
       
   251     "        ushort wordBreak : 8;\n"
       
   252     "        ushort sentenceBreak : 8;\n"
       
   253     "    };\n"
       
   254     "    Q_CORE_EXPORT const Properties * QT_FASTCALL properties(uint ucs4);\n"
       
   255     "    Q_CORE_EXPORT const Properties * QT_FASTCALL properties(ushort ucs2);\n";
       
   256 
       
   257 const char *lineBreakClass =
       
   258     "    // see http://www.unicode.org/reports/tr14/tr14-19.html\n"
   257     "    // see http://www.unicode.org/reports/tr14/tr14-19.html\n"
   259     "    // we don't use the XX, AI and CB properties and map them to AL instead.\n"
   258     "    // we don't use the XX, AI and CB properties and map them to AL instead.\n"
   260     "    // as we don't support any EBDIC based OS'es, NL is ignored and mapped to AL as well.\n"
   259     "    // as we don't support any EBDIC based OS'es, NL is ignored and mapped to AL as well.\n"
   261     "    enum LineBreakClass {\n"
   260     "    enum LineBreakClass {\n"
   262     "        LineBreak_OP, LineBreak_CL, LineBreak_QU, LineBreak_GL, LineBreak_NS,\n"
   261     "        LineBreak_OP, LineBreak_CL, LineBreak_QU, LineBreak_GL, LineBreak_NS,\n"
   266     "        LineBreak_WJ, LineBreak_H2, LineBreak_H3, LineBreak_JL, LineBreak_JV,\n"
   265     "        LineBreak_WJ, LineBreak_H2, LineBreak_H3, LineBreak_JL, LineBreak_JV,\n"
   267     "        LineBreak_JT, LineBreak_SA, LineBreak_SG,\n"
   266     "        LineBreak_JT, LineBreak_SA, LineBreak_SG,\n"
   268     "        LineBreak_SP, LineBreak_CR, LineBreak_LF, LineBreak_BK\n"
   267     "        LineBreak_SP, LineBreak_CR, LineBreak_LF, LineBreak_BK\n"
   269     "    };\n\n";
   268     "    };\n\n";
   270 
   269 
   271 const char *methods =
   270 enum LineBreakClass {
       
   271     LineBreak_OP, LineBreak_CL, LineBreak_QU, LineBreak_GL, LineBreak_NS,
       
   272     LineBreak_EX, LineBreak_SY, LineBreak_IS, LineBreak_PR, LineBreak_PO,
       
   273     LineBreak_NU, LineBreak_AL, LineBreak_ID, LineBreak_IN, LineBreak_HY,
       
   274     LineBreak_BA, LineBreak_BB, LineBreak_B2, LineBreak_ZW, LineBreak_CM,
       
   275     LineBreak_WJ, LineBreak_H2, LineBreak_H3, LineBreak_JL, LineBreak_JV,
       
   276     LineBreak_JT, LineBreak_SA, LineBreak_SG,
       
   277     LineBreak_SP, LineBreak_CR, LineBreak_LF, LineBreak_BK
       
   278 
       
   279     , LineBreak_Unassigned
       
   280 };
       
   281 
       
   282 static QHash<QByteArray, LineBreakClass> line_break_map;
       
   283 
       
   284 static void initLineBreak()
       
   285 {
       
   286     // ### Classes XX and AI are left out and mapped to AL for now;
       
   287     // ### Class NL is ignored and mapped to AL as well.
       
   288     struct LineBreakList {
       
   289         LineBreakClass brk;
       
   290         const char *name;
       
   291     } breaks[] = {
       
   292         { LineBreak_BK, "BK" },
       
   293         { LineBreak_CR, "CR" },
       
   294         { LineBreak_LF, "LF" },
       
   295         { LineBreak_CM, "CM" },
       
   296         { LineBreak_AL, "NL" },
       
   297         { LineBreak_SG, "SG" },
       
   298         { LineBreak_WJ, "WJ" },
       
   299         { LineBreak_ZW, "ZW" },
       
   300         { LineBreak_GL, "GL" },
       
   301         { LineBreak_SP, "SP" },
       
   302         { LineBreak_B2, "B2" },
       
   303         { LineBreak_BA, "BA" },
       
   304         { LineBreak_BB, "BB" },
       
   305         { LineBreak_HY, "HY" },
       
   306         { LineBreak_AL, "CB" }, // ###
       
   307         { LineBreak_CL, "CL" },
       
   308         { LineBreak_EX, "EX" },
       
   309         { LineBreak_IN, "IN" },
       
   310         { LineBreak_NS, "NS" },
       
   311         { LineBreak_OP, "OP" },
       
   312         { LineBreak_QU, "QU" },
       
   313         { LineBreak_IS, "IS" },
       
   314         { LineBreak_NU, "NU" },
       
   315         { LineBreak_PO, "PO" },
       
   316         { LineBreak_PR, "PR" },
       
   317         { LineBreak_SY, "SY" },
       
   318         { LineBreak_AL, "AI" },
       
   319         { LineBreak_AL, "AL" },
       
   320         { LineBreak_H2, "H2" },
       
   321         { LineBreak_H3, "H3" },
       
   322         { LineBreak_ID, "ID" },
       
   323         { LineBreak_JL, "JL" },
       
   324         { LineBreak_JV, "JV" },
       
   325         { LineBreak_JT, "JT" },
       
   326         { LineBreak_SA, "SA" },
       
   327         { LineBreak_AL, "XX" },
       
   328         { LineBreak_Unassigned, 0 }
       
   329     };
       
   330     LineBreakList *d = breaks;
       
   331     while (d->name) {
       
   332         line_break_map.insert(d->name, d->brk);
       
   333         ++d;
       
   334     }
       
   335 }
       
   336 
       
   337 
       
   338 // Keep this one in sync with the code in createPropertyInfo
       
   339 static const char *property_string =
       
   340     "    struct Properties {\n"
       
   341     "        ushort category         : 8; /* 5 needed */\n"
       
   342     "        ushort line_break_class : 8; /* 6 needed */\n"
       
   343     "        ushort direction        : 8; /* 5 needed */\n"
       
   344     "        ushort combiningClass   : 8;\n"
       
   345     "        ushort joining          : 2;\n"
       
   346     "        signed short digitValue : 6; /* 5 needed */\n"
       
   347     "        ushort unicodeVersion   : 4;\n"
       
   348     "        ushort lowerCaseSpecial : 1;\n"
       
   349     "        ushort upperCaseSpecial : 1;\n"
       
   350     "        ushort titleCaseSpecial : 1;\n"
       
   351     "        ushort caseFoldSpecial  : 1; /* currently unused */\n"
       
   352     "        signed short mirrorDiff    : 16;\n"
       
   353     "        signed short lowerCaseDiff : 16;\n"
       
   354     "        signed short upperCaseDiff : 16;\n"
       
   355     "        signed short titleCaseDiff : 16;\n"
       
   356     "        signed short caseFoldDiff  : 16;\n"
       
   357     "        ushort graphemeBreak    : 8; /* 4 needed */\n"
       
   358     "        ushort wordBreak        : 8; /* 4 needed */\n"
       
   359     "        ushort sentenceBreak    : 8; /* 4 needed */\n"
       
   360     "    };\n"
       
   361     "    Q_CORE_EXPORT const Properties * QT_FASTCALL properties(uint ucs4);\n"
       
   362     "    Q_CORE_EXPORT const Properties * QT_FASTCALL properties(ushort ucs2);\n";
       
   363 
       
   364 static const char *methods =
   272     "    Q_CORE_EXPORT QUnicodeTables::LineBreakClass QT_FASTCALL lineBreakClass(uint ucs4);\n"
   365     "    Q_CORE_EXPORT QUnicodeTables::LineBreakClass QT_FASTCALL lineBreakClass(uint ucs4);\n"
   273     "    inline int lineBreakClass(const QChar &ch) {\n"
   366     "    inline int lineBreakClass(const QChar &ch)\n"
   274     "        return QUnicodeTables::lineBreakClass(ch.unicode());\n"
   367     "    { return lineBreakClass(ch.unicode()); }\n"
   275     "    }\n"
       
   276     "\n"
   368     "\n"
   277     "    Q_CORE_EXPORT int QT_FASTCALL script(uint ucs4);\n"
   369     "    Q_CORE_EXPORT int QT_FASTCALL script(uint ucs4);\n"
   278     "    Q_CORE_EXPORT_INLINE int QT_FASTCALL script(const QChar &ch) {\n"
   370     "    inline int script(const QChar &ch)\n"
   279     "        return script(ch.unicode());\n"
   371     "    { return script(ch.unicode()); }\n\n";
   280     "    }\n\n";
       
   281 
   372 
   282 
   373 
   283 struct PropertyFlags {
   374 struct PropertyFlags {
   284     bool operator ==(const PropertyFlags &o) {
   375     bool operator ==(const PropertyFlags &o) {
   285         return (combiningClass == o.combiningClass
   376         return (combiningClass == o.combiningClass
   310     // from ArabicShaping.txt
   401     // from ArabicShaping.txt
   311     QChar::Joining joining : 2;
   402     QChar::Joining joining : 2;
   312     // from DerivedAge.txt
   403     // from DerivedAge.txt
   313     QChar::UnicodeVersion age : 4;
   404     QChar::UnicodeVersion age : 4;
   314     int digitValue;
   405     int digitValue;
   315     uint line_break_class : 5;
   406     uint line_break_class : 6;
   316 
   407 
   317     int mirrorDiff : 16;
   408     int mirrorDiff : 16;
   318 
   409 
   319     int lowerCaseDiff;
   410     int lowerCaseDiff;
   320     int upperCaseDiff;
   411     int upperCaseDiff;
   327     GraphemeBreak graphemeBreak;
   418     GraphemeBreak graphemeBreak;
   328     WordBreak wordBreak;
   419     WordBreak wordBreak;
   329     SentenceBreak sentenceBreak;
   420     SentenceBreak sentenceBreak;
   330 };
   421 };
   331 
   422 
   332 QList<int> specialCaseMap;
   423 
   333 int specialCaseMaxLen = 0;
   424 static QList<int> specialCaseMap;
       
   425 static int specialCaseMaxLen = 0;
   334 
   426 
   335 static int appendToSpecialCaseMap(const QList<int> &map)
   427 static int appendToSpecialCaseMap(const QList<int> &map)
   336 {
   428 {
   337     QList<int> utf16map;
   429     QList<int> utf16map;
   338     for (int i = 0; i < map.size(); ++i) {
   430     for (int i = 0; i < map.size(); ++i) {
   345         }
   437         }
   346     }
   438     }
   347     specialCaseMaxLen = qMax(specialCaseMaxLen, utf16map.size());
   439     specialCaseMaxLen = qMax(specialCaseMaxLen, utf16map.size());
   348     utf16map << 0;
   440     utf16map << 0;
   349 
   441 
   350     for (int i = 0; i < specialCaseMap.size() - utf16map.size() - 1; ++i) {
   442     for (int i = 0; i < specialCaseMap.size() - utf16map.size() + 1; ++i) {
   351         int j;
   443         int j;
   352         for (j = 0; j < utf16map.size(); ++j) {
   444         for (j = 0; j < utf16map.size(); ++j) {
   353             if (specialCaseMap.at(i+j) != utf16map.at(j))
   445             if (specialCaseMap.at(i+j) != utf16map.at(j))
   354                 break;
   446                 break;
   355         }
   447         }
   362     return pos;
   454     return pos;
   363 }
   455 }
   364 
   456 
   365 struct UnicodeData {
   457 struct UnicodeData {
   366     UnicodeData(int codepoint = 0) {
   458     UnicodeData(int codepoint = 0) {
   367         p.category = QChar::NoCategory;
   459         p.category = QChar::Other_NotAssigned; // Cn
   368         p.combiningClass = 0;
   460         p.combiningClass = 0;
   369 
   461 
   370         p.direction = QChar::DirL;
   462         p.direction = QChar::DirL;
   371         // DirR for:  U+0590..U+05FF, U+07C0..U+08FF, U+FB1D..U+FB4F, U+10800..U+10FFF
   463         // DirR for:  U+0590..U+05FF, U+07C0..U+08FF, U+FB1D..U+FB4F, U+10800..U+10FFF
   372         if ((codepoint >= 0x590 && codepoint <= 0x5ff)
   464         if ((codepoint >= 0x590 && codepoint <= 0x5ff)
   385         decompositionType = QChar::NoDecomposition;
   477         decompositionType = QChar::NoDecomposition;
   386         p.joining = QChar::OtherJoining;
   478         p.joining = QChar::OtherJoining;
   387         p.age = QChar::Unicode_Unassigned;
   479         p.age = QChar::Unicode_Unassigned;
   388         p.mirrorDiff = 0;
   480         p.mirrorDiff = 0;
   389         p.digitValue = -1;
   481         p.digitValue = -1;
   390         p.line_break_class = QUnicodeTables::LineBreak_AL;
   482         p.line_break_class = LineBreak_AL; // XX -> AL
   391         p.lowerCaseDiff = 0;
   483         p.lowerCaseDiff = 0;
   392         p.upperCaseDiff = 0;
   484         p.upperCaseDiff = 0;
   393         p.titleCaseDiff = 0;
   485         p.titleCaseDiff = 0;
   394         p.caseFoldDiff = 0;
   486         p.caseFoldDiff = 0;
   395         p.lowerCaseSpecial = 0;
   487         p.lowerCaseSpecial = 0;
   436     UD_UpperCase,
   528     UD_UpperCase,
   437     UD_LowerCase,
   529     UD_LowerCase,
   438     UD_TitleCase
   530     UD_TitleCase
   439 };
   531 };
   440 
   532 
   441 QHash<QByteArray, QChar::Category> categoryMap;
   533 
       
   534 static QHash<QByteArray, QChar::Category> categoryMap;
   442 
   535 
   443 static void initCategoryMap()
   536 static void initCategoryMap()
   444 {
   537 {
   445     struct Cat {
   538     struct Cat {
   446         QChar::Category cat;
   539         QChar::Category cat;
   447         const char *name;
   540         const char *name;
   448     } categories [] = {
   541     } categories[] = {
   449         { QChar::Mark_NonSpacing,          "Mn" },
   542         { QChar::Mark_NonSpacing,          "Mn" },
   450         { QChar::Mark_SpacingCombining,    "Mc" },
   543         { QChar::Mark_SpacingCombining,    "Mc" },
   451         { QChar::Mark_Enclosing,           "Me" },
   544         { QChar::Mark_Enclosing,           "Me" },
   452 
   545 
   453         { QChar::Number_DecimalDigit,      "Nd" },
   546         { QChar::Number_DecimalDigit,      "Nd" },
   483         { QChar::Symbol_Modifier,          "Sk" },
   576         { QChar::Symbol_Modifier,          "Sk" },
   484         { QChar::Symbol_Other,             "So" },
   577         { QChar::Symbol_Other,             "So" },
   485         { QChar::NoCategory, 0 }
   578         { QChar::NoCategory, 0 }
   486     };
   579     };
   487     Cat *c = categories;
   580     Cat *c = categories;
   488     while (c->cat != QChar::NoCategory) {
   581     while (c->name) {
   489         categoryMap.insert(c->name, c->cat);
   582         categoryMap.insert(c->name, c->cat);
   490         ++c;
   583         ++c;
   491     }
   584     }
   492 }
   585 }
   493 
   586 
   494 QHash<QByteArray, QChar::Direction> directionMap;
   587 
       
   588 static QHash<QByteArray, QChar::Direction> directionMap;
   495 
   589 
   496 static void initDirectionMap()
   590 static void initDirectionMap()
   497 {
   591 {
   498     struct Dir {
   592     struct Dir {
   499         QChar::Direction dir;
   593         QChar::Direction dir;
   526         ++d;
   620         ++d;
   527     }
   621     }
   528 }
   622 }
   529 
   623 
   530 
   624 
   531 QHash<QByteArray, QChar::Decomposition> decompositionMap;
   625 static QHash<QByteArray, QChar::Decomposition> decompositionMap;
   532 
   626 
   533 static void initDecompositionMap()
   627 static void initDecompositionMap()
   534 {
   628 {
   535     struct Dec {
   629     struct Dec {
   536         QChar::Decomposition dec;
   630         QChar::Decomposition dec;
   551         { QChar::Narrow, "<narrow>" },
   645         { QChar::Narrow, "<narrow>" },
   552         { QChar::Small, "<small>" },
   646         { QChar::Small, "<small>" },
   553         { QChar::Square, "<square>" },
   647         { QChar::Square, "<square>" },
   554         { QChar::Compat, "<compat>" },
   648         { QChar::Compat, "<compat>" },
   555         { QChar::Fraction, "<fraction>" },
   649         { QChar::Fraction, "<fraction>" },
   556         { QChar::NoDecomposition,  0 }
   650         { QChar::NoDecomposition, 0 }
   557     };
   651     };
   558     Dec *d = decompositions;
   652     Dec *d = decompositions;
   559     while (d->name) {
   653     while (d->name) {
   560         decompositionMap.insert(d->name, d->dec);
   654         decompositionMap.insert(d->name, d->dec);
   561         ++d;
   655         ++d;
   562     }
   656     }
   563 }
   657 }
   564 
   658 
   565 
   659 
   566 QHash<int, UnicodeData> unicodeData;
   660 static QHash<int, UnicodeData> unicodeData;
   567 QList<PropertyFlags> uniqueProperties;
   661 static QList<PropertyFlags> uniqueProperties;
   568 
   662 
   569 
   663 
   570 QHash<int, int> decompositionLength;
   664 static QHash<int, int> decompositionLength;
   571 int highestComposedCharacter = 0;
   665 static int highestComposedCharacter = 0;
   572 int numLigatures = 0;
   666 static int numLigatures = 0;
   573 int highestLigature = 0;
   667 static int highestLigature = 0;
   574 
   668 
   575 struct Ligature {ushort u1; ushort u2; ushort ligature;};
   669 struct Ligature {
       
   670     ushort u1;
       
   671     ushort u2;
       
   672     ushort ligature;
       
   673 };
   576 // we need them sorted after the first component for fast lookup
   674 // we need them sorted after the first component for fast lookup
   577 bool operator < (const Ligature &l1, const Ligature &l2) {
   675 bool operator < (const Ligature &l1, const Ligature &l2)
   578     return l1.u1 < l2.u1;
   676 { return l1.u1 < l2.u1; }
   579 }
   677 
   580 
   678 static QHash<ushort, QList<Ligature> > ligatureHashes;
   581 QHash<ushort, QList<Ligature> > ligatureHashes;
   679 
   582 
   680 static QHash<int, int> combiningClassUsage;
   583 QHash<int, int> combiningClassUsage;
   681 
   584 
   682 static int maxLowerCaseDiff = 0;
   585 int maxLowerCaseDiff = 0;
   683 static int maxUpperCaseDiff = 0;
   586 int maxUpperCaseDiff = 0;
   684 static int maxTitleCaseDiff = 0;
   587 int maxTitleCaseDiff = 0;
       
   588 
   685 
   589 static void readUnicodeData()
   686 static void readUnicodeData()
   590 {
   687 {
   591     QFile f("data/UnicodeData.txt");
   688     QFile f("data/UnicodeData.txt");
   592     if (!f.exists())
   689     if (!f.exists())
   607             continue;
   704             continue;
   608 
   705 
   609         QList<QByteArray> properties = line.split(';');
   706         QList<QByteArray> properties = line.split(';');
   610         bool ok;
   707         bool ok;
   611         int codepoint = properties[UD_Value].toInt(&ok, 16);
   708         int codepoint = properties[UD_Value].toInt(&ok, 16);
       
   709         Q_ASSERT(ok);
       
   710         Q_ASSERT(codepoint <= LAST_CODEPOINT);
   612         int lastCodepoint = codepoint;
   711         int lastCodepoint = codepoint;
   613 
   712 
   614         QByteArray name = properties[UD_Name];
   713         QByteArray name = properties[UD_Name];
   615         if (name.startsWith('<') && name.contains("First")) {
   714         if (name.startsWith('<') && name.contains("First")) {
   616             QByteArray nextLine;
   715             QByteArray nextLine;
   617             nextLine.resize(1024);
   716             nextLine.resize(1024);
   618             f.readLine(nextLine.data(), 1024);
   717             f.readLine(nextLine.data(), 1024);
   619             QList<QByteArray> properties = nextLine.split(';');
   718             QList<QByteArray> properties = nextLine.split(';');
       
   719             Q_ASSERT(properties[UD_Name].startsWith('<') && properties[UD_Name].contains("Last"));
   620             lastCodepoint = properties[UD_Value].toInt(&ok, 16);
   720             lastCodepoint = properties[UD_Value].toInt(&ok, 16);
       
   721             Q_ASSERT(ok);
       
   722             Q_ASSERT(lastCodepoint <= LAST_CODEPOINT);
   621         }
   723         }
   622 
   724 
   623         UnicodeData data(codepoint);
   725         UnicodeData data(codepoint);
   624         data.p.category = categoryMap.value(properties[UD_Category], QChar::NoCategory);
   726         data.p.category = categoryMap.value(properties[UD_Category], QChar::NoCategory);
       
   727         if (data.p.category == QChar::NoCategory)
       
   728             qFatal("unassigned char category: %s", properties[UD_Category].constData());
   625         data.p.combiningClass = properties[UD_CombiningClass].toInt();
   729         data.p.combiningClass = properties[UD_CombiningClass].toInt();
   626 
   730 
   627         if (!combiningClassUsage.contains(data.p.combiningClass))
   731         if (!combiningClassUsage.contains(data.p.combiningClass))
   628             combiningClassUsage[data.p.combiningClass] = 1;
   732             combiningClassUsage[data.p.combiningClass] = 1;
   629         else
   733         else
   632         data.p.direction = directionMap.value(properties[UD_BidiCategory], data.p.direction);
   736         data.p.direction = directionMap.value(properties[UD_BidiCategory], data.p.direction);
   633 
   737 
   634         if (!properties[UD_UpperCase].isEmpty()) {
   738         if (!properties[UD_UpperCase].isEmpty()) {
   635             int upperCase = properties[UD_UpperCase].toInt(&ok, 16);
   739             int upperCase = properties[UD_UpperCase].toInt(&ok, 16);
   636             Q_ASSERT(ok);
   740             Q_ASSERT(ok);
       
   741             if (qAbs(upperCase - codepoint) >= (1<<14))
       
   742                 qWarning() << "upperCaseDiff exceeded (" << hex << codepoint << "->" << upperCase << ")";
   637             data.p.upperCaseDiff = upperCase - codepoint;
   743             data.p.upperCaseDiff = upperCase - codepoint;
   638             maxUpperCaseDiff = qMax(maxUpperCaseDiff, qAbs(data.p.upperCaseDiff));
   744             maxUpperCaseDiff = qMax(maxUpperCaseDiff, qAbs(data.p.upperCaseDiff));
   639             if (codepoint > 0xffff) {
   745             if (codepoint > 0xffff) {
   640                 // if the condition below doesn't hold anymore we need to modify our case folding code
   746                 // if the condition below doesn't hold anymore we need to modify our case folding code
   641                 //qDebug() << codepoint << QChar::highSurrogate(codepoint) << QChar::highSurrogate(foldMap.at(0));
   747                 //qDebug() << codepoint << QChar::highSurrogate(codepoint) << QChar::highSurrogate(foldMap.at(0));
   642                 Q_ASSERT(QChar::highSurrogate(codepoint) == QChar::highSurrogate(upperCase));
   748                 Q_ASSERT(QChar::highSurrogate(codepoint) == QChar::highSurrogate(upperCase));
   643             }
   749             }
   644         }
   750         }
   645         if (!properties[UD_LowerCase].isEmpty()) {
   751         if (!properties[UD_LowerCase].isEmpty()) {
   646             int lowerCase = properties[UD_LowerCase].toInt(&ok, 16);
   752             int lowerCase = properties[UD_LowerCase].toInt(&ok, 16);
   647             Q_ASSERT (ok);
   753             Q_ASSERT(ok);
       
   754             if (qAbs(lowerCase - codepoint) >= (1<<14))
       
   755                 qWarning() << "lowerCaseDiff exceeded (" << hex << codepoint << "->" << lowerCase << ")";
   648             data.p.lowerCaseDiff = lowerCase - codepoint;
   756             data.p.lowerCaseDiff = lowerCase - codepoint;
   649             maxLowerCaseDiff = qMax(maxLowerCaseDiff, qAbs(data.p.lowerCaseDiff));
   757             maxLowerCaseDiff = qMax(maxLowerCaseDiff, qAbs(data.p.lowerCaseDiff));
   650             if (codepoint > 0xffff) {
   758             if (codepoint > 0xffff) {
   651                 // if the condition below doesn't hold anymore we need to modify our case folding code
   759                 // if the condition below doesn't hold anymore we need to modify our case folding code
   652                 //qDebug() << codepoint << QChar::highSurrogate(codepoint) << QChar::highSurrogate(foldMap.at(0));
   760                 //qDebug() << codepoint << QChar::highSurrogate(codepoint) << QChar::highSurrogate(foldMap.at(0));
   656         // we want toTitleCase to map to ToUpper in case we don't have any titlecase.
   764         // we want toTitleCase to map to ToUpper in case we don't have any titlecase.
   657         if (properties[UD_TitleCase].isEmpty())
   765         if (properties[UD_TitleCase].isEmpty())
   658             properties[UD_TitleCase] = properties[UD_UpperCase];
   766             properties[UD_TitleCase] = properties[UD_UpperCase];
   659         if (!properties[UD_TitleCase].isEmpty()) {
   767         if (!properties[UD_TitleCase].isEmpty()) {
   660             int titleCase = properties[UD_TitleCase].toInt(&ok, 16);
   768             int titleCase = properties[UD_TitleCase].toInt(&ok, 16);
   661             Q_ASSERT (ok);
   769             Q_ASSERT(ok);
       
   770             if (qAbs(titleCase - codepoint) >= (1<<14))
       
   771                 qWarning() << "titleCaseDiff exceeded (" << hex << codepoint << "->" << titleCase << ")";
   662             data.p.titleCaseDiff = titleCase - codepoint;
   772             data.p.titleCaseDiff = titleCase - codepoint;
   663             maxTitleCaseDiff = qMax(maxTitleCaseDiff, qAbs(data.p.titleCaseDiff));
   773             maxTitleCaseDiff = qMax(maxTitleCaseDiff, qAbs(data.p.titleCaseDiff));
   664             if (codepoint > 0xffff) {
   774             if (codepoint > 0xffff) {
   665                 // if the condition below doesn't hold anymore we need to modify our case folding code
   775                 // if the condition below doesn't hold anymore we need to modify our case folding code
   666                 //qDebug() << codepoint << QChar::highSurrogate(codepoint) << QChar::highSurrogate(foldMap.at(0));
   776                 //qDebug() << codepoint << QChar::highSurrogate(codepoint) << QChar::highSurrogate(foldMap.at(0));
   675         QByteArray decomposition = properties[UD_Decomposition];
   785         QByteArray decomposition = properties[UD_Decomposition];
   676         if (!decomposition.isEmpty()) {
   786         if (!decomposition.isEmpty()) {
   677             highestComposedCharacter = qMax(highestComposedCharacter, codepoint);
   787             highestComposedCharacter = qMax(highestComposedCharacter, codepoint);
   678             QList<QByteArray> d = decomposition.split(' ');
   788             QList<QByteArray> d = decomposition.split(' ');
   679             if (d[0].contains('<')) {
   789             if (d[0].contains('<')) {
   680                 data.decompositionType = decompositionMap.value(d[0], QChar::Canonical);
   790                 data.decompositionType = decompositionMap.value(d[0], QChar::NoDecomposition);
       
   791                 if (data.decompositionType == QChar::NoDecomposition)
       
   792                     qFatal("unassigned decomposition type: %s", d[0].constData());
   681                 d.takeFirst();
   793                 d.takeFirst();
   682             } else {
   794             } else {
   683                 data.decompositionType = QChar::Canonical;
   795                 data.decompositionType = QChar::Canonical;
   684             }
   796             }
   685             for (int i = 0; i < d.size(); ++i)
   797             for (int i = 0; i < d.size(); ++i) {
   686                 data.decomposition.append(d[i].toInt(&ok, 16));
   798                 data.decomposition.append(d[i].toInt(&ok, 16));
       
   799                 Q_ASSERT(ok);
       
   800             }
   687             if (!decompositionLength.contains(data.decomposition.size()))
   801             if (!decompositionLength.contains(data.decomposition.size()))
   688                 decompositionLength[data.decomposition.size()] = 1;
   802                 decompositionLength[data.decomposition.size()] = 1;
   689             else
   803             else
   690                 ++decompositionLength[data.decomposition.size()];
   804                 ++decompositionLength[data.decomposition.size()];
   691         }
   805         }
   723         QList<QByteArray> pair = line.split(';');
   837         QList<QByteArray> pair = line.split(';');
   724         Q_ASSERT(pair.size() == 2);
   838         Q_ASSERT(pair.size() == 2);
   725 
   839 
   726         bool ok;
   840         bool ok;
   727         int codepoint = pair[0].toInt(&ok, 16);
   841         int codepoint = pair[0].toInt(&ok, 16);
       
   842         Q_ASSERT(ok);
   728         int mirror = pair[1].toInt(&ok, 16);
   843         int mirror = pair[1].toInt(&ok, 16);
       
   844         Q_ASSERT(ok);
   729 
   845 
   730         UnicodeData d = unicodeData.value(codepoint, UnicodeData(codepoint));
   846         UnicodeData d = unicodeData.value(codepoint, UnicodeData(codepoint));
   731         d.mirroredChar = mirror;
   847         d.mirroredChar = mirror;
   732         if (qAbs(codepoint-d.mirroredChar) > maxMirroredDiff)
       
   733             maxMirroredDiff = qAbs(codepoint - d.mirroredChar);
       
   734 
       
   735         d.p.mirrorDiff = d.mirroredChar - codepoint;
   848         d.p.mirrorDiff = d.mirroredChar - codepoint;
       
   849         maxMirroredDiff = qMax(maxMirroredDiff, qAbs(d.p.mirrorDiff));
   736         unicodeData.insert(codepoint, d);
   850         unicodeData.insert(codepoint, d);
   737     }
   851     }
   738 }
   852 }
   739 
   853 
   740 static void readArabicShaping()
   854 static void readArabicShaping()
   762         QList<QByteArray> shaping = line.split(';');
   876         QList<QByteArray> shaping = line.split(';');
   763         Q_ASSERT(shaping.size() == 4);
   877         Q_ASSERT(shaping.size() == 4);
   764 
   878 
   765         bool ok;
   879         bool ok;
   766         int codepoint = shaping[0].toInt(&ok, 16);
   880         int codepoint = shaping[0].toInt(&ok, 16);
       
   881         Q_ASSERT(ok);
       
   882 
   767         QChar::Joining j = QChar::OtherJoining;
   883         QChar::Joining j = QChar::OtherJoining;
   768         QByteArray shape = shaping[2].trimmed();
   884         QByteArray shape = shaping[2].trimmed();
   769         if (shape == "R")
   885         if (shape == "R")
   770             j = QChar::Right;
   886             j = QChar::Right;
   771         else if (shape == "D")
   887         else if (shape == "D")
   808         codes.replace("..", ".");
   924         codes.replace("..", ".");
   809         QList<QByteArray> cl = codes.split('.');
   925         QList<QByteArray> cl = codes.split('.');
   810 
   926 
   811         bool ok;
   927         bool ok;
   812         int from = cl[0].toInt(&ok, 16);
   928         int from = cl[0].toInt(&ok, 16);
       
   929         Q_ASSERT(ok);
   813         int to = from;
   930         int to = from;
   814         if (cl.size() == 2)
   931         if (cl.size() == 2) {
   815             to = cl[1].toInt(&ok, 16);
   932             to = cl[1].toInt(&ok, 16);
   816 
   933             Q_ASSERT(ok);
   817         QChar::UnicodeVersion age = QChar::Unicode_Unassigned;
   934         }
   818         QByteArray ba = l[1];
   935 
   819         AgeMap *map = ageMap;
   936         QChar::UnicodeVersion age = age_map.value(l[1].trimmed(), QChar::Unicode_Unassigned);
   820         while (map->age) {
       
   821             if (ba == map->age) {
       
   822                 age = map->version;
       
   823                 break;
       
   824             }
       
   825             ++map;
       
   826         }
       
   827         //qDebug() << hex << from << ".." << to << ba << age;
   937         //qDebug() << hex << from << ".." << to << ba << age;
   828         Q_ASSERT(age != QChar::Unicode_Unassigned);
   938         if (age == QChar::Unicode_Unassigned)
       
   939             qFatal("unassigned or unhandled age value: %s", l[1].constData());
   829 
   940 
   830         for (int codepoint = from; codepoint <= to; ++codepoint) {
   941         for (int codepoint = from; codepoint <= to; ++codepoint) {
   831             UnicodeData d = unicodeData.value(codepoint, UnicodeData(codepoint));
   942             UnicodeData d = unicodeData.value(codepoint, UnicodeData(codepoint));
   832             d.p.age = age;
   943             d.p.age = age;
   833             unicodeData.insert(codepoint, d);
   944             unicodeData.insert(codepoint, d);
   834         }
   945         }
   835     }
   946     }
   836 }
   947 }
   837 
   948 
   838 
   949 
   839 static void readCompositionExclusion()
   950 static void readDerivedNormalizationProps()
   840 {
   951 {
   841     QFile f("data/CompositionExclusions.txt");
   952     QFile f("data/DerivedNormalizationProps.txt");
   842     if (!f.exists())
   953     if (!f.exists())
   843         qFatal("Couldn't find CompositionExclusions.txt");
   954         qFatal("Couldn't find DerivedNormalizationProps.txt");
   844 
   955 
   845     f.open(QFile::ReadOnly);
   956     f.open(QFile::ReadOnly);
   846 
   957 
   847     while (!f.atEnd()) {
   958     while (!f.atEnd()) {
   848         QByteArray line;
   959         QByteArray line;
   851         line.resize(len-1);
   962         line.resize(len-1);
   852 
   963 
   853         int comment = line.indexOf('#');
   964         int comment = line.indexOf('#');
   854         if (comment >= 0)
   965         if (comment >= 0)
   855             line = line.left(comment);
   966             line = line.left(comment);
   856         line.replace(" ", "");
   967 
   857 
   968         if (line.trimmed().isEmpty())
   858         if (line.isEmpty())
       
   859             continue;
   969             continue;
   860 
   970 
   861         Q_ASSERT(!line.contains(".."));
   971         QList<QByteArray> l = line.split(';');
       
   972         Q_ASSERT(l.size() >= 2);
       
   973 
       
   974         QByteArray propName = l[1].trimmed();
       
   975         if (propName != "Full_Composition_Exclusion")
       
   976             // ###
       
   977             continue;
       
   978 
       
   979         QByteArray codes = l[0].trimmed();
       
   980         codes.replace("..", ".");
       
   981         QList<QByteArray> cl = codes.split('.');
   862 
   982 
   863         bool ok;
   983         bool ok;
   864         int codepoint = line.toInt(&ok, 16);
   984         int from = cl[0].toInt(&ok, 16);
   865 
   985         Q_ASSERT(ok);
       
   986         int to = from;
       
   987         if (cl.size() == 2) {
       
   988             to = cl[1].toInt(&ok, 16);
       
   989             Q_ASSERT(ok);
       
   990         }
       
   991 
       
   992         for (int codepoint = from; codepoint <= to; ++codepoint) {
       
   993             UnicodeData d = unicodeData.value(codepoint, UnicodeData(codepoint));
       
   994             d.excludedComposition = true;
       
   995             unicodeData.insert(codepoint, d);
       
   996         }
       
   997     }
       
   998 
       
   999     for (int codepoint = 0; codepoint <= LAST_CODEPOINT; ++codepoint) {
   866         UnicodeData d = unicodeData.value(codepoint, UnicodeData(codepoint));
  1000         UnicodeData d = unicodeData.value(codepoint, UnicodeData(codepoint));
   867         d.excludedComposition = true;
  1001         if (!d.excludedComposition
   868         unicodeData.insert(codepoint, d);
  1002             && d.decompositionType == QChar::Canonical
   869     }
  1003             && d.decomposition.size() > 1) {
   870 
  1004             Q_ASSERT(d.decomposition.size() == 2);
   871     for (int i = 0; i < 0x110000; ++i) {
  1005 
   872         UnicodeData data = unicodeData.value(i, UnicodeData(i));
  1006             uint part1 = d.decomposition.at(0);
   873         if (!data.excludedComposition
  1007             uint part2 = d.decomposition.at(1);
   874             && data.decompositionType == QChar::Canonical
  1008 
   875             && data.decomposition.size() > 1) {
  1009             // all non-starters are listed in DerivedNormalizationProps.txt
   876             Q_ASSERT(data.decomposition.size() == 2);
  1010             // and already excluded from composition
   877 
  1011             Q_ASSERT(unicodeData.value(part1, UnicodeData(part1)).p.combiningClass == 0);
   878             uint part1 = data.decomposition.at(0);
       
   879             uint part2 = data.decomposition.at(1);
       
   880             UnicodeData first = unicodeData.value(part1, UnicodeData(part1));
       
   881             if (first.p.combiningClass != 0)
       
   882                 continue;
       
   883 
  1012 
   884             ++numLigatures;
  1013             ++numLigatures;
   885             highestLigature = qMax(highestLigature, (int)part1);
  1014             highestLigature = qMax(highestLigature, (int)part1);
   886             Ligature l = {(ushort)part1, (ushort)part2, i};
  1015             Ligature l = {(ushort)part1, (ushort)part2, codepoint};
   887             ligatureHashes[part2].append(l);
  1016             ligatureHashes[part2].append(l);
   888         }
  1017         }
   889     }
  1018     }
   890 }
  1019 }
       
  1020 
   891 
  1021 
   892 struct NormalizationCorrection {
  1022 struct NormalizationCorrection {
   893     uint codepoint;
  1023     uint codepoint;
   894     uint mapped;
  1024     uint mapped;
   895     uint version;
  1025     uint version;
   931         Q_ASSERT(!line.contains(".."));
  1061         Q_ASSERT(!line.contains(".."));
   932 
  1062 
   933         QList<QByteArray> fields = line.split(';');
  1063         QList<QByteArray> fields = line.split(';');
   934         Q_ASSERT(fields.size() == 4);
  1064         Q_ASSERT(fields.size() == 4);
   935 
  1065 
   936         NormalizationCorrection c;
  1066         NormalizationCorrection c = { 0, 0, 0 };
   937         bool ok;
  1067         bool ok;
   938         c.codepoint = fields.at(0).toInt(&ok, 16);
  1068         c.codepoint = fields.at(0).toInt(&ok, 16);
       
  1069         Q_ASSERT(ok);
   939         c.mapped = fields.at(1).toInt(&ok, 16);
  1070         c.mapped = fields.at(1).toInt(&ok, 16);
       
  1071         Q_ASSERT(ok);
   940         if (fields.at(3) == "3.2.0")
  1072         if (fields.at(3) == "3.2.0")
   941             c.version = QChar::Unicode_3_2;
  1073             c.version = QChar::Unicode_3_2;
   942         else if (fields.at(3) == "4.0.0")
  1074         else if (fields.at(3) == "4.0.0")
   943             c.version = QChar::Unicode_4_0;
  1075             c.version = QChar::Unicode_4_0;
   944         else
  1076         else
   951 
  1083 
   952     out += "};\n\n"
  1084     out += "};\n\n"
   953 
  1085 
   954            "enum { NumNormalizationCorrections = " + QByteArray::number(numCorrections) + " };\n\n";
  1086            "enum { NumNormalizationCorrections = " + QByteArray::number(numCorrections) + " };\n\n";
   955 
  1087 
   956 
       
   957     return out;
  1088     return out;
   958 }
  1089 }
   959 
  1090 
   960 
  1091 
   961 static void computeUniqueProperties()
  1092 static void computeUniqueProperties()
   962 {
  1093 {
   963     qDebug("computeUniqueProperties:");
  1094     qDebug("computeUniqueProperties:");
   964     for (int uc = 0; uc < 0x110000; ++uc) {
  1095     for (int uc = 0; uc <= LAST_CODEPOINT; ++uc) {
   965         UnicodeData d = unicodeData.value(uc, UnicodeData(uc));
  1096         UnicodeData d = unicodeData.value(uc, UnicodeData(uc));
   966 
  1097 
   967         int index = uniqueProperties.indexOf(d.p);
  1098         int index = uniqueProperties.indexOf(d.p);
   968         if (index == -1) {
  1099         if (index == -1) {
   969             index = uniqueProperties.size();
  1100             index = uniqueProperties.size();
   970             uniqueProperties.append(d.p);
  1101             uniqueProperties.append(d.p);
   971         }
  1102         }
   972         d.propertyIndex = index;
  1103         d.propertyIndex = index;
   973         unicodeData.insert(uc, d);
  1104         unicodeData.insert(uc, d);
   974     }
  1105     }
   975     qDebug("    %d unicode properties found", uniqueProperties.size());
  1106     qDebug("    %d unique unicode properties found", uniqueProperties.size());
   976 }
  1107 }
   977 
  1108 
   978 
  1109 
   979 static void readLineBreak()
  1110 static void readLineBreak()
   980 {
  1111 {
  1005         codes.replace("..", ".");
  1136         codes.replace("..", ".");
  1006         QList<QByteArray> cl = codes.split('.');
  1137         QList<QByteArray> cl = codes.split('.');
  1007 
  1138 
  1008         bool ok;
  1139         bool ok;
  1009         int from = cl[0].toInt(&ok, 16);
  1140         int from = cl[0].toInt(&ok, 16);
       
  1141         Q_ASSERT(ok);
  1010         int to = from;
  1142         int to = from;
  1011         if (cl.size() == 2)
  1143         if (cl.size() == 2) {
  1012             to = cl[1].toInt(&ok, 16);
  1144             to = cl[1].toInt(&ok, 16);
  1013 
  1145             Q_ASSERT(ok);
  1014         // ### Classes XX and AI are left out and mapped to AL for now
  1146         }
  1015         QUnicodeTables::LineBreakClass lb = QUnicodeTables::LineBreak_AL;
  1147 
  1016         QByteArray ba = l[1];
  1148         LineBreakClass lb = line_break_map.value(l[1].trimmed(), LineBreak_Unassigned);
  1017 
  1149         if (lb == LineBreak_Unassigned)
  1018         if (ba == "AI") lb = QUnicodeTables::LineBreak_AL;
  1150             qFatal("unassigned line break class: %s", l[1].constData());
  1019         else if (ba == "XX") lb = QUnicodeTables::LineBreak_AL;
       
  1020         else if (ba == "NL") lb = QUnicodeTables::LineBreak_AL;
       
  1021         else if (ba == "OP") lb = QUnicodeTables::LineBreak_OP;
       
  1022         else if (ba == "CL") lb = QUnicodeTables::LineBreak_CL;
       
  1023         else if (ba == "QU") lb = QUnicodeTables::LineBreak_QU;
       
  1024         else if (ba == "GL") lb = QUnicodeTables::LineBreak_GL;
       
  1025         else if (ba == "NS") lb = QUnicodeTables::LineBreak_NS;
       
  1026         else if (ba == "EX") lb = QUnicodeTables::LineBreak_EX;
       
  1027         else if (ba == "SY") lb = QUnicodeTables::LineBreak_SY;
       
  1028         else if (ba == "IS") lb = QUnicodeTables::LineBreak_IS;
       
  1029         else if (ba == "PR") lb = QUnicodeTables::LineBreak_PR;
       
  1030         else if (ba == "PO") lb = QUnicodeTables::LineBreak_PO;
       
  1031         else if (ba == "NU") lb = QUnicodeTables::LineBreak_NU;
       
  1032         else if (ba == "AL") lb = QUnicodeTables::LineBreak_AL;
       
  1033         else if (ba == "ID") lb = QUnicodeTables::LineBreak_ID;
       
  1034         else if (ba == "IN") lb = QUnicodeTables::LineBreak_IN;
       
  1035         else if (ba == "HY") lb = QUnicodeTables::LineBreak_HY;
       
  1036         else if (ba == "BA") lb = QUnicodeTables::LineBreak_BA;
       
  1037         else if (ba == "BB") lb = QUnicodeTables::LineBreak_BB;
       
  1038         else if (ba == "B2") lb = QUnicodeTables::LineBreak_B2;
       
  1039         else if (ba == "ZW") lb = QUnicodeTables::LineBreak_ZW;
       
  1040         else if (ba == "CM") lb = QUnicodeTables::LineBreak_CM;
       
  1041         else if (ba == "SA") lb = QUnicodeTables::LineBreak_SA;
       
  1042         else if (ba == "BK") lb = QUnicodeTables::LineBreak_BK;
       
  1043         else if (ba == "CR") lb = QUnicodeTables::LineBreak_CR;
       
  1044         else if (ba == "LF") lb = QUnicodeTables::LineBreak_LF;
       
  1045         else if (ba == "SG") lb = QUnicodeTables::LineBreak_SG;
       
  1046         else if (ba == "CB") lb = QUnicodeTables::LineBreak_AL;
       
  1047         else if (ba == "SP") lb = QUnicodeTables::LineBreak_SP;
       
  1048         else if (ba == "WJ") lb = QUnicodeTables::LineBreak_WJ;
       
  1049         else if (ba == "H2") lb = QUnicodeTables::LineBreak_H2;
       
  1050         else if (ba == "H3") lb = QUnicodeTables::LineBreak_H3;
       
  1051         else if (ba == "JL") lb = QUnicodeTables::LineBreak_JL;
       
  1052         else if (ba == "JV") lb = QUnicodeTables::LineBreak_JV;
       
  1053         else if (ba == "JT") lb = QUnicodeTables::LineBreak_JT;
       
  1054         else {
       
  1055             qDebug() << "unhandled line break class:" << ba;
       
  1056         }
       
  1057 
  1151 
  1058         for (int codepoint = from; codepoint <= to; ++codepoint) {
  1152         for (int codepoint = from; codepoint <= to; ++codepoint) {
  1059             UnicodeData d = unicodeData.value(codepoint, UnicodeData(codepoint));
  1153             UnicodeData d = unicodeData.value(codepoint, UnicodeData(codepoint));
  1060             d.p.line_break_class = lb;
  1154             d.p.line_break_class = lb;
  1061             unicodeData.insert(codepoint, d);
  1155             unicodeData.insert(codepoint, d);
  1064 }
  1158 }
  1065 
  1159 
  1066 
  1160 
  1067 static void readSpecialCasing()
  1161 static void readSpecialCasing()
  1068 {
  1162 {
  1069 //     qDebug() << "Reading SpecialCasing.txt";
  1163     qDebug() << "Reading SpecialCasing.txt";
  1070     QFile f("data/SpecialCasing.txt");
  1164     QFile f("data/SpecialCasing.txt");
  1071     if (!f.exists())
  1165     if (!f.exists())
  1072         qFatal("Couldn't find SpecialCasing.txt");
  1166         qFatal("Couldn't find SpecialCasing.txt");
  1073 
  1167 
  1074     f.open(QFile::ReadOnly);
  1168     f.open(QFile::ReadOnly);
  1112         QList<QByteArray> title = l[2].trimmed().split(' ');
  1206         QList<QByteArray> title = l[2].trimmed().split(' ');
  1113         QList<int> titleMap;
  1207         QList<int> titleMap;
  1114         for (int i = 0; i < title.size(); ++i) {
  1208         for (int i = 0; i < title.size(); ++i) {
  1115             bool ok;
  1209             bool ok;
  1116             titleMap.append(title.at(i).toInt(&ok, 16));
  1210             titleMap.append(title.at(i).toInt(&ok, 16));
  1117             if (!ok)
       
  1118                 qDebug() << line << title.at(i);
       
  1119             Q_ASSERT(ok);
  1211             Q_ASSERT(ok);
  1120         }
  1212         }
  1121 
  1213 
  1122         QList<QByteArray> upper = l[3].trimmed().split(' ');
  1214         QList<QByteArray> upper = l[3].trimmed().split(' ');
  1123         QList<int> upperMap;
  1215         QList<int> upperMap;
  1149 
  1241 
  1150         unicodeData.insert(codepoint, ud);
  1242         unicodeData.insert(codepoint, ud);
  1151     }
  1243     }
  1152 }
  1244 }
  1153 
  1245 
  1154 int maxCaseFoldDiff = 0;
  1246 static int maxCaseFoldDiff = 0;
  1155 
  1247 
  1156 static void readCaseFolding()
  1248 static void readCaseFolding()
  1157 {
  1249 {
  1158     qDebug() << "Reading CaseFolding.txt";
  1250     qDebug() << "Reading CaseFolding.txt";
  1159     QFile f("data/CaseFolding.txt");
  1251     QFile f("data/CaseFolding.txt");
  1176             continue;
  1268             continue;
  1177 
  1269 
  1178         QList<QByteArray> l = line.split(';');
  1270         QList<QByteArray> l = line.split(';');
  1179 
  1271 
  1180         bool ok;
  1272         bool ok;
  1181         uint codepoint = l[0].trimmed().toInt(&ok, 16);
  1273         int codepoint = l[0].trimmed().toInt(&ok, 16);
  1182         Q_ASSERT(ok);
  1274         Q_ASSERT(ok);
  1183 
  1275 
  1184 
  1276 
  1185         l[1] = l[1].trimmed();
  1277         l[1] = l[1].trimmed();
  1186         if (l[1] == "F" || l[1] == "T")
  1278         if (l[1] == "F" || l[1] == "T")
  1196             Q_ASSERT(ok);
  1288             Q_ASSERT(ok);
  1197         }
  1289         }
  1198 
  1290 
  1199         UnicodeData ud = unicodeData.value(codepoint, UnicodeData(codepoint));
  1291         UnicodeData ud = unicodeData.value(codepoint, UnicodeData(codepoint));
  1200         if (foldMap.size() == 1) {
  1292         if (foldMap.size() == 1) {
       
  1293             if (qAbs(foldMap.at(0) - codepoint) >= (1<<14))
       
  1294                 qWarning() << "caseFoldDiff exceeded (" << hex << codepoint << "->" << foldMap.at(0) << ")";
  1201             ud.p.caseFoldDiff = foldMap.at(0) - codepoint;
  1295             ud.p.caseFoldDiff = foldMap.at(0) - codepoint;
  1202             maxCaseFoldDiff = qMax(maxCaseFoldDiff, ud.p.caseFoldDiff);
  1296             maxCaseFoldDiff = qMax(maxCaseFoldDiff, qAbs(ud.p.caseFoldDiff));
  1203             if (codepoint > 0xffff) {
  1297             if (codepoint > 0xffff) {
  1204                 // if the condition below doesn't hold anymore we need to modify our case folding code
  1298                 // if the condition below doesn't hold anymore we need to modify our case folding code
  1205                 //qDebug() << codepoint << QChar::highSurrogate(codepoint) << QChar::highSurrogate(foldMap.at(0));
  1299                 //qDebug() << codepoint << QChar::highSurrogate(codepoint) << QChar::highSurrogate(foldMap.at(0));
  1206                 Q_ASSERT(QChar::highSurrogate(codepoint) == QChar::highSurrogate(foldMap.at(0)));
  1300                 Q_ASSERT(QChar::highSurrogate(codepoint) == QChar::highSurrogate(foldMap.at(0)));
  1207             }
  1301             }
  1208             if (foldMap.at(0) != codepoint + ud.p.lowerCaseDiff)
  1302             if (foldMap.at(0) != codepoint + ud.p.lowerCaseDiff)
  1209                 qDebug() << hex << codepoint;
  1303                 qDebug() << hex << codepoint;
  1210         } else {
  1304         } else {
  1211             Q_ASSERT(false); // we currently don't support full case foldings
  1305             qFatal("we currently don't support full case foldings");
  1212 //             qDebug() << "special" << hex << foldMap;
  1306 //             qDebug() << "special" << hex << foldMap;
  1213             ud.p.caseFoldSpecial = true;
  1307             ud.p.caseFoldSpecial = true;
  1214             ud.p.caseFoldDiff = appendToSpecialCaseMap(foldMap);
  1308             ud.p.caseFoldDiff = appendToSpecialCaseMap(foldMap);
  1215         }
  1309         }
  1216         unicodeData.insert(codepoint, ud);
  1310         unicodeData.insert(codepoint, ud);
  1252         if (cl.size() == 2) {
  1346         if (cl.size() == 2) {
  1253             to = cl[1].toInt(&ok, 16);
  1347             to = cl[1].toInt(&ok, 16);
  1254             Q_ASSERT(ok);
  1348             Q_ASSERT(ok);
  1255         }
  1349         }
  1256 
  1350 
  1257         GraphemeBreak brk = grapheme_break_map.value(l[1].trimmed(), GraphemeBreakOther);
  1351         GraphemeBreak brk = grapheme_break_map.value(l[1].trimmed(), GraphemeBreak_Unassigned);
       
  1352         if (brk == GraphemeBreak_Unassigned)
       
  1353             qFatal("unassigned grapheme break class: %s", l[1].constData());
  1258 
  1354 
  1259         for (int codepoint = from; codepoint <= to; ++codepoint) {
  1355         for (int codepoint = from; codepoint <= to; ++codepoint) {
  1260             UnicodeData ud = unicodeData.value(codepoint, UnicodeData(codepoint));
  1356             UnicodeData ud = unicodeData.value(codepoint, UnicodeData(codepoint));
  1261             ud.p.graphemeBreak = brk;
  1357             ud.p.graphemeBreak = brk;
  1262             unicodeData.insert(codepoint, ud);
  1358             unicodeData.insert(codepoint, ud);
  1299         if (cl.size() == 2) {
  1395         if (cl.size() == 2) {
  1300             to = cl[1].toInt(&ok, 16);
  1396             to = cl[1].toInt(&ok, 16);
  1301             Q_ASSERT(ok);
  1397             Q_ASSERT(ok);
  1302         }
  1398         }
  1303 
  1399 
  1304         WordBreak brk = word_break_map.value(l[1].trimmed(), WordBreakOther);
  1400         WordBreak brk = word_break_map.value(l[1].trimmed(), WordBreak_Unassigned);
  1305         Q_ASSERT(brk != WordBreakOther);
  1401         if (brk == WordBreak_Unassigned)
       
  1402             qFatal("unassigned word break class: %s", l[1].constData());
  1306 
  1403 
  1307         for (int codepoint = from; codepoint <= to; ++codepoint) {
  1404         for (int codepoint = from; codepoint <= to; ++codepoint) {
  1308             UnicodeData ud = unicodeData.value(codepoint, UnicodeData(codepoint));
  1405             UnicodeData ud = unicodeData.value(codepoint, UnicodeData(codepoint));
  1309             ud.p.wordBreak = brk;
  1406             ud.p.wordBreak = brk;
  1310             unicodeData.insert(codepoint, ud);
  1407             unicodeData.insert(codepoint, ud);
  1347         if (cl.size() == 2) {
  1444         if (cl.size() == 2) {
  1348             to = cl[1].toInt(&ok, 16);
  1445             to = cl[1].toInt(&ok, 16);
  1349             Q_ASSERT(ok);
  1446             Q_ASSERT(ok);
  1350         }
  1447         }
  1351 
  1448 
  1352         SentenceBreak brk = sentence_break_map.value(l[1].trimmed(), SentenceBreakOther);
  1449         SentenceBreak brk = sentence_break_map.value(l[1].trimmed(), SentenceBreak_Unassigned);
  1353         Q_ASSERT(brk != SentenceBreakOther);
  1450         if (brk == SentenceBreak_Unassigned)
       
  1451             qFatal("unassigned sentence break class: %s", l[1].constData());
  1354 
  1452 
  1355         for (int codepoint = from; codepoint <= to; ++codepoint) {
  1453         for (int codepoint = from; codepoint <= to; ++codepoint) {
  1356             UnicodeData ud = unicodeData.value(codepoint, UnicodeData(codepoint));
  1454             UnicodeData ud = unicodeData.value(codepoint, UnicodeData(codepoint));
  1357             ud.p.sentenceBreak = brk;
  1455             ud.p.sentenceBreak = brk;
  1358             unicodeData.insert(codepoint, ud);
  1456             unicodeData.insert(codepoint, ud);
  1642         "Kannada",
  1740         "Kannada",
  1643         "Khmer",
  1741         "Khmer",
  1644         "Lao",
  1742         "Lao",
  1645         "Malayalam",
  1743         "Malayalam",
  1646         "Myanmar",
  1744         "Myanmar",
       
  1745         "Nko",
  1647         "Ogham",
  1746         "Ogham",
  1648         "Oriya",
  1747         "Oriya",
  1649         "Runic",
  1748         "Runic",
  1650         "Sinhala",
  1749         "Sinhala",
  1651         "Syriac",
  1750         "Syriac",
  1659     const int specialScriptsCount = sizeof(specialScripts) / sizeof(const char *);
  1758     const int specialScriptsCount = sizeof(specialScripts) / sizeof(const char *);
  1660 
  1759 
  1661     // generate script enum
  1760     // generate script enum
  1662     QByteArray declaration;
  1761     QByteArray declaration;
  1663 
  1762 
  1664     declaration += "    // See http://www.unicode.org/reports/tr24/tr24-5.html\n\n";
  1763     declaration += "    // See http://www.unicode.org/reports/tr24/tr24-5.html\n";
  1665     declaration += "    enum Script {\n        Common";
  1764     declaration += "    enum Script {\n        Common";
  1666 
  1765 
  1667     int uniqueScripts = 1; // Common
  1766     int uniqueScripts = 1; // Common
  1668 
  1767 
  1669     // output the ones with special processing first
  1768     // output the ones with special processing first
  1670     for (int i = 1; i < scriptNames.size(); ++i) {
  1769     for (int i = 1; i < scriptNames.size(); ++i) {
  1671         QByteArray scriptName = scriptNames.at(i);
  1770         QByteArray scriptName = scriptNames.at(i);
  1672         // does the script require special processing?
  1771         // does the script require special processing?
  1673         bool special = false;
  1772         bool special = false;
  1674         for (int s = 0; !special && s < specialScriptsCount; ++s) {
  1773         for (int s = 0; s < specialScriptsCount; ++s) {
  1675             if (scriptName == specialScripts[s])
  1774             if (scriptName == specialScripts[s]) {
  1676                 special = true;
  1775                 special = true;
       
  1776                 break;
       
  1777             }
  1677         }
  1778         }
  1678         if (!special) {
  1779         if (!special) {
  1679             scriptHash[i] =  0; // alias for 'Common'
  1780             scriptHash[i] = 0; // alias for 'Common'
  1680             continue;
  1781             continue;
  1681         } else {
  1782         } else {
  1682             ++uniqueScripts;
  1783             ++uniqueScripts;
  1683             scriptHash[i] = i;
  1784             scriptHash[i] = i;
  1684         }
  1785         }
  1685 
  1786 
  1686         declaration += ",\n        ";
  1787         if (scriptName != "Inherited") {
  1687         declaration += scriptName;
  1788             declaration += ",\n        ";
  1688     }
  1789             declaration += scriptName;
       
  1790         }
       
  1791     }
       
  1792     declaration += ",\n        Inherited";
  1689     declaration += ",\n        ScriptCount = Inherited";
  1793     declaration += ",\n        ScriptCount = Inherited";
  1690 
  1794 
  1691     // output the ones that are an alias for 'Common'
  1795     // output the ones that are an alias for 'Common'
  1692     for (int i = 1; i < scriptNames.size(); ++i) {
  1796     for (int i = 1; i < scriptNames.size(); ++i) {
  1693         if (scriptHash.value(i) != 0)
  1797         if (scriptHash.value(i) != 0)
  1694             continue;
  1798             continue;
  1695         QByteArray scriptName = scriptNames.at(i);
       
  1696         scriptName += " = Common";
       
  1697         declaration += ",\n        ";
  1799         declaration += ",\n        ";
  1698         declaration += scriptName;
  1800         declaration += scriptNames.at(i);
       
  1801         declaration += " = Common";
  1699     }
  1802     }
  1700 
  1803 
  1701     declaration += "\n    };\n";
  1804     declaration += "\n    };\n";
  1702 
  1805 
  1703     scriptSentinel = ((uniqueScripts + 16) / 32) * 32; // a multiple of 32
  1806     scriptSentinel = ((uniqueScripts + 16) / 32) * 32; // a multiple of 32
  1829 
  1932 
  1830 struct PropertyBlock {
  1933 struct PropertyBlock {
  1831     PropertyBlock() { index = -1; }
  1934     PropertyBlock() { index = -1; }
  1832     int index;
  1935     int index;
  1833     QList<int> properties;
  1936     QList<int> properties;
  1834     bool operator ==(const PropertyBlock &other) { return properties == other.properties; }
  1937     bool operator==(const PropertyBlock &other)
       
  1938     { return properties == other.properties; }
  1835 };
  1939 };
  1836 
  1940 
  1837 static QByteArray createPropertyInfo()
  1941 static QByteArray createPropertyInfo()
  1838 {
  1942 {
  1839     qDebug("createPropertyInfo:");
  1943     qDebug("createPropertyInfo:");
  1840 
  1944 
  1841     const int BMP_BLOCKSIZE=32;
  1945     const int BMP_BLOCKSIZE = 32;
  1842     const int BMP_SHIFT = 5;
  1946     const int BMP_SHIFT = 5;
  1843     const int BMP_END = 0x11000;
  1947     const int BMP_END = 0x11000;
  1844     const int SMP_END = 0x110000;
  1948     const int SMP_END = 0x110000;
  1845     const int SMP_BLOCKSIZE = 256;
  1949     const int SMP_BLOCKSIZE = 256;
  1846     const int SMP_SHIFT = 8;
  1950     const int SMP_SHIFT = 8;
  1888     }
  1992     }
  1889 
  1993 
  1890     int bmp_block_data = bmp_blocks*BMP_BLOCKSIZE*2;
  1994     int bmp_block_data = bmp_blocks*BMP_BLOCKSIZE*2;
  1891     int bmp_trie = BMP_END/BMP_BLOCKSIZE*2;
  1995     int bmp_trie = BMP_END/BMP_BLOCKSIZE*2;
  1892     int bmp_mem = bmp_block_data + bmp_trie;
  1996     int bmp_mem = bmp_block_data + bmp_trie;
  1893     qDebug("    %d unique blocks in BMP.",blocks.size());
  1997     qDebug("    %d unique blocks in BMP.", blocks.size());
  1894     qDebug("        block data uses: %d bytes", bmp_block_data);
  1998     qDebug("        block data uses: %d bytes", bmp_block_data);
  1895     qDebug("        trie data uses : %d bytes", bmp_trie);
  1999     qDebug("        trie data uses : %d bytes", bmp_trie);
  1896 
  2000 
  1897     int smp_block_data = (blocks.size()- bmp_blocks)*SMP_BLOCKSIZE*2;
  2001     int smp_block_data = (blocks.size() - bmp_blocks)*SMP_BLOCKSIZE*2;
  1898     int smp_trie = (SMP_END-BMP_END)/SMP_BLOCKSIZE*2;
  2002     int smp_trie = (SMP_END-BMP_END)/SMP_BLOCKSIZE*2;
  1899     int smp_mem = smp_block_data + smp_trie;
  2003     int smp_mem = smp_block_data + smp_trie;
  1900     qDebug("    %d unique blocks in SMP.",blocks.size()-bmp_blocks);
  2004     qDebug("    %d unique blocks in SMP.", blocks.size()-bmp_blocks);
  1901     qDebug("        block data uses: %d bytes", smp_block_data);
  2005     qDebug("        block data uses: %d bytes", smp_block_data);
  1902     qDebug("        trie data uses : %d bytes", smp_trie);
  2006     qDebug("        trie data uses : %d bytes", smp_trie);
  1903 
  2007 
  1904     qDebug("\n        properties use : %d bytes", uniqueProperties.size()*20);
  2008     qDebug("\n        properties use : %d bytes", uniqueProperties.size()*20);
  1905     qDebug("    memory usage: %d bytes", bmp_mem+smp_mem + uniqueProperties.size()*20);
  2009     qDebug("    memory usage: %d bytes", bmp_mem+smp_mem + uniqueProperties.size()*20);
  1906 
  2010 
  1907     QByteArray out;
  2011     QByteArray out;
  1908     out += "static const unsigned short uc_property_trie[] = {\n";
  2012     out += "static const unsigned short uc_property_trie[] = {\n";
  1909 
  2013 
  1910     // first write the map
  2014     // first write the map
  1911     out += "    // 0x" + QByteArray::number(BMP_END, 16);
  2015     out += "    // 0 - 0x" + QByteArray::number(BMP_END, 16);
  1912     for (int i = 0; i < BMP_END/BMP_BLOCKSIZE; ++i) {
  2016     for (int i = 0; i < BMP_END/BMP_BLOCKSIZE; ++i) {
  1913         if (!(i % 8)) {
  2017         if (!(i % 8)) {
  1914             if (out.endsWith(' '))
  2018             if (out.endsWith(' '))
  1915                 out.chop(1);
  2019                 out.chop(1);
  1916             if (!((i*BMP_BLOCKSIZE) % 0x1000))
  2020             if (!((i*BMP_BLOCKSIZE) % 0x1000))
  1975            "#define GET_PROP_INDEX_UCS2(ucs2) \\\n"
  2079            "#define GET_PROP_INDEX_UCS2(ucs2) \\\n"
  1976            "(uc_property_trie[uc_property_trie[ucs2>>" + QByteArray::number(BMP_SHIFT) +
  2080            "(uc_property_trie[uc_property_trie[ucs2>>" + QByteArray::number(BMP_SHIFT) +
  1977            "] + (ucs2 & 0x" + QByteArray::number(BMP_BLOCKSIZE-1, 16)+ ")])\n\n"
  2081            "] + (ucs2 & 0x" + QByteArray::number(BMP_BLOCKSIZE-1, 16)+ ")])\n\n"
  1978 
  2082 
  1979 
  2083 
  1980            "static const QUnicodeTables::Properties uc_properties [] = {\n";
  2084            "static const QUnicodeTables::Properties uc_properties[] = {\n";
  1981 
  2085 
  1982     // keep in sync with the property declaration
  2086     // keep in sync with the property declaration
  1983     for (int i = 0; i < uniqueProperties.size(); ++i) {
  2087     for (int i = 0; i < uniqueProperties.size(); ++i) {
  1984         PropertyFlags p = uniqueProperties.at(i);
  2088         PropertyFlags p = uniqueProperties.at(i);
  1985         out += "    { ";
  2089         out += "    { ";
  2034         out += QByteArray::number( p.graphemeBreak );
  2138         out += QByteArray::number( p.graphemeBreak );
  2035         out += ", ";
  2139         out += ", ";
  2036         out += QByteArray::number( p.wordBreak );
  2140         out += QByteArray::number( p.wordBreak );
  2037         out += ", ";
  2141         out += ", ";
  2038         out += QByteArray::number( p.sentenceBreak );
  2142         out += QByteArray::number( p.sentenceBreak );
  2039         out += "},\n";
  2143         out += " },\n";
  2040     }
  2144     }
  2041     out += "};\n\n";
  2145     out += "};\n\n";
  2042 
  2146 
  2043     out += "static inline const QUnicodeTables::Properties *qGetProp(uint ucs4)\n"
  2147     out += "static inline const QUnicodeTables::Properties *qGetProp(uint ucs4)\n"
  2044            "{\n"
  2148            "{\n"
  2062            "{\n"
  2166            "{\n"
  2063            "    int index = GET_PROP_INDEX_UCS2(ucs2);\n"
  2167            "    int index = GET_PROP_INDEX_UCS2(ucs2);\n"
  2064            "    return uc_properties + index;\n"
  2168            "    return uc_properties + index;\n"
  2065            "}\n\n";
  2169            "}\n\n";
  2066 
  2170 
  2067     out += "#define CURRENT_VERSION "CURRENT_UNICODE_VERSION"\n\n";
  2171     out += "static const ushort specialCaseMap[] = {\n   ";
  2068 
       
  2069     out += "static const ushort specialCaseMap [] = {";
       
  2070     for (int i = 0; i < specialCaseMap.size(); ++i) {
  2172     for (int i = 0; i < specialCaseMap.size(); ++i) {
  2071         if (!(i % 16))
       
  2072             out += "\n   ";
       
  2073         out += QByteArray(" 0x") + QByteArray::number(specialCaseMap.at(i), 16);
  2173         out += QByteArray(" 0x") + QByteArray::number(specialCaseMap.at(i), 16);
  2074         if (i < specialCaseMap.size() - 1)
  2174         if (i < specialCaseMap.size() - 1)
  2075             out += ",";
  2175             out += ",";
       
  2176         if (!specialCaseMap.at(i))
       
  2177             out += "\n   ";
  2076     }
  2178     }
  2077     out += "\n};\n";
  2179     out += "\n};\n";
  2078     out += "#define SPECIAL_CASE_MAX_LEN " + QByteArray::number(specialCaseMaxLen) + "\n\n";
  2180     out += "#define SPECIAL_CASE_MAX_LEN " + QByteArray::number(specialCaseMaxLen) + "\n\n";
  2079 
  2181 
  2080     qDebug() << "Special case map uses " << specialCaseMap.size()*2 << "bytes";
  2182     qDebug("Special case map uses : %d bytes", specialCaseMap.size()*2);
  2081 
  2183 
  2082     return out;
  2184     return out;
  2083 }
  2185 }
  2084 
  2186 
  2085 
  2187 
  2086 struct DecompositionBlock {
  2188 struct DecompositionBlock {
  2087     DecompositionBlock() { index = -1; }
  2189     DecompositionBlock() { index = -1; }
  2088     int index;
  2190     int index;
  2089     QList<int> decompositionPositions;
  2191     QList<int> decompositionPositions;
  2090     bool operator ==(const DecompositionBlock &other)
  2192     bool operator ==(const DecompositionBlock &other)
  2091         { return decompositionPositions == other.decompositionPositions; }
  2193     { return decompositionPositions == other.decompositionPositions; }
  2092 };
  2194 };
  2093 
  2195 
  2094 static QByteArray createCompositionInfo()
  2196 static QByteArray createCompositionInfo()
  2095 {
  2197 {
  2096     qDebug("createCompositionInfo:");
  2198     qDebug("createCompositionInfo:");
  2097 
  2199 
  2098     const int BMP_BLOCKSIZE=16;
  2200     const int BMP_BLOCKSIZE = 16;
  2099     const int BMP_SHIFT = 4;
  2201     const int BMP_SHIFT = 4;
  2100     const int BMP_END = 0x3400; // start of Han
  2202     const int BMP_END = 0x3400; // start of Han
  2101     const int SMP_END = 0x30000;
  2203     const int SMP_END = 0x30000;
  2102     const int SMP_BLOCKSIZE = 256;
  2204     const int SMP_BLOCKSIZE = 256;
  2103     const int SMP_SHIFT = 8;
  2205     const int SMP_SHIFT = 8;
  2118             int uc = block*BMP_BLOCKSIZE + i;
  2220             int uc = block*BMP_BLOCKSIZE + i;
  2119             UnicodeData d = unicodeData.value(uc, UnicodeData(uc));
  2221             UnicodeData d = unicodeData.value(uc, UnicodeData(uc));
  2120             if (!d.decomposition.isEmpty()) {
  2222             if (!d.decomposition.isEmpty()) {
  2121                 int utf16Chars = 0;
  2223                 int utf16Chars = 0;
  2122                 for (int j = 0; j < d.decomposition.size(); ++j)
  2224                 for (int j = 0; j < d.decomposition.size(); ++j)
  2123                     utf16Chars += d.decomposition.at(j) > 0x10000 ? 2 : 1;
  2225                     utf16Chars += d.decomposition.at(j) >= 0x10000 ? 2 : 1;
  2124                 decompositions.append(d.decompositionType + (utf16Chars<<8));
  2226                 decompositions.append(d.decompositionType + (utf16Chars<<8));
  2125                 for (int j = 0; j < d.decomposition.size(); ++j) {
  2227                 for (int j = 0; j < d.decomposition.size(); ++j) {
  2126                     int code = d.decomposition.at(j);
  2228                     int code = d.decomposition.at(j);
  2127                     if (code > 0x10000) {
  2229                     if (code >= 0x10000) {
  2128                         // save as surrogate pair
  2230                         // save as surrogate pair
  2129                         code -= 0x10000;
  2231                         ushort high = QChar::highSurrogate(code);
  2130                         ushort high = code/0x400 + 0xd800;
  2232                         ushort low = QChar::lowSurrogate(code);
  2131                         ushort low = code%0x400 + 0xdc00;
       
  2132                         decompositions.append(high);
  2233                         decompositions.append(high);
  2133                         decompositions.append(low);
  2234                         decompositions.append(low);
  2134                     } else {
  2235                     } else {
  2135                         decompositions.append(code);
  2236                         decompositions.append(code);
  2136                     }
  2237                     }
  2160             int uc = block*SMP_BLOCKSIZE + i;
  2261             int uc = block*SMP_BLOCKSIZE + i;
  2161             UnicodeData d = unicodeData.value(uc, UnicodeData(uc));
  2262             UnicodeData d = unicodeData.value(uc, UnicodeData(uc));
  2162             if (!d.decomposition.isEmpty()) {
  2263             if (!d.decomposition.isEmpty()) {
  2163                 int utf16Chars = 0;
  2264                 int utf16Chars = 0;
  2164                 for (int j = 0; j < d.decomposition.size(); ++j)
  2265                 for (int j = 0; j < d.decomposition.size(); ++j)
  2165                     utf16Chars += d.decomposition.at(j) > 0x10000 ? 2 : 1;
  2266                     utf16Chars += d.decomposition.at(j) >= 0x10000 ? 2 : 1;
  2166                 decompositions.append(d.decompositionType + (utf16Chars<<8));
  2267                 decompositions.append(d.decompositionType + (utf16Chars<<8));
  2167                 for (int j = 0; j < d.decomposition.size(); ++j) {
  2268                 for (int j = 0; j < d.decomposition.size(); ++j) {
  2168                     int code = d.decomposition.at(j);
  2269                     int code = d.decomposition.at(j);
  2169                     if (code > 0x10000) {
  2270                     if (code >= 0x10000) {
  2170                         // save as surrogate pair
  2271                         // save as surrogate pair
  2171                         code -= 0x10000;
  2272                         ushort high = QChar::highSurrogate(code);
  2172                         ushort high = code/0x400 + 0xd800;
  2273                         ushort low = QChar::lowSurrogate(code);
  2173                         ushort low = code%0x400 + 0xdc00;
       
  2174                         decompositions.append(high);
  2274                         decompositions.append(high);
  2175                         decompositions.append(low);
  2275                         decompositions.append(low);
  2176                     } else {
  2276                     } else {
  2177                         decompositions.append(code);
  2277                         decompositions.append(code);
  2178                     }
  2278                     }
  2194     }
  2294     }
  2195 
  2295 
  2196     int bmp_block_data = bmp_blocks*BMP_BLOCKSIZE*2;
  2296     int bmp_block_data = bmp_blocks*BMP_BLOCKSIZE*2;
  2197     int bmp_trie = BMP_END/BMP_BLOCKSIZE*2;
  2297     int bmp_trie = BMP_END/BMP_BLOCKSIZE*2;
  2198     int bmp_mem = bmp_block_data + bmp_trie;
  2298     int bmp_mem = bmp_block_data + bmp_trie;
  2199     qDebug("    %d unique blocks in BMP.",blocks.size());
  2299     qDebug("    %d unique blocks in BMP.", blocks.size());
  2200     qDebug("        block data uses: %d bytes", bmp_block_data);
  2300     qDebug("        block data uses: %d bytes", bmp_block_data);
  2201     qDebug("        trie data uses : %d bytes", bmp_trie);
  2301     qDebug("        trie data uses : %d bytes", bmp_trie);
  2202     qDebug("        memory usage: %d bytes", bmp_mem);
  2302     qDebug("        memory usage: %d bytes", bmp_mem);
  2203 
  2303 
  2204     int smp_block_data = (blocks.size()- bmp_blocks)*SMP_BLOCKSIZE*2;
  2304     int smp_block_data = (blocks.size() - bmp_blocks)*SMP_BLOCKSIZE*2;
  2205     int smp_trie = (SMP_END-BMP_END)/SMP_BLOCKSIZE*2;
  2305     int smp_trie = (SMP_END-BMP_END)/SMP_BLOCKSIZE*2;
  2206     int smp_mem = smp_block_data + smp_trie;
  2306     int smp_mem = smp_block_data + smp_trie;
  2207     qDebug("    %d unique blocks in SMP.",blocks.size()-bmp_blocks);
  2307     qDebug("    %d unique blocks in SMP.", blocks.size()-bmp_blocks);
  2208     qDebug("        block data uses: %d bytes", smp_block_data);
  2308     qDebug("        block data uses: %d bytes", smp_block_data);
  2209     qDebug("        trie data uses : %d bytes", smp_trie);
  2309     qDebug("        trie data uses : %d bytes", smp_trie);
  2210 
  2310 
  2211     qDebug("\n        decomposition table use : %d bytes", decompositions.size()*2);
  2311     qDebug("\n        decomposition table use : %d bytes", decompositions.size()*2);
  2212     qDebug("    memory usage: %d bytes", bmp_mem+smp_mem + decompositions.size()*2);
  2312     qDebug("    memory usage: %d bytes", bmp_mem+smp_mem + decompositions.size()*2);
  2345     Q_ASSERT(blockMap.size() == BMP_END/BMP_BLOCKSIZE);
  2445     Q_ASSERT(blockMap.size() == BMP_END/BMP_BLOCKSIZE);
  2346 
  2446 
  2347     int bmp_block_data = bmp_blocks*BMP_BLOCKSIZE*2;
  2447     int bmp_block_data = bmp_blocks*BMP_BLOCKSIZE*2;
  2348     int bmp_trie = BMP_END/BMP_BLOCKSIZE*2;
  2448     int bmp_trie = BMP_END/BMP_BLOCKSIZE*2;
  2349     int bmp_mem = bmp_block_data + bmp_trie;
  2449     int bmp_mem = bmp_block_data + bmp_trie;
  2350     qDebug("    %d unique blocks in BMP.",blocks.size());
  2450     qDebug("    %d unique blocks in BMP.", blocks.size());
  2351     qDebug("        block data uses: %d bytes", bmp_block_data);
  2451     qDebug("        block data uses: %d bytes", bmp_block_data);
  2352     qDebug("        trie data uses : %d bytes", bmp_trie);
  2452     qDebug("        trie data uses : %d bytes", bmp_trie);
  2353     qDebug("        ligature data uses : %d bytes", ligatures.size()*2);
  2453     qDebug("        ligature data uses : %d bytes", ligatures.size()*2);
  2354     qDebug("        memory usage: %d bytes", bmp_mem + ligatures.size() * 2);
  2454     qDebug("        memory usage: %d bytes", bmp_mem + ligatures.size() * 2);
  2355 
  2455 
  2397            "#define GET_LIGATURE_INDEX(u2) "
  2497            "#define GET_LIGATURE_INDEX(u2) "
  2398            "(u2 < 0x" + QByteArray::number(BMP_END, 16) + " ? "
  2498            "(u2 < 0x" + QByteArray::number(BMP_END, 16) + " ? "
  2399            "uc_ligature_trie[uc_ligature_trie[u2>>" + QByteArray::number(BMP_SHIFT) +
  2499            "uc_ligature_trie[uc_ligature_trie[u2>>" + QByteArray::number(BMP_SHIFT) +
  2400            "] + (u2 & 0x" + QByteArray::number(BMP_BLOCKSIZE-1, 16)+ ")] : 0xffff);\n\n"
  2500            "] + (u2 & 0x" + QByteArray::number(BMP_BLOCKSIZE-1, 16)+ ")] : 0xffff);\n\n"
  2401 
  2501 
  2402            "static const unsigned short uc_ligature_map [] = {\n";
  2502            "static const unsigned short uc_ligature_map[] = {\n";
  2403 
  2503 
  2404     for (int i = 0; i < ligatures.size(); ++i) {
  2504     for (int i = 0; i < ligatures.size(); ++i) {
  2405         if (!(i % 8)) {
  2505         if (!(i % 8)) {
  2406             if (out.endsWith(' '))
  2506             if (out.endsWith(' '))
  2407                 out.chop(1);
  2507                 out.chop(1);
  2431     return out;
  2531     return out;
  2432 }
  2532 }
  2433 
  2533 
  2434 int main(int, char **)
  2534 int main(int, char **)
  2435 {
  2535 {
       
  2536     initAgeMap();
  2436     initCategoryMap();
  2537     initCategoryMap();
  2437     initDirectionMap();
  2538     initDirectionMap();
  2438     initDecompositionMap();
  2539     initDecompositionMap();
  2439     initGraphemeBreak();
  2540     initGraphemeBreak();
  2440     initWordBreak();
  2541     initWordBreak();
  2441     initSentenceBreak();
  2542     initSentenceBreak();
  2442     
  2543     initLineBreak();
       
  2544 
  2443     readUnicodeData();
  2545     readUnicodeData();
  2444     readBidiMirroring();
  2546     readBidiMirroring();
  2445     readArabicShaping();
  2547     readArabicShaping();
  2446     readDerivedAge();
  2548     readDerivedAge();
  2447     readCompositionExclusion();
  2549     readDerivedNormalizationProps();
  2448     readLineBreak();
       
  2449     readSpecialCasing();
  2550     readSpecialCasing();
  2450     readCaseFolding();
  2551     readCaseFolding();
  2451     // readBlocks();
  2552     // readBlocks();
  2452     readScripts();
  2553     readScripts();
  2453     readGraphemeBreak();
  2554     readGraphemeBreak();
  2454     readWordBreak();
  2555     readWordBreak();
  2455     readSentenceBreak();
  2556     readSentenceBreak();
       
  2557     readLineBreak();
  2456 
  2558 
  2457     computeUniqueProperties();
  2559     computeUniqueProperties();
  2458     QByteArray properties = createPropertyInfo();
  2560     QByteArray properties = createPropertyInfo();
  2459     QByteArray compositions = createCompositionInfo();
  2561     QByteArray compositions = createCompositionInfo();
  2460     QByteArray ligatures = createLigatureInfo();
  2562     QByteArray ligatures = createLigatureInfo();
  2461     QByteArray normalizationCorrections = createNormalizationCorrections();
  2563     QByteArray normalizationCorrections = createNormalizationCorrections();
  2462     QByteArray scriptEnumDeclaration = createScriptEnumDeclaration();
  2564     QByteArray scriptEnumDeclaration = createScriptEnumDeclaration();
  2463     QByteArray scriptTableDeclaration = createScriptTableDeclaration();
  2565     QByteArray scriptTableDeclaration = createScriptTableDeclaration();
  2464 
       
  2465     QFile f("../../src/corelib/tools/qunicodetables.cpp");
       
  2466     f.open(QFile::WriteOnly|QFile::Truncate);
       
  2467 
  2566 
  2468     QByteArray header =
  2567     QByteArray header =
  2469         "/****************************************************************************\n"
  2568         "/****************************************************************************\n"
  2470         "**\n"
  2569         "**\n"
  2471         "** Copyright (C) 2010 Nokia Corporation and/or its subsidiary(-ies).\n"
  2570         "** Copyright (C) 2010 Nokia Corporation and/or its subsidiary(-ies).\n"
  2503         "**\n"
  2602         "**\n"
  2504         "**\n"
  2603         "**\n"
  2505         "**\n"
  2604         "**\n"
  2506         "** $QT_END_LICENSE$\n"
  2605         "** $QT_END_LICENSE$\n"
  2507         "**\n"
  2606         "**\n"
  2508         "****************************************************************************/\n\n"
  2607         "****************************************************************************/\n\n";
  2509 
  2608 
  2510         "/* This file is autogenerated from the Unicode 5.0 database. Do not edit */\n\n";
  2609     QByteArray note =
       
  2610         "/* This file is autogenerated from the Unicode "DATA_VERSION_S" database. Do not edit */\n\n";
  2511 
  2611 
  2512     QByteArray warning =
  2612     QByteArray warning =
  2513         "//\n"
  2613         "//\n"
  2514         "//  W A R N I N G\n"
  2614         "//  W A R N I N G\n"
  2515         "//  -------------\n"
  2615         "//  -------------\n"
  2519         "// without notice, or even be removed.\n"
  2619         "// without notice, or even be removed.\n"
  2520         "//\n"
  2620         "//\n"
  2521         "// We mean it.\n"
  2621         "// We mean it.\n"
  2522         "//\n\n";
  2622         "//\n\n";
  2523 
  2623 
       
  2624     QFile f("../../src/corelib/tools/qunicodetables.cpp");
       
  2625     f.open(QFile::WriteOnly|QFile::Truncate);
  2524     f.write(header);
  2626     f.write(header);
       
  2627     f.write(note);
  2525     f.write("QT_BEGIN_NAMESPACE\n\n");
  2628     f.write("QT_BEGIN_NAMESPACE\n\n");
  2526     f.write(properties);
  2629     f.write(properties);
  2527     f.write(compositions);
  2630     f.write(compositions);
  2528     f.write(ligatures);
  2631     f.write(ligatures);
  2529     f.write(normalizationCorrections);
  2632     f.write(normalizationCorrections);
  2530     f.write(scriptTableDeclaration);
  2633     f.write(scriptTableDeclaration);
  2531     f.write("\nQT_END_NAMESPACE\n");
  2634     f.write("QT_END_NAMESPACE\n");
  2532     f.close();
  2635     f.close();
  2533 
  2636 
  2534     f.setFileName("../../src/corelib/tools/qunicodetables_p.h");
  2637     f.setFileName("../../src/corelib/tools/qunicodetables_p.h");
  2535     f.open(QFile::WriteOnly | QFile::Truncate);
  2638     f.open(QFile::WriteOnly | QFile::Truncate);
  2536     f.write(header);
  2639     f.write(header);
       
  2640     f.write(note);
  2537     f.write(warning);
  2641     f.write(warning);
  2538     f.write("#ifndef QUNICODETABLES_P_H\n"
  2642     f.write("#ifndef QUNICODETABLES_P_H\n"
  2539             "#define QUNICODETABLES_P_H\n\n"
  2643             "#define QUNICODETABLES_P_H\n\n"
  2540             "#include <QtCore/qchar.h>\n\n"
  2644             "#include <QtCore/qchar.h>\n\n"
  2541             "QT_BEGIN_NAMESPACE\n\n");
  2645             "QT_BEGIN_NAMESPACE\n\n");
  2542     f.write("namespace QUnicodeTables {\n");
  2646     f.write("#define UNICODE_DATA_VERSION "DATA_VERSION_STR"\n\n");
       
  2647     f.write("#define UNICODE_LAST_CODEPOINT "LAST_CODEPOINT_STR"\n\n");
       
  2648     f.write("namespace QUnicodeTables {\n\n");
  2543     f.write(property_string);
  2649     f.write(property_string);
  2544     f.write("\n");
  2650     f.write("\n");
  2545     f.write(scriptEnumDeclaration);
  2651     f.write(scriptEnumDeclaration);
  2546     f.write("\n");
  2652     f.write("\n");
  2547     f.write(lineBreakClass);
  2653     f.write(lineBreakClass);
  2548     f.write("\n");
  2654     f.write("\n");
  2549     f.write(methods);
       
  2550     f.write("\n");
       
  2551     f.write(grapheme_break_string);
  2655     f.write(grapheme_break_string);
  2552     f.write("\n");
  2656     f.write("\n");
  2553     f.write(word_break_string);
  2657     f.write(word_break_string);
  2554     f.write("\n");
  2658     f.write("\n");
  2555     f.write(sentence_break_string);
  2659     f.write(sentence_break_string);
  2556     f.write("\n}\n\n"
  2660     f.write("\n");
       
  2661     f.write(methods);
       
  2662     f.write("} // namespace QUnicodeTables\n\n"
  2557             "QT_END_NAMESPACE\n\n"
  2663             "QT_END_NAMESPACE\n\n"
  2558             "#endif\n");
  2664             "#endif // QUNICODETABLES_P_H\n");
  2559     f.close();
  2665     f.close();
  2560 
  2666 
  2561     qDebug() << "maxMirroredDiff  = " << hex << maxMirroredDiff;
  2667     qDebug() << "maxMirroredDiff  = " << hex << maxMirroredDiff;
  2562     qDebug() << "maxLowerCaseDiff = " << hex << maxLowerCaseDiff;
  2668     qDebug() << "maxLowerCaseDiff = " << hex << maxLowerCaseDiff;
  2563     qDebug() << "maxUpperCaseDiff = " << hex << maxUpperCaseDiff;
  2669     qDebug() << "maxUpperCaseDiff = " << hex << maxUpperCaseDiff;
  2576         qDebug("    length %d used %d times", i, decompositionLength.value(i, 0));
  2682         qDebug("    length %d used %d times", i, decompositionLength.value(i, 0));
  2577         totalcompositions += i*decompositionLength.value(i, 0);
  2683         totalcompositions += i*decompositionLength.value(i, 0);
  2578         sum += decompositionLength.value(i, 0);
  2684         sum += decompositionLength.value(i, 0);
  2579     }
  2685     }
  2580     qDebug("    len decomposition map %d, average length %f, num composed chars %d",
  2686     qDebug("    len decomposition map %d, average length %f, num composed chars %d",
  2581            totalcompositions, (float)totalcompositions/(float)sum,  sum);
  2687            totalcompositions, (float)totalcompositions/(float)sum, sum);
  2582     qDebug("highest composed character %x", highestComposedCharacter);
  2688     qDebug("highest composed character %x", highestComposedCharacter);
  2583     qDebug("num ligatures = %d highest=%x, maxLength=%d", numLigatures, highestLigature, longestLigature);
  2689     qDebug("num ligatures = %d highest=%x, maxLength=%d", numLigatures, highestLigature, longestLigature);
  2584 
  2690 
  2585     qBubbleSort(ligatures);
  2691     qBubbleSort(ligatures);
  2586     for (int i = 0; i < ligatures.size(); ++i)
  2692     for (int i = 0; i < ligatures.size(); ++i)
  2597 //     }
  2703 //     }
  2598 //     qDebug("total of %d combining classes used", numClasses);
  2704 //     qDebug("total of %d combining classes used", numClasses);
  2599 
  2705 
  2600 #endif
  2706 #endif
  2601 }
  2707 }
  2602