author | Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com> |
Wed, 18 Aug 2010 10:37:55 +0300 | |
changeset 33 | 3e2da88830cd |
parent 30 | 5dc02b23752f |
permissions | -rw-r--r-- |
0 | 1 |
/**************************************************************************** |
2 |
** |
|
18
2f34d5167611
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
3 |
** Copyright (C) 2010 Nokia Corporation and/or its subsidiary(-ies). |
0 | 4 |
** All rights reserved. |
5 |
** Contact: Nokia Corporation (qt-info@nokia.com) |
|
6 |
** |
|
7 |
** This file is part of the utils of the Qt Toolkit. |
|
8 |
** |
|
9 |
** $QT_BEGIN_LICENSE:LGPL$ |
|
10 |
** No Commercial Usage |
|
11 |
** This file contains pre-release code and may not be distributed. |
|
12 |
** You may use this file in accordance with the terms and conditions |
|
13 |
** contained in the Technology Preview License Agreement accompanying |
|
14 |
** this package. |
|
15 |
** |
|
16 |
** GNU Lesser General Public License Usage |
|
17 |
** Alternatively, this file may be used under the terms of the GNU Lesser |
|
18 |
** General Public License version 2.1 as published by the Free Software |
|
19 |
** Foundation and appearing in the file LICENSE.LGPL included in the |
|
20 |
** packaging of this file. Please review the following information to |
|
21 |
** ensure the GNU Lesser General Public License version 2.1 requirements |
|
22 |
** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html. |
|
23 |
** |
|
24 |
** In addition, as a special exception, Nokia gives you certain additional |
|
25 |
** rights. These rights are described in the Nokia Qt LGPL Exception |
|
26 |
** version 1.1, included in the file LGPL_EXCEPTION.txt in this package. |
|
27 |
** |
|
28 |
** If you have questions regarding the use of this file, please contact |
|
29 |
** Nokia at qt-info@nokia.com. |
|
30 |
** |
|
31 |
** |
|
32 |
** |
|
33 |
** |
|
34 |
** |
|
35 |
** |
|
36 |
** |
|
37 |
** |
|
38 |
** $QT_END_LICENSE$ |
|
39 |
** |
|
40 |
****************************************************************************/ |
|
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
41 |
|
0 | 42 |
#include <qlist.h> |
43 |
#include <qhash.h> |
|
44 |
#include <qfile.h> |
|
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
45 |
#include <qbytearray.h> |
0 | 46 |
#include <qstring.h> |
47 |
#include <qchar.h> |
|
48 |
#include <qvector.h> |
|
49 |
#include <qdebug.h> |
|
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
50 |
#if 0 |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
51 |
#include <private/qunicodetables_p.h> |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
52 |
#endif |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
53 |
|
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
54 |
// Unicode data version, as plain text and as the spelling of the matching
// QChar::UnicodeVersion enumerator.
#define DATA_VERSION_S   "5.0"
#define DATA_VERSION_STR "QChar::Unicode_5_0"

// Last code point, as a number and as text.
#define LAST_CODEPOINT     0x10ffff
#define LAST_CODEPOINT_STR "0x10ffff"
0 | 59 |
|
60 |
||
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
61 |
static QHash<QByteArray, QChar::UnicodeVersion> age_map; |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
62 |
|
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
63 |
static void initAgeMap() |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
64 |
{ |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
65 |
struct AgeMap { |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
66 |
const QChar::UnicodeVersion version; |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
67 |
const char *age; |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
68 |
} ageMap[] = { |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
69 |
{ QChar::Unicode_1_1, "1.1" }, |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
70 |
{ QChar::Unicode_2_0, "2.0" }, |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
71 |
{ QChar::Unicode_2_1_2, "2.1" }, |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
72 |
{ QChar::Unicode_3_0, "3.0" }, |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
73 |
{ QChar::Unicode_3_1, "3.1" }, |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
74 |
{ QChar::Unicode_3_2, "3.2" }, |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
75 |
{ QChar::Unicode_4_0, "4.0" }, |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
76 |
{ QChar::Unicode_4_1, "4.1" }, |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
77 |
{ QChar::Unicode_5_0, "5.0" }, |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
78 |
{ QChar::Unicode_Unassigned, 0 } |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
79 |
}; |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
80 |
AgeMap *d = ageMap; |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
81 |
while (d->age) { |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
82 |
age_map.insert(d->age, d->version); |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
83 |
++d; |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
84 |
} |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
85 |
} |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
86 |
|
0 | 87 |
|
88 |
// C++ source text declaring the GraphemeBreak enum. The enumerators appear in
// the same order as in the GraphemeBreak enum of this file, without the
// trailing GraphemeBreak_Unassigned value.
static const char *grapheme_break_string =
    " enum GraphemeBreak {\n"
    " GraphemeBreakOther,\n"
    " GraphemeBreakCR,\n"
    " GraphemeBreakLF,\n"
    " GraphemeBreakControl,\n"
    " GraphemeBreakExtend,\n"
    " GraphemeBreakL,\n"
    " GraphemeBreakV,\n"
    " GraphemeBreakT,\n"
    " GraphemeBreakLV,\n"
    " GraphemeBreakLVT\n"
    " };\n\n";
|
101 |
||
102 |
// Grapheme break property values. Enumerators are numbered consecutively from 0.
enum GraphemeBreak {
    GraphemeBreakOther,
    GraphemeBreakCR,
    GraphemeBreakLF,
    GraphemeBreakControl,
    GraphemeBreakExtend,
    GraphemeBreakL,
    GraphemeBreakV,
    GraphemeBreakT,
    GraphemeBreakLV,
    GraphemeBreakLVT,

    // Extra value that follows the last named property value.
    GraphemeBreak_Unassigned
};
116 |
||
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
117 |
static QHash<QByteArray, GraphemeBreak> grapheme_break_map; |
0 | 118 |
|
119 |
static void initGraphemeBreak() |
|
120 |
{ |
|
121 |
struct GraphemeBreakList { |
|
122 |
GraphemeBreak brk; |
|
123 |
const char *name; |
|
124 |
} breaks[] = { |
|
125 |
{ GraphemeBreakOther, "Other" }, |
|
126 |
{ GraphemeBreakCR, "CR" }, |
|
127 |
{ GraphemeBreakLF, "LF" }, |
|
128 |
{ GraphemeBreakControl, "Control" }, |
|
129 |
{ GraphemeBreakExtend, "Extend" }, |
|
130 |
{ GraphemeBreakL, "L" }, |
|
131 |
{ GraphemeBreakV, "V" }, |
|
132 |
{ GraphemeBreakT, "T" }, |
|
133 |
{ GraphemeBreakLV, "LV" }, |
|
134 |
{ GraphemeBreakLVT, "LVT" }, |
|
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
135 |
{ GraphemeBreak_Unassigned, 0 } |
0 | 136 |
}; |
137 |
GraphemeBreakList *d = breaks; |
|
138 |
while (d->name) { |
|
139 |
grapheme_break_map.insert(d->name, d->brk); |
|
140 |
++d; |
|
141 |
} |
|
142 |
} |
|
143 |
||
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
144 |
|
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
145 |
// C++ source text declaring the WordBreak enum. The enumerators appear in the
// same order as in the WordBreak enum of this file, without the trailing
// WordBreak_Unassigned value.
static const char *word_break_string =
    " enum WordBreak {\n"
    " WordBreakOther,\n"
    " WordBreakFormat,\n"
    " WordBreakKatakana,\n"
    " WordBreakALetter,\n"
    " WordBreakMidLetter,\n"
    " WordBreakMidNum,\n"
    " WordBreakNumeric,\n"
    " WordBreakExtendNumLet\n"
    " };\n\n";
|
156 |
||
157 |
// Word break property values. Enumerators are numbered consecutively from 0.
enum WordBreak {
    WordBreakOther,
    WordBreakFormat,
    WordBreakKatakana,
    WordBreakALetter,
    WordBreakMidLetter,
    WordBreakMidNum,
    WordBreakNumeric,
    WordBreakExtendNumLet,

    // Extra value that follows the last named property value.
    WordBreak_Unassigned
};
169 |
||
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
170 |
static QHash<QByteArray, WordBreak> word_break_map; |
0 | 171 |
|
172 |
static void initWordBreak() |
|
173 |
{ |
|
174 |
struct WordBreakList { |
|
175 |
WordBreak brk; |
|
176 |
const char *name; |
|
177 |
} breaks[] = { |
|
178 |
{ WordBreakFormat, "Format" }, |
|
179 |
{ WordBreakFormat, "Extend" }, // these are copied in from GraphemeBreakProperty.txt |
|
180 |
{ WordBreakKatakana, "Katakana" }, |
|
181 |
{ WordBreakALetter, "ALetter" }, |
|
182 |
{ WordBreakMidLetter, "MidLetter" }, |
|
183 |
{ WordBreakMidNum, "MidNum" }, |
|
184 |
{ WordBreakNumeric, "Numeric" }, |
|
185 |
{ WordBreakExtendNumLet, "ExtendNumLet" }, |
|
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
186 |
{ WordBreak_Unassigned, 0 } |
0 | 187 |
}; |
188 |
WordBreakList *d = breaks; |
|
189 |
while (d->name) { |
|
190 |
word_break_map.insert(d->name, d->brk); |
|
191 |
++d; |
|
192 |
} |
|
193 |
} |
|
194 |
||
195 |
||
196 |
// C++ source text declaring the SentenceBreak enum. The enumerators appear in
// the same order as in the SentenceBreak enum of this file, without the
// trailing SentenceBreak_Unassigned value.
static const char *sentence_break_string =
    " enum SentenceBreak {\n"
    " SentenceBreakOther,\n"
    " SentenceBreakSep,\n"
    " SentenceBreakFormat,\n"
    " SentenceBreakSp,\n"
    " SentenceBreakLower,\n"
    " SentenceBreakUpper,\n"
    " SentenceBreakOLetter,\n"
    " SentenceBreakNumeric,\n"
    " SentenceBreakATerm,\n"
    " SentenceBreakSTerm,\n"
    " SentenceBreakClose\n"
    " };\n\n";
|
210 |
||
211 |
// Sentence break property values. Enumerators are numbered consecutively from 0.
enum SentenceBreak {
    SentenceBreakOther,
    SentenceBreakSep,
    SentenceBreakFormat,
    SentenceBreakSp,
    SentenceBreakLower,
    SentenceBreakUpper,
    SentenceBreakOLetter,
    SentenceBreakNumeric,
    SentenceBreakATerm,
    SentenceBreakSTerm,
    SentenceBreakClose,

    // Extra value that follows the last named property value.
    SentenceBreak_Unassigned
};
226 |
||
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
227 |
static QHash<QByteArray, SentenceBreak> sentence_break_map; |
0 | 228 |
|
229 |
static void initSentenceBreak() |
|
230 |
{ |
|
231 |
struct SentenceBreakList { |
|
232 |
SentenceBreak brk; |
|
233 |
const char *name; |
|
234 |
} breaks[] = { |
|
235 |
{ SentenceBreakOther, "Other" }, |
|
236 |
{ SentenceBreakSep, "Sep" }, |
|
237 |
{ SentenceBreakFormat, "Format" }, |
|
238 |
{ SentenceBreakSp, "Sp" }, |
|
239 |
{ SentenceBreakLower, "Lower" }, |
|
240 |
{ SentenceBreakUpper, "Upper" }, |
|
241 |
{ SentenceBreakOLetter, "OLetter" }, |
|
242 |
{ SentenceBreakNumeric, "Numeric" }, |
|
243 |
{ SentenceBreakATerm, "ATerm" }, |
|
244 |
{ SentenceBreakSTerm, "STerm" }, |
|
245 |
{ SentenceBreakClose, "Close" }, |
|
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
246 |
{ SentenceBreak_Unassigned, 0 } |
0 | 247 |
}; |
248 |
SentenceBreakList *d = breaks; |
|
249 |
while (d->name) { |
|
250 |
sentence_break_map.insert(d->name, d->brk); |
|
251 |
++d; |
|
252 |
} |
|
253 |
} |
|
254 |
||
255 |
||
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
256 |
// C++ source text declaring the LineBreakClass enum, preceded by three comment
// lines. The enumerators appear in the same order as in the LineBreakClass
// enum of this file, without the trailing LineBreak_Unassigned value.
static const char *lineBreakClass =
    " // see http://www.unicode.org/reports/tr14/tr14-19.html\n"
    " // we don't use the XX, AI and CB properties and map them to AL instead.\n"
    " // as we don't support any EBDIC based OS'es, NL is ignored and mapped to AL as well.\n"
    " enum LineBreakClass {\n"
    " LineBreak_OP, LineBreak_CL, LineBreak_QU, LineBreak_GL, LineBreak_NS,\n"
    " LineBreak_EX, LineBreak_SY, LineBreak_IS, LineBreak_PR, LineBreak_PO,\n"
    " LineBreak_NU, LineBreak_AL, LineBreak_ID, LineBreak_IN, LineBreak_HY,\n"
    " LineBreak_BA, LineBreak_BB, LineBreak_B2, LineBreak_ZW, LineBreak_CM,\n"
    " LineBreak_WJ, LineBreak_H2, LineBreak_H3, LineBreak_JL, LineBreak_JV,\n"
    " LineBreak_JT, LineBreak_SA, LineBreak_SG,\n"
    " LineBreak_SP, LineBreak_CR, LineBreak_LF, LineBreak_BK\n"
    " };\n\n";
|
269 |
||
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
270 |
// Line break classes. Enumerators are numbered consecutively from 0.
enum LineBreakClass {
    LineBreak_OP, LineBreak_CL, LineBreak_QU, LineBreak_GL, LineBreak_NS,
    LineBreak_EX, LineBreak_SY, LineBreak_IS, LineBreak_PR, LineBreak_PO,
    LineBreak_NU, LineBreak_AL, LineBreak_ID, LineBreak_IN, LineBreak_HY,
    LineBreak_BA, LineBreak_BB, LineBreak_B2, LineBreak_ZW, LineBreak_CM,
    LineBreak_WJ, LineBreak_H2, LineBreak_H3, LineBreak_JL, LineBreak_JV,
    LineBreak_JT, LineBreak_SA, LineBreak_SG,
    LineBreak_SP, LineBreak_CR, LineBreak_LF, LineBreak_BK,

    // Extra value that follows the last named class.
    LineBreak_Unassigned
};
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
281 |
|
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
282 |
static QHash<QByteArray, LineBreakClass> line_break_map; |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
283 |
|
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
284 |
static void initLineBreak() |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
285 |
{ |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
286 |
// ### Classes XX and AI are left out and mapped to AL for now; |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
287 |
// ### Class NL is ignored and mapped to AL as well. |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
288 |
struct LineBreakList { |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
289 |
LineBreakClass brk; |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
290 |
const char *name; |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
291 |
} breaks[] = { |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
292 |
{ LineBreak_BK, "BK" }, |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
293 |
{ LineBreak_CR, "CR" }, |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
294 |
{ LineBreak_LF, "LF" }, |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
295 |
{ LineBreak_CM, "CM" }, |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
296 |
{ LineBreak_AL, "NL" }, |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
297 |
{ LineBreak_SG, "SG" }, |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
298 |
{ LineBreak_WJ, "WJ" }, |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
299 |
{ LineBreak_ZW, "ZW" }, |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
300 |
{ LineBreak_GL, "GL" }, |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
301 |
{ LineBreak_SP, "SP" }, |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
302 |
{ LineBreak_B2, "B2" }, |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
303 |
{ LineBreak_BA, "BA" }, |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
304 |
{ LineBreak_BB, "BB" }, |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
305 |
{ LineBreak_HY, "HY" }, |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
306 |
{ LineBreak_AL, "CB" }, // ### |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
307 |
{ LineBreak_CL, "CL" }, |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
308 |
{ LineBreak_EX, "EX" }, |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
309 |
{ LineBreak_IN, "IN" }, |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
310 |
{ LineBreak_NS, "NS" }, |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
311 |
{ LineBreak_OP, "OP" }, |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
312 |
{ LineBreak_QU, "QU" }, |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
313 |
{ LineBreak_IS, "IS" }, |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
314 |
{ LineBreak_NU, "NU" }, |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
315 |
{ LineBreak_PO, "PO" }, |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
316 |
{ LineBreak_PR, "PR" }, |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
317 |
{ LineBreak_SY, "SY" }, |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
318 |
{ LineBreak_AL, "AI" }, |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
319 |
{ LineBreak_AL, "AL" }, |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
320 |
{ LineBreak_H2, "H2" }, |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
321 |
{ LineBreak_H3, "H3" }, |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
322 |
{ LineBreak_ID, "ID" }, |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
323 |
{ LineBreak_JL, "JL" }, |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
324 |
{ LineBreak_JV, "JV" }, |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
325 |
{ LineBreak_JT, "JT" }, |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
326 |
{ LineBreak_SA, "SA" }, |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
327 |
{ LineBreak_AL, "XX" }, |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
328 |
{ LineBreak_Unassigned, 0 } |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
329 |
}; |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
330 |
LineBreakList *d = breaks; |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
331 |
while (d->name) { |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
332 |
line_break_map.insert(d->name, d->brk); |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
333 |
++d; |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
334 |
} |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
335 |
} |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
336 |
|
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
337 |
|
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
338 |
// C++ source text declaring the Properties bit-field struct and the two
// properties() lookup functions.
// Keep this one in sync with the code in createPropertyInfo
static const char *property_string =
    " struct Properties {\n"
    " ushort category : 8; /* 5 needed */\n"
    " ushort line_break_class : 8; /* 6 needed */\n"
    " ushort direction : 8; /* 5 needed */\n"
    " ushort combiningClass : 8;\n"
    " ushort joining : 2;\n"
    " signed short digitValue : 6; /* 5 needed */\n"
    " ushort unicodeVersion : 4;\n"
    " ushort lowerCaseSpecial : 1;\n"
    " ushort upperCaseSpecial : 1;\n"
    " ushort titleCaseSpecial : 1;\n"
    " ushort caseFoldSpecial : 1; /* currently unused */\n"
    " signed short mirrorDiff : 16;\n"
    " signed short lowerCaseDiff : 16;\n"
    " signed short upperCaseDiff : 16;\n"
    " signed short titleCaseDiff : 16;\n"
    " signed short caseFoldDiff : 16;\n"
    " ushort graphemeBreak : 8; /* 4 needed */\n"
    " ushort wordBreak : 8; /* 4 needed */\n"
    " ushort sentenceBreak : 8; /* 4 needed */\n"
    " };\n"
    " Q_CORE_EXPORT const Properties * QT_FASTCALL properties(uint ucs4);\n"
    " Q_CORE_EXPORT const Properties * QT_FASTCALL properties(ushort ucs2);\n";
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
363 |
|
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
364 |
// C++ source text declaring the lineBreakClass() and script() lookups, each
// with an inline overload taking a QChar that forwards ch.unicode().
static const char *methods =
    " Q_CORE_EXPORT QUnicodeTables::LineBreakClass QT_FASTCALL lineBreakClass(uint ucs4);\n"
    " inline int lineBreakClass(const QChar &ch)\n"
    " { return lineBreakClass(ch.unicode()); }\n"
    "\n"
    " Q_CORE_EXPORT int QT_FASTCALL script(uint ucs4);\n"
    " inline int script(const QChar &ch)\n"
    " { return script(ch.unicode()); }\n\n";
0 | 372 |
|
373 |
||
374 |
struct PropertyFlags { |
|
375 |
bool operator ==(const PropertyFlags &o) { |
|
376 |
return (combiningClass == o.combiningClass |
|
377 |
&& category == o.category |
|
378 |
&& direction == o.direction |
|
379 |
&& joining == o.joining |
|
380 |
&& age == o.age |
|
381 |
&& digitValue == o.digitValue |
|
382 |
&& line_break_class == o.line_break_class |
|
383 |
&& mirrorDiff == o.mirrorDiff |
|
384 |
&& lowerCaseDiff == o.lowerCaseDiff |
|
385 |
&& upperCaseDiff == o.upperCaseDiff |
|
386 |
&& titleCaseDiff == o.titleCaseDiff |
|
387 |
&& caseFoldDiff == o.caseFoldDiff |
|
388 |
&& lowerCaseSpecial == o.lowerCaseSpecial |
|
389 |
&& upperCaseSpecial == o.upperCaseSpecial |
|
390 |
&& titleCaseSpecial == o.titleCaseSpecial |
|
391 |
&& caseFoldSpecial == o.caseFoldSpecial |
|
392 |
&& graphemeBreak == o.graphemeBreak |
|
393 |
&& wordBreak == o.wordBreak |
|
394 |
&& sentenceBreak == o.sentenceBreak |
|
395 |
); |
|
396 |
} |
|
397 |
// from UnicodeData.txt |
|
398 |
uchar combiningClass : 8; |
|
399 |
QChar::Category category : 5; |
|
400 |
QChar::Direction direction : 5; |
|
401 |
// from ArabicShaping.txt |
|
402 |
QChar::Joining joining : 2; |
|
403 |
// from DerivedAge.txt |
|
404 |
QChar::UnicodeVersion age : 4; |
|
405 |
int digitValue; |
|
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
406 |
uint line_break_class : 6; |
0 | 407 |
|
408 |
int mirrorDiff : 16; |
|
409 |
||
410 |
int lowerCaseDiff; |
|
411 |
int upperCaseDiff; |
|
412 |
int titleCaseDiff; |
|
413 |
int caseFoldDiff; |
|
414 |
bool lowerCaseSpecial; |
|
415 |
bool upperCaseSpecial; |
|
416 |
bool titleCaseSpecial; |
|
417 |
bool caseFoldSpecial; |
|
418 |
GraphemeBreak graphemeBreak; |
|
419 |
WordBreak wordBreak; |
|
420 |
SentenceBreak sentenceBreak; |
|
421 |
}; |
|
422 |
||
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
423 |
|
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
424 |
static QList<int> specialCaseMap; |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
425 |
static int specialCaseMaxLen = 0; |
0 | 426 |
|
427 |
static int appendToSpecialCaseMap(const QList<int> &map) |
|
428 |
{ |
|
429 |
QList<int> utf16map; |
|
430 |
for (int i = 0; i < map.size(); ++i) { |
|
431 |
int val = map.at(i); |
|
432 |
if (val > 0xffff) { |
|
433 |
utf16map << QChar::highSurrogate(val); |
|
434 |
utf16map << QChar::lowSurrogate(val); |
|
435 |
} else { |
|
436 |
utf16map << val; |
|
437 |
} |
|
438 |
} |
|
439 |
specialCaseMaxLen = qMax(specialCaseMaxLen, utf16map.size()); |
|
440 |
utf16map << 0; |
|
441 |
||
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
442 |
for (int i = 0; i < specialCaseMap.size() - utf16map.size() + 1; ++i) { |
0 | 443 |
int j; |
444 |
for (j = 0; j < utf16map.size(); ++j) { |
|
445 |
if (specialCaseMap.at(i+j) != utf16map.at(j)) |
|
446 |
break; |
|
447 |
} |
|
448 |
if (j == utf16map.size()) |
|
449 |
return i; |
|
450 |
} |
|
451 |
||
452 |
int pos = specialCaseMap.size(); |
|
453 |
specialCaseMap << utf16map; |
|
454 |
return pos; |
|
455 |
} |
|
456 |
||
457 |
struct UnicodeData { |
|
458 |
UnicodeData(int codepoint = 0) { |
|
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
459 |
p.category = QChar::Other_NotAssigned; // Cn |
0 | 460 |
p.combiningClass = 0; |
461 |
||
462 |
p.direction = QChar::DirL; |
|
463 |
// DirR for: U+0590..U+05FF, U+07C0..U+08FF, U+FB1D..U+FB4F, U+10800..U+10FFF |
|
464 |
if ((codepoint >= 0x590 && codepoint <= 0x5ff) |
|
465 |
|| (codepoint >= 0x7c0 && codepoint <= 0x8ff) |
|
466 |
|| (codepoint >= 0xfb1d && codepoint <= 0xfb4f) |
|
467 |
|| (codepoint >= 0x10800 && codepoint <= 0x10fff)) |
|
468 |
p.direction = QChar::DirR; |
|
469 |
// DirAL for: U+0600..U+07BF, U+FB50..U+FDCF, U+FDF0..U+FDFF, U+FE70..U+FEFE |
|
470 |
if ((codepoint >= 0x600 && codepoint <= 0x7bf) |
|
471 |
|| (codepoint >= 0xfb50 && codepoint <= 0xfdcf) |
|
472 |
|| (codepoint >= 0xfdf0 && codepoint <= 0xfdff) |
|
473 |
|| (codepoint >= 0xfe70 && codepoint <= 0xfefe)) |
|
474 |
p.direction = QChar::DirAL; |
|
475 |
||
476 |
mirroredChar = 0; |
|
477 |
decompositionType = QChar::NoDecomposition; |
|
478 |
p.joining = QChar::OtherJoining; |
|
479 |
p.age = QChar::Unicode_Unassigned; |
|
480 |
p.mirrorDiff = 0; |
|
481 |
p.digitValue = -1; |
|
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
482 |
p.line_break_class = LineBreak_AL; // XX -> AL |
0 | 483 |
p.lowerCaseDiff = 0; |
484 |
p.upperCaseDiff = 0; |
|
485 |
p.titleCaseDiff = 0; |
|
486 |
p.caseFoldDiff = 0; |
|
487 |
p.lowerCaseSpecial = 0; |
|
488 |
p.upperCaseSpecial = 0; |
|
489 |
p.titleCaseSpecial = 0; |
|
490 |
p.caseFoldSpecial = 0; |
|
491 |
p.graphemeBreak = GraphemeBreakOther; |
|
492 |
p.wordBreak = WordBreakOther; |
|
493 |
p.sentenceBreak = SentenceBreakOther; |
|
494 |
propertyIndex = -1; |
|
495 |
excludedComposition = false; |
|
496 |
} |
|
497 |
PropertyFlags p; |
|
498 |
||
499 |
// from UnicodeData.txt |
|
500 |
QChar::Decomposition decompositionType; |
|
501 |
QList<int> decomposition; |
|
502 |
||
503 |
QList<int> specialFolding; |
|
504 |
||
505 |
// from BidiMirroring.txt |
|
506 |
int mirroredChar; |
|
507 |
||
508 |
// CompositionExclusions.txt |
|
509 |
bool excludedComposition; |
|
510 |
||
511 |
// computed position of unicode property set |
|
512 |
int propertyIndex; |
|
513 |
}; |
|
514 |
||
515 |
// Positions of the ';'-separated fields of a UnicodeData.txt line; used to
// index the list obtained by splitting such a line.
enum UniDataFields {
    UD_Value             = 0,
    UD_Name              = 1,
    UD_Category          = 2,
    UD_CombiningClass    = 3,
    UD_BidiCategory      = 4,
    UD_Decomposition     = 5,
    UD_DecimalDigitValue = 6,
    UD_DigitValue        = 7,
    UD_NumericValue      = 8,
    UD_Mirrored          = 9,
    UD_OldName           = 10,
    UD_Comment           = 11,
    UD_UpperCase         = 12,
    UD_LowerCase         = 13,
    UD_TitleCase         = 14
};
|
532 |
||
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
533 |
|
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
534 |
static QHash<QByteArray, QChar::Category> categoryMap; |
0 | 535 |
|
536 |
static void initCategoryMap() |
|
537 |
{ |
|
538 |
struct Cat { |
|
539 |
QChar::Category cat; |
|
540 |
const char *name; |
|
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
541 |
} categories[] = { |
0 | 542 |
{ QChar::Mark_NonSpacing, "Mn" }, |
543 |
{ QChar::Mark_SpacingCombining, "Mc" }, |
|
544 |
{ QChar::Mark_Enclosing, "Me" }, |
|
545 |
||
546 |
{ QChar::Number_DecimalDigit, "Nd" }, |
|
547 |
{ QChar::Number_Letter, "Nl" }, |
|
548 |
{ QChar::Number_Other, "No" }, |
|
549 |
||
550 |
{ QChar::Separator_Space, "Zs" }, |
|
551 |
{ QChar::Separator_Line, "Zl" }, |
|
552 |
{ QChar::Separator_Paragraph, "Zp" }, |
|
553 |
||
554 |
{ QChar::Other_Control, "Cc" }, |
|
555 |
{ QChar::Other_Format, "Cf" }, |
|
556 |
{ QChar::Other_Surrogate, "Cs" }, |
|
557 |
{ QChar::Other_PrivateUse, "Co" }, |
|
558 |
{ QChar::Other_NotAssigned, "Cn" }, |
|
559 |
||
560 |
{ QChar::Letter_Uppercase, "Lu" }, |
|
561 |
{ QChar::Letter_Lowercase, "Ll" }, |
|
562 |
{ QChar::Letter_Titlecase, "Lt" }, |
|
563 |
{ QChar::Letter_Modifier, "Lm" }, |
|
564 |
{ QChar::Letter_Other, "Lo" }, |
|
565 |
||
566 |
{ QChar::Punctuation_Connector, "Pc" }, |
|
567 |
{ QChar::Punctuation_Dash, "Pd" }, |
|
568 |
{ QChar::Punctuation_Open, "Ps" }, |
|
569 |
{ QChar::Punctuation_Close, "Pe" }, |
|
570 |
{ QChar::Punctuation_InitialQuote, "Pi" }, |
|
571 |
{ QChar::Punctuation_FinalQuote, "Pf" }, |
|
572 |
{ QChar::Punctuation_Other, "Po" }, |
|
573 |
||
574 |
{ QChar::Symbol_Math, "Sm" }, |
|
575 |
{ QChar::Symbol_Currency, "Sc" }, |
|
576 |
{ QChar::Symbol_Modifier, "Sk" }, |
|
577 |
{ QChar::Symbol_Other, "So" }, |
|
578 |
{ QChar::NoCategory, 0 } |
|
579 |
}; |
|
580 |
Cat *c = categories; |
|
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
581 |
while (c->name) { |
0 | 582 |
categoryMap.insert(c->name, c->cat); |
583 |
++c; |
|
584 |
} |
|
585 |
} |
|
586 |
||
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
587 |
|
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
588 |
static QHash<QByteArray, QChar::Direction> directionMap; |
0 | 589 |
|
590 |
static void initDirectionMap() |
|
591 |
{ |
|
592 |
struct Dir { |
|
593 |
QChar::Direction dir; |
|
594 |
const char *name; |
|
595 |
} directions[] = { |
|
596 |
{ QChar::DirL, "L" }, |
|
597 |
{ QChar::DirR, "R" }, |
|
598 |
{ QChar::DirEN, "EN" }, |
|
599 |
{ QChar::DirES, "ES" }, |
|
600 |
{ QChar::DirET, "ET" }, |
|
601 |
{ QChar::DirAN, "AN" }, |
|
602 |
{ QChar::DirCS, "CS" }, |
|
603 |
{ QChar::DirB, "B" }, |
|
604 |
{ QChar::DirS, "S" }, |
|
605 |
{ QChar::DirWS, "WS" }, |
|
606 |
{ QChar::DirON, "ON" }, |
|
607 |
{ QChar::DirLRE, "LRE" }, |
|
608 |
{ QChar::DirLRO, "LRO" }, |
|
609 |
{ QChar::DirAL, "AL" }, |
|
610 |
{ QChar::DirRLE, "RLE" }, |
|
611 |
{ QChar::DirRLO, "RLO" }, |
|
612 |
{ QChar::DirPDF, "PDF" }, |
|
613 |
{ QChar::DirNSM, "NSM" }, |
|
614 |
{ QChar::DirBN, "BN" }, |
|
615 |
{ QChar::DirL, 0 } |
|
616 |
}; |
|
617 |
Dir *d = directions; |
|
618 |
while (d->name) { |
|
619 |
directionMap.insert(d->name, d->dir); |
|
620 |
++d; |
|
621 |
} |
|
622 |
} |
|
623 |
||
624 |
||
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
625 |
static QHash<QByteArray, QChar::Decomposition> decompositionMap; |
0 | 626 |
|
627 |
static void initDecompositionMap() |
|
628 |
{ |
|
629 |
struct Dec { |
|
630 |
QChar::Decomposition dec; |
|
631 |
const char *name; |
|
632 |
} decompositions[] = { |
|
633 |
{ QChar::Canonical, "<canonical>" }, |
|
634 |
{ QChar::Font, "<font>" }, |
|
635 |
{ QChar::NoBreak, "<noBreak>" }, |
|
636 |
{ QChar::Initial, "<initial>" }, |
|
637 |
{ QChar::Medial, "<medial>" }, |
|
638 |
{ QChar::Final, "<final>" }, |
|
639 |
{ QChar::Isolated, "<isolated>" }, |
|
640 |
{ QChar::Circle, "<circle>" }, |
|
641 |
{ QChar::Super, "<super>" }, |
|
642 |
{ QChar::Sub, "<sub>" }, |
|
643 |
{ QChar::Vertical, "<vertical>" }, |
|
644 |
{ QChar::Wide, "<wide>" }, |
|
645 |
{ QChar::Narrow, "<narrow>" }, |
|
646 |
{ QChar::Small, "<small>" }, |
|
647 |
{ QChar::Square, "<square>" }, |
|
648 |
{ QChar::Compat, "<compat>" }, |
|
649 |
{ QChar::Fraction, "<fraction>" }, |
|
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
650 |
{ QChar::NoDecomposition, 0 } |
0 | 651 |
}; |
652 |
Dec *d = decompositions; |
|
653 |
while (d->name) { |
|
654 |
decompositionMap.insert(d->name, d->dec); |
|
655 |
++d; |
|
656 |
} |
|
657 |
} |
|
658 |
||
659 |
||
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
660 |
// Data collected for each code point, keyed by code point. The read*()
// functions insert into and update this table.
static QHash<int, UnicodeData> unicodeData;
// NOTE(review): presumably the distinct PropertyFlags values found in
// unicodeData; it is filled outside this part of the file - confirm.
static QList<PropertyFlags> uniqueProperties;

// Key: number of code points in a decomposition; value: how many
// UnicodeData.txt entries have a decomposition of that length.
static QHash<int, int> decompositionLength;
// Highest code point for which UnicodeData.txt lists a decomposition.
static int highestComposedCharacter = 0;
// NOTE(review): ligature statistics; both are updated outside this part of
// the file - confirm their exact meaning there.
static int numLigatures = 0;
static int highestLigature = 0;
0 | 668 |
|
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
669 |
struct Ligature { |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
670 |
ushort u1; |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
671 |
ushort u2; |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
672 |
ushort ligature; |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
673 |
}; |
0 | 674 |
// we need them sorted after the first component for fast lookup |
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
675 |
bool operator < (const Ligature &l1, const Ligature &l2) |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
676 |
{ return l1.u1 < l2.u1; } |
0 | 677 |
|
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
678 |
// NOTE(review): lists of Ligature entries keyed by a UTF-16 code unit; filled
// outside this part of the file - confirm which component is the key.
static QHash<ushort, QList<Ligature> > ligatureHashes;

// Key: combining class; value: number of UnicodeData.txt entries using it.
static QHash<int, int> combiningClassUsage;

// Largest absolute distance between a code point and its lower/upper/title
// case mapping seen in UnicodeData.txt.
static int maxLowerCaseDiff = 0;
static int maxUpperCaseDiff = 0;
static int maxTitleCaseDiff = 0;
0 | 685 |
|
686 |
static void readUnicodeData() |
|
687 |
{ |
|
688 |
QFile f("data/UnicodeData.txt"); |
|
689 |
if (!f.exists()) |
|
690 |
qFatal("Couldn't find UnicodeData.txt"); |
|
691 |
||
692 |
f.open(QFile::ReadOnly); |
|
693 |
||
694 |
while (!f.atEnd()) { |
|
695 |
QByteArray line; |
|
696 |
line.resize(1024); |
|
697 |
int len = f.readLine(line.data(), 1024); |
|
698 |
line.truncate(len-1); |
|
699 |
||
700 |
int comment = line.indexOf('#'); |
|
701 |
if (comment >= 0) |
|
702 |
line = line.left(comment); |
|
703 |
if (line.isEmpty()) |
|
704 |
continue; |
|
705 |
||
706 |
QList<QByteArray> properties = line.split(';'); |
|
707 |
bool ok; |
|
708 |
int codepoint = properties[UD_Value].toInt(&ok, 16); |
|
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
709 |
Q_ASSERT(ok); |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
710 |
Q_ASSERT(codepoint <= LAST_CODEPOINT); |
0 | 711 |
int lastCodepoint = codepoint; |
712 |
||
713 |
QByteArray name = properties[UD_Name]; |
|
714 |
if (name.startsWith('<') && name.contains("First")) { |
|
715 |
QByteArray nextLine; |
|
716 |
nextLine.resize(1024); |
|
717 |
f.readLine(nextLine.data(), 1024); |
|
718 |
QList<QByteArray> properties = nextLine.split(';'); |
|
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
719 |
Q_ASSERT(properties[UD_Name].startsWith('<') && properties[UD_Name].contains("Last")); |
0 | 720 |
lastCodepoint = properties[UD_Value].toInt(&ok, 16); |
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
721 |
Q_ASSERT(ok); |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
722 |
Q_ASSERT(lastCodepoint <= LAST_CODEPOINT); |
0 | 723 |
} |
724 |
||
725 |
UnicodeData data(codepoint); |
|
726 |
data.p.category = categoryMap.value(properties[UD_Category], QChar::NoCategory); |
|
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
727 |
if (data.p.category == QChar::NoCategory) |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
728 |
qFatal("unassigned char category: %s", properties[UD_Category].constData()); |
0 | 729 |
data.p.combiningClass = properties[UD_CombiningClass].toInt(); |
730 |
||
731 |
if (!combiningClassUsage.contains(data.p.combiningClass)) |
|
732 |
combiningClassUsage[data.p.combiningClass] = 1; |
|
733 |
else |
|
734 |
++combiningClassUsage[data.p.combiningClass]; |
|
735 |
||
736 |
data.p.direction = directionMap.value(properties[UD_BidiCategory], data.p.direction); |
|
737 |
||
738 |
if (!properties[UD_UpperCase].isEmpty()) { |
|
739 |
int upperCase = properties[UD_UpperCase].toInt(&ok, 16); |
|
740 |
Q_ASSERT(ok); |
|
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
741 |
if (qAbs(upperCase - codepoint) >= (1<<14)) |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
742 |
qWarning() << "upperCaseDiff exceeded (" << hex << codepoint << "->" << upperCase << ")"; |
0 | 743 |
data.p.upperCaseDiff = upperCase - codepoint; |
744 |
maxUpperCaseDiff = qMax(maxUpperCaseDiff, qAbs(data.p.upperCaseDiff)); |
|
745 |
if (codepoint > 0xffff) { |
|
746 |
// if the condition below doesn't hold anymore we need to modify our case folding code |
|
747 |
//qDebug() << codepoint << QChar::highSurrogate(codepoint) << QChar::highSurrogate(foldMap.at(0)); |
|
748 |
Q_ASSERT(QChar::highSurrogate(codepoint) == QChar::highSurrogate(upperCase)); |
|
749 |
} |
|
750 |
} |
|
751 |
if (!properties[UD_LowerCase].isEmpty()) { |
|
752 |
int lowerCase = properties[UD_LowerCase].toInt(&ok, 16); |
|
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
753 |
Q_ASSERT(ok); |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
754 |
if (qAbs(lowerCase - codepoint) >= (1<<14)) |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
755 |
qWarning() << "lowerCaseDiff exceeded (" << hex << codepoint << "->" << lowerCase << ")"; |
0 | 756 |
data.p.lowerCaseDiff = lowerCase - codepoint; |
757 |
maxLowerCaseDiff = qMax(maxLowerCaseDiff, qAbs(data.p.lowerCaseDiff)); |
|
758 |
if (codepoint > 0xffff) { |
|
759 |
// if the condition below doesn't hold anymore we need to modify our case folding code |
|
760 |
//qDebug() << codepoint << QChar::highSurrogate(codepoint) << QChar::highSurrogate(foldMap.at(0)); |
|
761 |
Q_ASSERT(QChar::highSurrogate(codepoint) == QChar::highSurrogate(lowerCase)); |
|
762 |
} |
|
763 |
} |
|
764 |
// we want toTitleCase to map to ToUpper in case we don't have any titlecase. |
|
765 |
if (properties[UD_TitleCase].isEmpty()) |
|
766 |
properties[UD_TitleCase] = properties[UD_UpperCase]; |
|
767 |
if (!properties[UD_TitleCase].isEmpty()) { |
|
768 |
int titleCase = properties[UD_TitleCase].toInt(&ok, 16); |
|
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
769 |
Q_ASSERT(ok); |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
770 |
if (qAbs(titleCase - codepoint) >= (1<<14)) |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
771 |
qWarning() << "titleCaseDiff exceeded (" << hex << codepoint << "->" << titleCase << ")"; |
0 | 772 |
data.p.titleCaseDiff = titleCase - codepoint; |
773 |
maxTitleCaseDiff = qMax(maxTitleCaseDiff, qAbs(data.p.titleCaseDiff)); |
|
774 |
if (codepoint > 0xffff) { |
|
775 |
// if the condition below doesn't hold anymore we need to modify our case folding code |
|
776 |
//qDebug() << codepoint << QChar::highSurrogate(codepoint) << QChar::highSurrogate(foldMap.at(0)); |
|
777 |
Q_ASSERT(QChar::highSurrogate(codepoint) == QChar::highSurrogate(titleCase)); |
|
778 |
} |
|
779 |
} |
|
780 |
||
781 |
if (!properties[UD_DigitValue].isEmpty()) |
|
782 |
data.p.digitValue = properties[UD_DigitValue].toInt(); |
|
783 |
||
784 |
// decompositition |
|
785 |
QByteArray decomposition = properties[UD_Decomposition]; |
|
786 |
if (!decomposition.isEmpty()) { |
|
787 |
highestComposedCharacter = qMax(highestComposedCharacter, codepoint); |
|
788 |
QList<QByteArray> d = decomposition.split(' '); |
|
789 |
if (d[0].contains('<')) { |
|
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
790 |
data.decompositionType = decompositionMap.value(d[0], QChar::NoDecomposition); |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
791 |
if (data.decompositionType == QChar::NoDecomposition) |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
792 |
qFatal("unassigned decomposition type: %s", d[0].constData()); |
0 | 793 |
d.takeFirst(); |
794 |
} else { |
|
795 |
data.decompositionType = QChar::Canonical; |
|
796 |
} |
|
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
797 |
for (int i = 0; i < d.size(); ++i) { |
0 | 798 |
data.decomposition.append(d[i].toInt(&ok, 16)); |
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
799 |
Q_ASSERT(ok); |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
800 |
} |
0 | 801 |
if (!decompositionLength.contains(data.decomposition.size())) |
802 |
decompositionLength[data.decomposition.size()] = 1; |
|
803 |
else |
|
804 |
++decompositionLength[data.decomposition.size()]; |
|
805 |
} |
|
806 |
||
807 |
for (int i = codepoint; i <= lastCodepoint; ++i) |
|
808 |
unicodeData.insert(i, data); |
|
809 |
} |
|
810 |
||
811 |
} |
|
812 |
||
813 |
static int maxMirroredDiff = 0; |
|
814 |
||
815 |
static void readBidiMirroring() |
|
816 |
{ |
|
817 |
QFile f("data/BidiMirroring.txt"); |
|
818 |
if (!f.exists()) |
|
819 |
qFatal("Couldn't find BidiMirroring.txt"); |
|
820 |
||
821 |
f.open(QFile::ReadOnly); |
|
822 |
||
823 |
while (!f.atEnd()) { |
|
824 |
QByteArray line; |
|
825 |
line.resize(1024); |
|
826 |
int len = f.readLine(line.data(), 1024); |
|
827 |
line.resize(len-1); |
|
828 |
||
829 |
int comment = line.indexOf('#'); |
|
830 |
if (comment >= 0) |
|
831 |
line = line.left(comment); |
|
832 |
||
833 |
if (line.isEmpty()) |
|
834 |
continue; |
|
835 |
line = line.replace(" ", ""); |
|
836 |
||
837 |
QList<QByteArray> pair = line.split(';'); |
|
838 |
Q_ASSERT(pair.size() == 2); |
|
839 |
||
840 |
bool ok; |
|
841 |
int codepoint = pair[0].toInt(&ok, 16); |
|
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
842 |
Q_ASSERT(ok); |
0 | 843 |
int mirror = pair[1].toInt(&ok, 16); |
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
844 |
Q_ASSERT(ok); |
0 | 845 |
|
846 |
UnicodeData d = unicodeData.value(codepoint, UnicodeData(codepoint)); |
|
847 |
d.mirroredChar = mirror; |
|
848 |
d.p.mirrorDiff = d.mirroredChar - codepoint; |
|
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
849 |
maxMirroredDiff = qMax(maxMirroredDiff, qAbs(d.p.mirrorDiff)); |
0 | 850 |
unicodeData.insert(codepoint, d); |
851 |
} |
|
852 |
} |
|
853 |
||
854 |
static void readArabicShaping() |
|
855 |
{ |
|
856 |
QFile f("data/ArabicShaping.txt"); |
|
857 |
if (!f.exists()) |
|
858 |
qFatal("Couldn't find ArabicShaping.txt"); |
|
859 |
||
860 |
f.open(QFile::ReadOnly); |
|
861 |
||
862 |
while (!f.atEnd()) { |
|
863 |
QByteArray line; |
|
864 |
line.resize(1024); |
|
865 |
int len = f.readLine(line.data(), 1024); |
|
866 |
line.resize(len-1); |
|
867 |
||
868 |
int comment = line.indexOf('#'); |
|
869 |
if (comment >= 0) |
|
870 |
line = line.left(comment); |
|
871 |
line = line.trimmed(); |
|
872 |
||
873 |
if (line.isEmpty()) |
|
874 |
continue; |
|
875 |
||
876 |
QList<QByteArray> shaping = line.split(';'); |
|
877 |
Q_ASSERT(shaping.size() == 4); |
|
878 |
||
879 |
bool ok; |
|
880 |
int codepoint = shaping[0].toInt(&ok, 16); |
|
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
881 |
Q_ASSERT(ok); |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
882 |
|
0 | 883 |
QChar::Joining j = QChar::OtherJoining; |
884 |
QByteArray shape = shaping[2].trimmed(); |
|
885 |
if (shape == "R") |
|
886 |
j = QChar::Right; |
|
887 |
else if (shape == "D") |
|
888 |
j = QChar::Dual; |
|
889 |
else if (shape == "C") |
|
890 |
j = QChar::Center; |
|
891 |
||
892 |
UnicodeData d = unicodeData.value(codepoint, UnicodeData(codepoint)); |
|
893 |
d.p.joining = j; |
|
894 |
unicodeData.insert(codepoint, d); |
|
895 |
} |
|
896 |
} |
|
897 |
||
898 |
static void readDerivedAge() |
|
899 |
{ |
|
900 |
QFile f("data/DerivedAge.txt"); |
|
901 |
if (!f.exists()) |
|
902 |
qFatal("Couldn't find DerivedAge.txt"); |
|
903 |
||
904 |
f.open(QFile::ReadOnly); |
|
905 |
||
906 |
while (!f.atEnd()) { |
|
907 |
QByteArray line; |
|
908 |
line.resize(1024); |
|
909 |
int len = f.readLine(line.data(), 1024); |
|
910 |
line.resize(len-1); |
|
911 |
||
912 |
int comment = line.indexOf('#'); |
|
913 |
if (comment >= 0) |
|
914 |
line = line.left(comment); |
|
915 |
line.replace(" ", ""); |
|
916 |
||
917 |
if (line.isEmpty()) |
|
918 |
continue; |
|
919 |
||
920 |
QList<QByteArray> l = line.split(';'); |
|
921 |
Q_ASSERT(l.size() == 2); |
|
922 |
||
923 |
QByteArray codes = l[0]; |
|
924 |
codes.replace("..", "."); |
|
925 |
QList<QByteArray> cl = codes.split('.'); |
|
926 |
||
927 |
bool ok; |
|
928 |
int from = cl[0].toInt(&ok, 16); |
|
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
929 |
Q_ASSERT(ok); |
0 | 930 |
int to = from; |
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
931 |
if (cl.size() == 2) { |
0 | 932 |
to = cl[1].toInt(&ok, 16); |
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
933 |
Q_ASSERT(ok); |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
934 |
} |
0 | 935 |
|
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
936 |
QChar::UnicodeVersion age = age_map.value(l[1].trimmed(), QChar::Unicode_Unassigned); |
0 | 937 |
//qDebug() << hex << from << ".." << to << ba << age; |
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
938 |
if (age == QChar::Unicode_Unassigned) |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
939 |
qFatal("unassigned or unhandled age value: %s", l[1].constData()); |
0 | 940 |
|
941 |
for (int codepoint = from; codepoint <= to; ++codepoint) { |
|
942 |
UnicodeData d = unicodeData.value(codepoint, UnicodeData(codepoint)); |
|
943 |
d.p.age = age; |
|
944 |
unicodeData.insert(codepoint, d); |
|
945 |
} |
|
946 |
} |
|
947 |
} |
|
948 |
||
949 |
||
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
950 |
static void readDerivedNormalizationProps() |
0 | 951 |
{ |
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
952 |
QFile f("data/DerivedNormalizationProps.txt"); |
0 | 953 |
if (!f.exists()) |
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
954 |
qFatal("Couldn't find DerivedNormalizationProps.txt"); |
0 | 955 |
|
956 |
f.open(QFile::ReadOnly); |
|
957 |
||
958 |
while (!f.atEnd()) { |
|
959 |
QByteArray line; |
|
960 |
line.resize(1024); |
|
961 |
int len = f.readLine(line.data(), 1024); |
|
962 |
line.resize(len-1); |
|
963 |
||
964 |
int comment = line.indexOf('#'); |
|
965 |
if (comment >= 0) |
|
966 |
line = line.left(comment); |
|
967 |
||
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
968 |
if (line.trimmed().isEmpty()) |
0 | 969 |
continue; |
970 |
||
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
971 |
QList<QByteArray> l = line.split(';'); |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
972 |
Q_ASSERT(l.size() >= 2); |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
973 |
|
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
974 |
QByteArray propName = l[1].trimmed(); |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
975 |
if (propName != "Full_Composition_Exclusion") |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
976 |
// ### |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
977 |
continue; |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
978 |
|
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
979 |
QByteArray codes = l[0].trimmed(); |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
980 |
codes.replace("..", "."); |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
981 |
QList<QByteArray> cl = codes.split('.'); |
0 | 982 |
|
983 |
bool ok; |
|
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
984 |
int from = cl[0].toInt(&ok, 16); |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
985 |
Q_ASSERT(ok); |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
986 |
int to = from; |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
987 |
if (cl.size() == 2) { |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
988 |
to = cl[1].toInt(&ok, 16); |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
989 |
Q_ASSERT(ok); |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
990 |
} |
0 | 991 |
|
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
992 |
for (int codepoint = from; codepoint <= to; ++codepoint) { |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
993 |
UnicodeData d = unicodeData.value(codepoint, UnicodeData(codepoint)); |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
994 |
d.excludedComposition = true; |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
995 |
unicodeData.insert(codepoint, d); |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
996 |
} |
0 | 997 |
} |
998 |
||
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
999 |
for (int codepoint = 0; codepoint <= LAST_CODEPOINT; ++codepoint) { |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
1000 |
UnicodeData d = unicodeData.value(codepoint, UnicodeData(codepoint)); |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
1001 |
if (!d.excludedComposition |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
1002 |
&& d.decompositionType == QChar::Canonical |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
1003 |
&& d.decomposition.size() > 1) { |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
1004 |
Q_ASSERT(d.decomposition.size() == 2); |
0 | 1005 |
|
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
1006 |
uint part1 = d.decomposition.at(0); |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
1007 |
uint part2 = d.decomposition.at(1); |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
1008 |
|
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
1009 |
// all non-starters are listed in DerivedNormalizationProps.txt |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
1010 |
// and already excluded from composition |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
1011 |
Q_ASSERT(unicodeData.value(part1, UnicodeData(part1)).p.combiningClass == 0); |
0 | 1012 |
|
1013 |
++numLigatures; |
|
1014 |
highestLigature = qMax(highestLigature, (int)part1); |
|
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
1015 |
Ligature l = {(ushort)part1, (ushort)part2, codepoint}; |
0 | 1016 |
ligatureHashes[part2].append(l); |
1017 |
} |
|
1018 |
} |
|
1019 |
} |
|
1020 |
||
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
1021 |
|
0 | 1022 |
struct NormalizationCorrection { |
1023 |
uint codepoint; |
|
1024 |
uint mapped; |
|
1025 |
uint version; |
|
1026 |
}; |
|
1027 |
||
1028 |
// Reads data/NormalizationCorrections.txt and returns generated C++ source:
// the declaration of struct NormalizationCorrection, the table
// uc_normalization_corrections[] with one initializer per data line, and the
// NumNormalizationCorrections enum holding the number of entries.
//
// Aborts via qFatal() if the file is missing, cannot be opened, or names a
// Unicode version other than 3.2.0 or 4.0.0.
static QByteArray createNormalizationCorrections()
{
    QFile f("data/NormalizationCorrections.txt");
    if (!f.exists())
        qFatal("Couldn't find NormalizationCorrections.txt");

    // An existing file may still be unreadable; do not parse a closed device.
    if (!f.open(QFile::ReadOnly))
        qFatal("Couldn't open NormalizationCorrections.txt");

    QByteArray out;

    out += "struct NormalizationCorrection {\n"
           " uint ucs4;\n"
           " uint old_mapping;\n"
           " int version;\n"
           "};\n\n"

           "static const NormalizationCorrection uc_normalization_corrections[] = {\n";

    int numCorrections = 0;
    while (!f.atEnd()) {
        QByteArray line = f.readLine();
        // Strip the terminator only when one is present, so a final line
        // without a trailing newline keeps its last character.
        while (line.endsWith('\n') || line.endsWith('\r'))
            line.chop(1);

        // Drop the trailing comment and all blanks.
        int comment = line.indexOf('#');
        if (comment >= 0)
            line = line.left(comment);
        line.replace(" ", "");

        if (line.isEmpty())
            continue;

        // Code point ranges are not expected in this file.
        Q_ASSERT(!line.contains(".."));

        QList<QByteArray> fields = line.split(';');
        Q_ASSERT(fields.size() == 4);

        NormalizationCorrection c = { 0, 0, 0 };
        bool ok;
        c.codepoint = fields.at(0).toInt(&ok, 16);
        Q_ASSERT(ok);
        c.mapped = fields.at(1).toInt(&ok, 16);
        Q_ASSERT(ok);
        if (fields.at(3) == "3.2.0")
            c.version = QChar::Unicode_3_2;
        else if (fields.at(3) == "4.0.0")
            c.version = QChar::Unicode_4_0;
        else
            qFatal("unknown unicode version in NormalizationCorrection.txt");

        // Build the row purely from QByteArray pieces (no QString round trip).
        out += " { 0x" + QByteArray::number(c.codepoint, 16) + ", 0x" + QByteArray::number(c.mapped, 16)
             + ", " + QByteArray::number(c.version) + " },\n";
        ++numCorrections;
    }

    out += "};\n\n"

           "enum { NumNormalizationCorrections = " + QByteArray::number(numCorrections) + " };\n\n";

    return out;
}
|
1090 |
||
1091 |
||
1092 |
static void computeUniqueProperties() |
|
1093 |
{ |
|
1094 |
qDebug("computeUniqueProperties:"); |
|
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
1095 |
for (int uc = 0; uc <= LAST_CODEPOINT; ++uc) { |
0 | 1096 |
UnicodeData d = unicodeData.value(uc, UnicodeData(uc)); |
1097 |
||
1098 |
int index = uniqueProperties.indexOf(d.p); |
|
1099 |
if (index == -1) { |
|
1100 |
index = uniqueProperties.size(); |
|
1101 |
uniqueProperties.append(d.p); |
|
1102 |
} |
|
1103 |
d.propertyIndex = index; |
|
1104 |
unicodeData.insert(uc, d); |
|
1105 |
} |
|
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
1106 |
qDebug(" %d unique unicode properties found", uniqueProperties.size()); |
0 | 1107 |
} |
1108 |
||
1109 |
||
1110 |
static void readLineBreak() |
|
1111 |
{ |
|
1112 |
QFile f("data/LineBreak.txt"); |
|
1113 |
if (!f.exists()) |
|
1114 |
qFatal("Couldn't find LineBreak.txt"); |
|
1115 |
||
1116 |
f.open(QFile::ReadOnly); |
|
1117 |
||
1118 |
while (!f.atEnd()) { |
|
1119 |
QByteArray line; |
|
1120 |
line.resize(1024); |
|
1121 |
int len = f.readLine(line.data(), 1024); |
|
1122 |
line.resize(len-1); |
|
1123 |
||
1124 |
int comment = line.indexOf('#'); |
|
1125 |
if (comment >= 0) |
|
1126 |
line = line.left(comment); |
|
1127 |
line.replace(" ", ""); |
|
1128 |
||
1129 |
if (line.isEmpty()) |
|
1130 |
continue; |
|
1131 |
||
1132 |
QList<QByteArray> l = line.split(';'); |
|
1133 |
Q_ASSERT(l.size() == 2); |
|
1134 |
||
1135 |
QByteArray codes = l[0]; |
|
1136 |
codes.replace("..", "."); |
|
1137 |
QList<QByteArray> cl = codes.split('.'); |
|
1138 |
||
1139 |
bool ok; |
|
1140 |
int from = cl[0].toInt(&ok, 16); |
|
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
1141 |
Q_ASSERT(ok); |
0 | 1142 |
int to = from; |
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
1143 |
if (cl.size() == 2) { |
0 | 1144 |
to = cl[1].toInt(&ok, 16); |
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
1145 |
Q_ASSERT(ok); |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
1146 |
} |
0 | 1147 |
|
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
1148 |
LineBreakClass lb = line_break_map.value(l[1].trimmed(), LineBreak_Unassigned); |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
1149 |
if (lb == LineBreak_Unassigned) |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
1150 |
qFatal("unassigned line break class: %s", l[1].constData()); |
0 | 1151 |
|
1152 |
for (int codepoint = from; codepoint <= to; ++codepoint) { |
|
1153 |
UnicodeData d = unicodeData.value(codepoint, UnicodeData(codepoint)); |
|
1154 |
d.p.line_break_class = lb; |
|
1155 |
unicodeData.insert(codepoint, d); |
|
1156 |
} |
|
1157 |
} |
|
1158 |
} |
|
1159 |
||
1160 |
||
1161 |
static void readSpecialCasing() |
|
1162 |
{ |
|
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
1163 |
qDebug() << "Reading SpecialCasing.txt"; |
0 | 1164 |
QFile f("data/SpecialCasing.txt"); |
1165 |
if (!f.exists()) |
|
1166 |
qFatal("Couldn't find SpecialCasing.txt"); |
|
1167 |
||
1168 |
f.open(QFile::ReadOnly); |
|
1169 |
||
1170 |
while (!f.atEnd()) { |
|
1171 |
QByteArray line; |
|
1172 |
line.resize(1024); |
|
1173 |
int len = f.readLine(line.data(), 1024); |
|
1174 |
line.resize(len-1); |
|
1175 |
||
1176 |
int comment = line.indexOf('#'); |
|
1177 |
if (comment >= 0) |
|
1178 |
line = line.left(comment); |
|
1179 |
||
1180 |
if (line.isEmpty()) |
|
1181 |
continue; |
|
1182 |
||
1183 |
QList<QByteArray> l = line.split(';'); |
|
1184 |
||
1185 |
QByteArray condition = l.size() < 5 ? QByteArray() : l[4].trimmed(); |
|
1186 |
if (!condition.isEmpty()) |
|
1187 |
// ##### |
|
1188 |
continue; |
|
1189 |
||
1190 |
bool ok; |
|
1191 |
int codepoint = l[0].trimmed().toInt(&ok, 16); |
|
1192 |
Q_ASSERT(ok); |
|
1193 |
Q_ASSERT(codepoint <= 0xffff); |
|
1194 |
||
1195 |
// qDebug() << "codepoint" << hex << codepoint; |
|
1196 |
// qDebug() << line; |
|
1197 |
||
1198 |
QList<QByteArray> lower = l[1].trimmed().split(' '); |
|
1199 |
QList<int> lowerMap; |
|
1200 |
for (int i = 0; i < lower.size(); ++i) { |
|
1201 |
bool ok; |
|
1202 |
lowerMap.append(lower.at(i).toInt(&ok, 16)); |
|
1203 |
Q_ASSERT(ok); |
|
1204 |
} |
|
1205 |
||
1206 |
QList<QByteArray> title = l[2].trimmed().split(' '); |
|
1207 |
QList<int> titleMap; |
|
1208 |
for (int i = 0; i < title.size(); ++i) { |
|
1209 |
bool ok; |
|
1210 |
titleMap.append(title.at(i).toInt(&ok, 16)); |
|
1211 |
Q_ASSERT(ok); |
|
1212 |
} |
|
1213 |
||
1214 |
QList<QByteArray> upper = l[3].trimmed().split(' '); |
|
1215 |
QList<int> upperMap; |
|
1216 |
for (int i = 0; i < upper.size(); ++i) { |
|
1217 |
bool ok; |
|
1218 |
upperMap.append(upper.at(i).toInt(&ok, 16)); |
|
1219 |
Q_ASSERT(ok); |
|
1220 |
} |
|
1221 |
||
1222 |
||
1223 |
UnicodeData ud = unicodeData.value(codepoint, UnicodeData(codepoint)); |
|
1224 |
||
1225 |
Q_ASSERT(lowerMap.size() > 1 || lowerMap.at(0) == codepoint + ud.p.lowerCaseDiff); |
|
1226 |
Q_ASSERT(titleMap.size() > 1 || titleMap.at(0) == codepoint + ud.p.titleCaseDiff); |
|
1227 |
Q_ASSERT(upperMap.size() > 1 || upperMap.at(0) == codepoint + ud.p.upperCaseDiff); |
|
1228 |
||
1229 |
if (lowerMap.size() > 1) { |
|
1230 |
ud.p.lowerCaseSpecial = true; |
|
1231 |
ud.p.lowerCaseDiff = appendToSpecialCaseMap(lowerMap); |
|
1232 |
} |
|
1233 |
if (titleMap.size() > 1) { |
|
1234 |
ud.p.titleCaseSpecial = true; |
|
1235 |
ud.p.titleCaseDiff = appendToSpecialCaseMap(titleMap); |
|
1236 |
} |
|
1237 |
if (upperMap.size() > 1) { |
|
1238 |
ud.p.upperCaseSpecial = true; |
|
1239 |
ud.p.upperCaseDiff = appendToSpecialCaseMap(upperMap);; |
|
1240 |
} |
|
1241 |
||
1242 |
unicodeData.insert(codepoint, ud); |
|
1243 |
} |
|
1244 |
} |
|
1245 |
||
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
1246 |
static int maxCaseFoldDiff = 0; |
0 | 1247 |
|
1248 |
static void readCaseFolding() |
|
1249 |
{ |
|
1250 |
qDebug() << "Reading CaseFolding.txt"; |
|
1251 |
QFile f("data/CaseFolding.txt"); |
|
1252 |
if (!f.exists()) |
|
1253 |
qFatal("Couldn't find CaseFolding.txt"); |
|
1254 |
||
1255 |
f.open(QFile::ReadOnly); |
|
1256 |
||
1257 |
while (!f.atEnd()) { |
|
1258 |
QByteArray line; |
|
1259 |
line.resize(1024); |
|
1260 |
int len = f.readLine(line.data(), 1024); |
|
1261 |
line.resize(len-1); |
|
1262 |
||
1263 |
int comment = line.indexOf('#'); |
|
1264 |
if (comment >= 0) |
|
1265 |
line = line.left(comment); |
|
1266 |
||
1267 |
if (line.isEmpty()) |
|
1268 |
continue; |
|
1269 |
||
1270 |
QList<QByteArray> l = line.split(';'); |
|
1271 |
||
1272 |
bool ok; |
|
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
1273 |
int codepoint = l[0].trimmed().toInt(&ok, 16); |
0 | 1274 |
Q_ASSERT(ok); |
1275 |
||
1276 |
||
1277 |
l[1] = l[1].trimmed(); |
|
1278 |
if (l[1] == "F" || l[1] == "T") |
|
1279 |
continue; |
|
1280 |
||
1281 |
// qDebug() << "codepoint" << hex << codepoint; |
|
1282 |
// qDebug() << line; |
|
1283 |
QList<QByteArray> fold = l[2].trimmed().split(' '); |
|
1284 |
QList<int> foldMap; |
|
1285 |
for (int i = 0; i < fold.size(); ++i) { |
|
1286 |
bool ok; |
|
1287 |
foldMap.append(fold.at(i).toInt(&ok, 16)); |
|
1288 |
Q_ASSERT(ok); |
|
1289 |
} |
|
1290 |
||
1291 |
UnicodeData ud = unicodeData.value(codepoint, UnicodeData(codepoint)); |
|
1292 |
if (foldMap.size() == 1) { |
|
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
1293 |
if (qAbs(foldMap.at(0) - codepoint) >= (1<<14)) |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
1294 |
qWarning() << "caseFoldDiff exceeded (" << hex << codepoint << "->" << foldMap.at(0) << ")"; |
0 | 1295 |
ud.p.caseFoldDiff = foldMap.at(0) - codepoint; |
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
1296 |
maxCaseFoldDiff = qMax(maxCaseFoldDiff, qAbs(ud.p.caseFoldDiff)); |
0 | 1297 |
if (codepoint > 0xffff) { |
1298 |
// if the condition below doesn't hold anymore we need to modify our case folding code |
|
1299 |
//qDebug() << codepoint << QChar::highSurrogate(codepoint) << QChar::highSurrogate(foldMap.at(0)); |
|
1300 |
Q_ASSERT(QChar::highSurrogate(codepoint) == QChar::highSurrogate(foldMap.at(0))); |
|
1301 |
} |
|
1302 |
if (foldMap.at(0) != codepoint + ud.p.lowerCaseDiff) |
|
1303 |
qDebug() << hex << codepoint; |
|
1304 |
} else { |
|
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
1305 |
qFatal("we currently don't support full case foldings"); |
0 | 1306 |
// qDebug() << "special" << hex << foldMap; |
1307 |
ud.p.caseFoldSpecial = true; |
|
1308 |
ud.p.caseFoldDiff = appendToSpecialCaseMap(foldMap); |
|
1309 |
} |
|
1310 |
unicodeData.insert(codepoint, ud); |
|
1311 |
} |
|
1312 |
} |
|
1313 |
||
1314 |
static void readGraphemeBreak() |
|
1315 |
{ |
|
1316 |
qDebug() << "Reading GraphemeBreakProperty.txt"; |
|
1317 |
QFile f("data/GraphemeBreakProperty.txt"); |
|
1318 |
if (!f.exists()) |
|
1319 |
qFatal("Couldn't find GraphemeBreakProperty.txt"); |
|
1320 |
||
1321 |
f.open(QFile::ReadOnly); |
|
1322 |
||
1323 |
while (!f.atEnd()) { |
|
1324 |
QByteArray line; |
|
1325 |
line.resize(1024); |
|
1326 |
int len = f.readLine(line.data(), 1024); |
|
1327 |
line.resize(len-1); |
|
1328 |
||
1329 |
int comment = line.indexOf('#'); |
|
1330 |
if (comment >= 0) |
|
1331 |
line = line.left(comment); |
|
1332 |
||
1333 |
if (line.isEmpty()) |
|
1334 |
continue; |
|
1335 |
||
1336 |
QList<QByteArray> l = line.split(';'); |
|
1337 |
||
1338 |
QByteArray codes = l[0].trimmed(); |
|
1339 |
codes.replace("..", "."); |
|
1340 |
QList<QByteArray> cl = codes.split('.'); |
|
1341 |
||
1342 |
bool ok; |
|
1343 |
int from = cl[0].toInt(&ok, 16); |
|
1344 |
Q_ASSERT(ok); |
|
1345 |
int to = from; |
|
1346 |
if (cl.size() == 2) { |
|
1347 |
to = cl[1].toInt(&ok, 16); |
|
1348 |
Q_ASSERT(ok); |
|
1349 |
} |
|
1350 |
||
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
1351 |
GraphemeBreak brk = grapheme_break_map.value(l[1].trimmed(), GraphemeBreak_Unassigned); |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
1352 |
if (brk == GraphemeBreak_Unassigned) |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
1353 |
qFatal("unassigned grapheme break class: %s", l[1].constData()); |
0 | 1354 |
|
1355 |
for (int codepoint = from; codepoint <= to; ++codepoint) { |
|
1356 |
UnicodeData ud = unicodeData.value(codepoint, UnicodeData(codepoint)); |
|
1357 |
ud.p.graphemeBreak = brk; |
|
1358 |
unicodeData.insert(codepoint, ud); |
|
1359 |
} |
|
1360 |
} |
|
1361 |
} |
|
1362 |
||
1363 |
static void readWordBreak() |
|
1364 |
{ |
|
1365 |
qDebug() << "Reading WordBreakProperty.txt"; |
|
1366 |
QFile f("data/WordBreakProperty.txt"); |
|
1367 |
if (!f.exists()) |
|
1368 |
qFatal("Couldn't find WordBreakProperty.txt"); |
|
1369 |
||
1370 |
f.open(QFile::ReadOnly); |
|
1371 |
||
1372 |
while (!f.atEnd()) { |
|
1373 |
QByteArray line; |
|
1374 |
line.resize(1024); |
|
1375 |
int len = f.readLine(line.data(), 1024); |
|
1376 |
line.resize(len-1); |
|
1377 |
||
1378 |
int comment = line.indexOf('#'); |
|
1379 |
if (comment >= 0) |
|
1380 |
line = line.left(comment); |
|
1381 |
||
1382 |
if (line.isEmpty()) |
|
1383 |
continue; |
|
1384 |
||
1385 |
QList<QByteArray> l = line.split(';'); |
|
1386 |
||
1387 |
QByteArray codes = l[0].trimmed(); |
|
1388 |
codes.replace("..", "."); |
|
1389 |
QList<QByteArray> cl = codes.split('.'); |
|
1390 |
||
1391 |
bool ok; |
|
1392 |
int from = cl[0].toInt(&ok, 16); |
|
1393 |
Q_ASSERT(ok); |
|
1394 |
int to = from; |
|
1395 |
if (cl.size() == 2) { |
|
1396 |
to = cl[1].toInt(&ok, 16); |
|
1397 |
Q_ASSERT(ok); |
|
1398 |
} |
|
1399 |
||
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
1400 |
WordBreak brk = word_break_map.value(l[1].trimmed(), WordBreak_Unassigned); |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
1401 |
if (brk == WordBreak_Unassigned) |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
1402 |
qFatal("unassigned word break class: %s", l[1].constData()); |
0 | 1403 |
|
1404 |
for (int codepoint = from; codepoint <= to; ++codepoint) { |
|
1405 |
UnicodeData ud = unicodeData.value(codepoint, UnicodeData(codepoint)); |
|
1406 |
ud.p.wordBreak = brk; |
|
1407 |
unicodeData.insert(codepoint, ud); |
|
1408 |
} |
|
1409 |
} |
|
1410 |
} |
|
1411 |
||
1412 |
static void readSentenceBreak() |
|
1413 |
{ |
|
1414 |
qDebug() << "Reading SentenceBreakProperty.txt"; |
|
1415 |
QFile f("data/SentenceBreakProperty.txt"); |
|
1416 |
if (!f.exists()) |
|
1417 |
qFatal("Couldn't find SentenceBreakProperty.txt"); |
|
1418 |
||
1419 |
f.open(QFile::ReadOnly); |
|
1420 |
||
1421 |
while (!f.atEnd()) { |
|
1422 |
QByteArray line; |
|
1423 |
line.resize(1024); |
|
1424 |
int len = f.readLine(line.data(), 1024); |
|
1425 |
line.resize(len-1); |
|
1426 |
||
1427 |
int comment = line.indexOf('#'); |
|
1428 |
if (comment >= 0) |
|
1429 |
line = line.left(comment); |
|
1430 |
||
1431 |
if (line.isEmpty()) |
|
1432 |
continue; |
|
1433 |
||
1434 |
QList<QByteArray> l = line.split(';'); |
|
1435 |
||
1436 |
QByteArray codes = l[0].trimmed(); |
|
1437 |
codes.replace("..", "."); |
|
1438 |
QList<QByteArray> cl = codes.split('.'); |
|
1439 |
||
1440 |
bool ok; |
|
1441 |
int from = cl[0].toInt(&ok, 16); |
|
1442 |
Q_ASSERT(ok); |
|
1443 |
int to = from; |
|
1444 |
if (cl.size() == 2) { |
|
1445 |
to = cl[1].toInt(&ok, 16); |
|
1446 |
Q_ASSERT(ok); |
|
1447 |
} |
|
1448 |
||
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
1449 |
SentenceBreak brk = sentence_break_map.value(l[1].trimmed(), SentenceBreak_Unassigned); |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
1450 |
if (brk == SentenceBreak_Unassigned) |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
1451 |
qFatal("unassigned sentence break class: %s", l[1].constData()); |
0 | 1452 |
|
1453 |
for (int codepoint = from; codepoint <= to; ++codepoint) { |
|
1454 |
UnicodeData ud = unicodeData.value(codepoint, UnicodeData(codepoint)); |
|
1455 |
ud.p.sentenceBreak = brk; |
|
1456 |
unicodeData.insert(codepoint, ud); |
|
1457 |
} |
|
1458 |
} |
|
1459 |
} |
|
1460 |
||
1461 |
#if 0 |
|
1462 |
// This piece of code does full case folding and comparison. We currently
// don't use it, since it causes lots of issues with things such as
// case-insensitive search and replace.
|
1465 |
// Writes the case-folded form of ch to out as a zero-terminated ushort
// sequence: either the single value ch + caseFoldDiff, or, for code points
// flagged caseFoldSpecial, the zero-terminated sequence found in
// specialCaseMap at offset caseFoldDiff.
static inline void foldCase(uint ch, ushort *out)
{
    const QUnicodeTables::Properties *props = qGetProp(ch);
    if (props->caseFoldSpecial) {
        // Copy everything up to (not including) the terminating zero.
        for (const ushort *src = specialCaseMap + props->caseFoldDiff; *src; ++src)
            *out++ = *src;
    } else {
        *out++ = ch + props->caseFoldDiff;
    }
    *out = 0;
}
|
1477 |
||
1478 |
static int ucstricmp(const ushort *a, const ushort *ae, const ushort *b, const ushort *be) |
|
1479 |
{ |
|
1480 |
if (a == b) |
|
1481 |
return 0; |
|
1482 |
if (a == 0) |
|
1483 |
return 1; |
|
1484 |
if (b == 0) |
|
1485 |
return -1; |
|
1486 |
||
1487 |
while (a != ae && b != be) { |
|
1488 |
const QUnicodeTables::Properties *pa = qGetProp(*a); |
|
1489 |
const QUnicodeTables::Properties *pb = qGetProp(*b); |
|
1490 |
if (pa->caseFoldSpecial | pb->caseFoldSpecial) |
|
1491 |
goto special; |
|
1492 |
int diff = (int)(*a + pa->caseFoldDiff) - (int)(*b + pb->caseFoldDiff); |
|
1493 |
if ((diff)) |
|
1494 |
return diff; |
|
1495 |
++a; |
|
1496 |
++b; |
|
1497 |
} |
|
1498 |
} |
|
1499 |
if (a == ae) { |
|
1500 |
if (b == be) |
|
1501 |
return 0; |
|
1502 |
return -1; |
|
1503 |
} |
|
1504 |
return 1; |
|
1505 |
special: |
|
1506 |
ushort abuf[SPECIAL_CASE_MAX_LEN + 1]; |
|
1507 |
ushort bbuf[SPECIAL_CASE_MAX_LEN + 1]; |
|
1508 |
abuf[0] = bbuf[0] = 0; |
|
1509 |
ushort *ap = abuf; |
|
1510 |
ushort *bp = bbuf; |
|
1511 |
while (1) { |
|
1512 |
if (!*ap) { |
|
1513 |
if (a == ae) { |
|
1514 |
if (!*bp && b == be) |
|
1515 |
return 0; |
|
1516 |
return -1; |
|
1517 |
} |
|
1518 |
foldCase(*(a++), abuf); |
|
1519 |
ap = abuf; |
|
1520 |
} |
|
1521 |
if (!*bp) { |
|
1522 |
if (b == be) |
|
1523 |
return 1; |
|
1524 |
foldCase(*(b++), bbuf); |
|
1525 |
bp = bbuf; |
|
1526 |
} |
|
1527 |
if (*ap != *bp) |
|
1528 |
return (int)*ap - (int)*bp; |
|
1529 |
++ap; |
|
1530 |
++bp; |
|
1531 |
} |
|
1532 |
} |
|
1533 |
||
1534 |
||
1535 |
static int ucstricmp(const ushort *a, const ushort *ae, const uchar *b) |
|
1536 |
{ |
|
1537 |
if (a == 0) |
|
1538 |
return 1; |
|
1539 |
if (b == 0) |
|
1540 |
return -1; |
|
1541 |
||
1542 |
while (a != ae && *b) { |
|
1543 |
const QUnicodeTables::Properties *pa = qGetProp(*a); |
|
1544 |
const QUnicodeTables::Properties *pb = qGetProp((ushort)*b); |
|
1545 |
if (pa->caseFoldSpecial | pb->caseFoldSpecial) |
|
1546 |
goto special; |
|
1547 |
int diff = (int)(*a + pa->caseFoldDiff) - (int)(*b + pb->caseFoldDiff); |
|
1548 |
if ((diff)) |
|
1549 |
return diff; |
|
1550 |
++a; |
|
1551 |
++b; |
|
1552 |
} |
|
1553 |
if (a == ae) { |
|
1554 |
if (!*b) |
|
1555 |
return 0; |
|
1556 |
return -1; |
|
1557 |
} |
|
1558 |
return 1; |
|
1559 |
||
1560 |
special: |
|
1561 |
ushort abuf[SPECIAL_CASE_MAX_LEN + 1]; |
|
1562 |
ushort bbuf[SPECIAL_CASE_MAX_LEN + 1]; |
|
1563 |
abuf[0] = bbuf[0] = 0; |
|
1564 |
ushort *ap = abuf; |
|
1565 |
ushort *bp = bbuf; |
|
1566 |
while (1) { |
|
1567 |
if (!*ap) { |
|
1568 |
if (a == ae) { |
|
1569 |
if (!*bp && !*b) |
|
1570 |
return 0; |
|
1571 |
return -1; |
|
1572 |
} |
|
1573 |
foldCase(*(a++), abuf); |
|
1574 |
ap = abuf; |
|
1575 |
} |
|
1576 |
if (!*bp) { |
|
1577 |
if (!*b) |
|
1578 |
return 1; |
|
1579 |
foldCase(*(b++), bbuf); |
|
1580 |
bp = bbuf; |
|
1581 |
} |
|
1582 |
if (*ap != *bp) |
|
1583 |
return (int)*ap - (int)*bp; |
|
1584 |
++ap; |
|
1585 |
++bp; |
|
1586 |
} |
|
1587 |
} |
|
1588 |
#endif |
|
1589 |
||
1590 |
#if 0 |
|
1591 |
// Block names in first-seen order; readBlocks() uses a name's position in
// this list as its block index.
static QList<QByteArray> blockNames;

// One code point range read from Blocks.txt.
struct BlockInfo
{
    int blockIndex;     // index of the block's name in blockNames
    int firstCodePoint; // first code point of the range
    int lastCodePoint;  // last code point of the range
};

// All ranges collected by readBlocks(), in file order.
static QList<BlockInfo> blockInfoList;
|
1599 |
||
1600 |
static void readBlocks() |
|
1601 |
{ |
|
1602 |
QFile f("data/Blocks.txt"); |
|
1603 |
if (!f.exists()) |
|
1604 |
qFatal("Couldn't find Blocks.txt"); |
|
1605 |
||
1606 |
f.open(QFile::ReadOnly); |
|
1607 |
||
1608 |
while (!f.atEnd()) { |
|
1609 |
QByteArray line = f.readLine(); |
|
1610 |
line.resize(line.size() - 1); |
|
1611 |
||
1612 |
int comment = line.indexOf("#"); |
|
1613 |
if (comment >= 0) |
|
1614 |
line = line.left(comment); |
|
1615 |
||
1616 |
line.replace(" ", ""); |
|
1617 |
||
1618 |
if (line.isEmpty()) |
|
1619 |
continue; |
|
1620 |
||
1621 |
int semicolon = line.indexOf(';'); |
|
1622 |
Q_ASSERT(semicolon >= 0); |
|
1623 |
QByteArray codePoints = line.left(semicolon); |
|
1624 |
QByteArray blockName = line.mid(semicolon + 1); |
|
1625 |
||
1626 |
int blockIndex = blockNames.indexOf(blockName); |
|
1627 |
if (blockIndex < 0) { |
|
1628 |
blockNames.append(blockName); |
|
1629 |
blockIndex = blockNames.indexOf(blockName); |
|
1630 |
Q_ASSERT(blockIndex >= 0); |
|
1631 |
} |
|
1632 |
||
1633 |
int dotdot = codePoints.indexOf(".."); |
|
1634 |
Q_ASSERT(dotdot >= 0); |
|
1635 |
bool unused; |
|
1636 |
int first = codePoints.left(dotdot).toInt(&unused, 16); |
|
1637 |
int last = codePoints.mid(dotdot + 2).toInt(&unused, 16); |
|
1638 |
||
1639 |
BlockInfo blockInfo = { blockIndex, first, last }; |
|
1640 |
blockInfoList.append(blockInfo); |
|
1641 |
} |
|
1642 |
} |
|
1643 |
#endif |
|
1644 |
||
1645 |
static QList<QByteArray> scriptNames; |
|
1646 |
static QHash<int, int> scriptAssignment; |
|
1647 |
static QHash<int, int> scriptHash; |
|
1648 |
||
1649 |
struct ExtraBlock { |
|
1650 |
int block; |
|
1651 |
QVector<int> vector; |
|
1652 |
}; |
|
1653 |
||
1654 |
static QList<ExtraBlock> extraBlockList; |
|
1655 |
||
1656 |
||
1657 |
static void readScripts() |
|
1658 |
{ |
|
1659 |
scriptNames.append("Common"); |
|
1660 |
||
1661 |
static const char *files[] = { |
|
1662 |
"data/ScriptsInitial.txt", |
|
1663 |
"data/Scripts.txt", |
|
1664 |
"data/ScriptsCorrections.txt" |
|
1665 |
}; |
|
1666 |
enum { fileCount = sizeof(files) / sizeof(const char *) }; |
|
1667 |
||
1668 |
for (int i = 0; i < fileCount; ++i) { |
|
1669 |
QFile f(files[i]); |
|
1670 |
if (!f.exists()) |
|
1671 |
qFatal("Couldn't find %s", files[i]); |
|
1672 |
||
1673 |
||
1674 |
f.open(QFile::ReadOnly); |
|
1675 |
||
1676 |
while (!f.atEnd()) { |
|
1677 |
QByteArray line = f.readLine(); |
|
1678 |
line.resize(line.size() - 1); |
|
1679 |
||
1680 |
int comment = line.indexOf("#"); |
|
1681 |
if (comment >= 0) |
|
1682 |
line = line.left(comment); |
|
1683 |
||
1684 |
line.replace(" ", ""); |
|
1685 |
line.replace("_", ""); |
|
1686 |
||
1687 |
if (line.isEmpty()) |
|
1688 |
continue; |
|
1689 |
||
1690 |
int semicolon = line.indexOf(';'); |
|
1691 |
Q_ASSERT(semicolon >= 0); |
|
1692 |
QByteArray codePoints = line.left(semicolon); |
|
1693 |
QByteArray scriptName = line.mid(semicolon + 1); |
|
1694 |
||
1695 |
int scriptIndex = scriptNames.indexOf(scriptName); |
|
1696 |
if (scriptIndex < 0) { |
|
1697 |
scriptNames.append(scriptName); |
|
1698 |
scriptIndex = scriptNames.indexOf(scriptName); |
|
1699 |
Q_ASSERT(scriptIndex >= 0); |
|
1700 |
} |
|
1701 |
||
1702 |
int dotdot = codePoints.indexOf(".."); |
|
1703 |
bool unused; |
|
1704 |
int first = -1, last = -1; |
|
1705 |
if (dotdot >= 0) { |
|
1706 |
first = codePoints.left(dotdot).toInt(&unused, 16); |
|
1707 |
last = codePoints.mid(dotdot + 2).toInt(&unused, 16); |
|
1708 |
} else { |
|
1709 |
first = codePoints.toInt(&unused, 16); |
|
1710 |
} |
|
1711 |
||
1712 |
if (last != -1) { |
|
1713 |
for (int i = first; i <= last; ++i) |
|
1714 |
scriptAssignment[i] = scriptIndex; |
|
1715 |
} else { |
|
1716 |
scriptAssignment[first] = scriptIndex; |
|
1717 |
} |
|
1718 |
} |
|
1719 |
} |
|
1720 |
} |
|
1721 |
||
1722 |
||
1723 |
// NOTE(review): not referenced in this part of the file; presumably assigned
// while the script tables are generated -- confirm against its users.
static int scriptSentinel = 0;
|
1724 |
||
1725 |
QByteArray createScriptEnumDeclaration() |
|
1726 |
{ |
|
1727 |
static const char *specialScripts[] = { |
|
1728 |
"Common", |
|
1729 |
"Arabic", |
|
1730 |
"Armenian", |
|
1731 |
"Bengali", |
|
1732 |
"Cyrillic", |
|
1733 |
"Devanagari", |
|
1734 |
"Georgian", |
|
1735 |
"Greek", |
|
1736 |
"Gujarati", |
|
1737 |
"Gurmukhi", |
|
1738 |
"Hangul", |
|
1739 |
"Hebrew", |
|
1740 |
"Kannada", |
|
1741 |
"Khmer", |
|
1742 |
"Lao", |
|
1743 |
"Malayalam", |
|
1744 |
"Myanmar", |
|
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
1745 |
"Nko", |
0 | 1746 |
"Ogham", |
1747 |
"Oriya", |
|
1748 |
"Runic", |
|
1749 |
"Sinhala", |
|
1750 |
"Syriac", |
|
1751 |
"Tamil", |
|
1752 |
"Telugu", |
|
1753 |
"Thaana", |
|
1754 |
"Thai", |
|
1755 |
"Tibetan", |
|
1756 |
"Inherited" |
|
1757 |
}; |
|
1758 |
const int specialScriptsCount = sizeof(specialScripts) / sizeof(const char *); |
|
1759 |
||
1760 |
// generate script enum |
|
1761 |
QByteArray declaration; |
|
1762 |
||
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
1763 |
declaration += " // See http://www.unicode.org/reports/tr24/tr24-5.html\n"; |
0 | 1764 |
declaration += " enum Script {\n Common"; |
1765 |
||
1766 |
int uniqueScripts = 1; // Common |
|
1767 |
||
1768 |
// output the ones with special processing first |
|
1769 |
for (int i = 1; i < scriptNames.size(); ++i) { |
|
1770 |
QByteArray scriptName = scriptNames.at(i); |
|
1771 |
// does the script require special processing? |
|
1772 |
bool special = false; |
|
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
1773 |
for (int s = 0; s < specialScriptsCount; ++s) { |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
1774 |
if (scriptName == specialScripts[s]) { |
0 | 1775 |
special = true; |
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
1776 |
break; |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
1777 |
} |
0 | 1778 |
} |
1779 |
if (!special) { |
|
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
1780 |
scriptHash[i] = 0; // alias for 'Common' |
0 | 1781 |
continue; |
1782 |
} else { |
|
1783 |
++uniqueScripts; |
|
1784 |
scriptHash[i] = i; |
|
1785 |
} |
|
1786 |
||
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
1787 |
if (scriptName != "Inherited") { |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
1788 |
declaration += ",\n "; |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
1789 |
declaration += scriptName; |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
1790 |
} |
0 | 1791 |
} |
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
1792 |
declaration += ",\n Inherited"; |
0 | 1793 |
declaration += ",\n ScriptCount = Inherited"; |
1794 |
||
1795 |
// output the ones that are an alias for 'Common' |
|
1796 |
for (int i = 1; i < scriptNames.size(); ++i) { |
|
1797 |
if (scriptHash.value(i) != 0) |
|
1798 |
continue; |
|
1799 |
declaration += ",\n "; |
|
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
1800 |
declaration += scriptNames.at(i); |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
1801 |
declaration += " = Common"; |
0 | 1802 |
} |
1803 |
||
1804 |
declaration += "\n };\n"; |
|
1805 |
||
1806 |
scriptSentinel = ((uniqueScripts + 16) / 32) * 32; // a multiple of 32 |
|
1807 |
declaration += " enum { ScriptSentinel = "; |
|
1808 |
declaration += QByteArray::number(scriptSentinel); |
|
1809 |
declaration += " };\n\n"; |
|
1810 |
return declaration; |
|
1811 |
} |
|
1812 |
||
1813 |
// Builds the text of the UnicodeBlockCount/UnicodeBlockSize enums and of the
// uc_scripts table inside namespace QUnicodeTables.
//
// The table starts with one entry per 128-code-point block. A block whose
// code points all map (through scriptHash) to the same script is written as
// that script's name; any other block is written as scriptSentinel plus the
// index of an "extra block", and the per-code-point script names of all extra
// blocks are appended after the first-level entries.
// Requires scriptSentinel to have been set (asserted below).
QByteArray createScriptTableDeclaration()
{
    Q_ASSERT(scriptSentinel > 0);

    const int unicodeBlockCount = 512; // number of unicode blocks
    const int unicodeBlockSize = 128; // size of each block

    QByteArray declaration = "enum { UnicodeBlockCount = ";
    declaration += QByteArray::number(unicodeBlockCount);
    declaration += " }; // number of unicode blocks\n";
    declaration += "enum { UnicodeBlockSize = ";
    declaration += QByteArray::number(unicodeBlockSize);
    declaration += " }; // size of each block\n\n";

    // script table
    declaration += "namespace QUnicodeTables {\n\nstatic const unsigned char uc_scripts[] = {\n";
    for (int blockIndex = 0; blockIndex < unicodeBlockCount; ++blockIndex) {
        const int block = (((blockIndex << 7) & 0xff00) | ((blockIndex & 1) * 0x80));
        const QByteArray rangeStart = QByteArray::number(block, 16).rightJustified(4, '0');
        const QByteArray rangeEnd =
            QByteArray::number(block + unicodeBlockSize - 1, 16).rightJustified(4, '0');

        // collect the script assigned to every code point of this block
        int blockAssignment[unicodeBlockSize];
        for (int x = 0; x < unicodeBlockSize; ++x)
            blockAssignment[x] = scriptAssignment.value((blockIndex << 7) | x, 0);

        // is the whole block covered by a single (hashed) script?
        const int originalScript = blockAssignment[0];
        const int script = scriptHash.value(originalScript);
        bool uniform = true;
        for (int x = 1; x < unicodeBlockSize; ++x) {
            if (scriptHash.value(blockAssignment[x]) != script) {
                uniform = false;
                break;
            }
        }

        declaration += " ";
        if (uniform) {
            declaration += scriptNames.value(originalScript);
            declaration += ", /* U+";
            declaration += rangeStart;
            declaration += '-';
            declaration += rangeEnd;
            declaration += " */\n";
            continue;
        }

        // mixed block: reference an extra block by sentinel + index
        const int extraIndex = extraBlockList.size();
        declaration += QByteArray::number(extraIndex + scriptSentinel);
        declaration += ", /* U+";
        declaration += rangeStart;
        declaration += '-';
        declaration += rangeEnd;
        declaration += " at offset ";
        declaration += QByteArray::number((extraIndex * unicodeBlockSize) + unicodeBlockCount);
        declaration += " */\n";

        ExtraBlock extraBlock;
        extraBlock.block = block;
        extraBlock.vector.resize(unicodeBlockSize);
        for (int x = 0; x < unicodeBlockSize; ++x)
            extraBlock.vector[x] = blockAssignment[x];

        extraBlockList.append(extraBlock);
    }

    // second level: the per-code-point scripts of every mixed block
    const int extraBlockCount = extraBlockList.size();
    for (int i = 0; i < extraBlockCount; ++i) {
        const ExtraBlock &extraBlock = extraBlockList.at(i);
        const int block = extraBlock.block;
        const int entryCount = extraBlock.vector.size();
        const bool lastBlock = (i == extraBlockCount - 1);

        declaration += "\n\n /* U+";
        declaration += QByteArray::number(block, 16).rightJustified(4, '0');
        declaration += '-';
        declaration +=
            QByteArray::number(block + unicodeBlockSize - 1, 16).rightJustified(4, '0');
        declaration += " at offset ";
        declaration += QByteArray::number((i * unicodeBlockSize) + unicodeBlockCount);
        declaration += " */\n ";

        for (int x = 0; x < entryCount; ++x) {
            const bool lastEntry = (x == entryCount - 1);

            declaration += scriptNames.value(extraBlock.vector.at(x));
            // no comma after the very last table entry
            if (!(lastEntry && lastBlock))
                declaration += ',';
            // eight entries per output line
            if ((x & 7) == 7 && !lastEntry)
                declaration += "\n ";
            else
                declaration += ' ';
        }
    }
    declaration += "\n};\n\n} // namespace QUnicodeTables\n\n";

    qDebug("createScriptTableDeclaration: table size is %d bytes",
           unicodeBlockCount + (extraBlockList.size() * unicodeBlockSize));

    return declaration;
}
|
1916 |
||
1917 |
#if 0
// Debugging aid (compiled out): prints the stored data of every code point
// in the inclusive range [from, to].
static void dump(int from, int to)
{
    for (int uc = from; uc <= to; ++uc) {
        UnicodeData data = unicodeData.value(uc, UnicodeData(uc));
        qDebug("0x%04x: cat=%d combining=%d dir=%d case=%x mirror=%x joining=%d age=%d",
               uc, data.p.category, data.p.combiningClass, data.p.direction, data.otherCase,
               data.mirroredChar, data.p.joining, data.p.age);
        const bool decomposes = (data.decompositionType != QChar::NoDecomposition);
        if (decomposes) {
            qDebug(" decomposition: type=%d, length=%d, first=%x", data.decompositionType,
                   data.decomposition.size(), data.decomposition[0]);
        }
    }
    qDebug(" ");
}
#endif
|
1932 |
||
1933 |
struct PropertyBlock { |
|
1934 |
PropertyBlock() { index = -1; } |
|
1935 |
int index; |
|
1936 |
QList<int> properties; |
|
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
1937 |
bool operator==(const PropertyBlock &other) |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
1938 |
{ return properties == other.properties; } |
0 | 1939 |
}; |
1940 |
||
1941 |
// Generates the source text for the character property lookup:
//  - uc_property_trie: a block map followed by the data of each unique block,
//  - the GET_PROP_INDEX / GET_PROP_INDEX_UCS2 macros,
//  - the uc_properties array built from uniqueProperties,
//  - the qGetProp() / QUnicodeTables::properties() accessors,
//  - the specialCaseMap array and SPECIAL_CASE_MAX_LEN.
// Code points below BMP_END are grouped into blocks of BMP_BLOCKSIZE, the
// rest up to SMP_END into blocks of SMP_BLOCKSIZE; identical blocks are
// stored only once. Size statistics are reported through qDebug().
static QByteArray createPropertyInfo()
{
    qDebug("createPropertyInfo:");

    const int BMP_BLOCKSIZE = 32;
    const int BMP_SHIFT = 5;
    const int BMP_END = 0x11000;
    const int SMP_END = 0x110000;
    const int SMP_BLOCKSIZE = 256;
    const int SMP_SHIFT = 8;

    QList<PropertyBlock> blocks;
    QList<int> blockMap;

    int used = 0;

    for (int block = 0; block < BMP_END/BMP_BLOCKSIZE; ++block) {
        PropertyBlock b;
        for (int i = 0; i < BMP_BLOCKSIZE; ++i) {
            int uc = block*BMP_BLOCKSIZE + i;
            UnicodeData d = unicodeData.value(uc, UnicodeData(uc));
            b.properties.append(d.propertyIndex);
        }
        // reuse an identical block if one was already stored
        int index = blocks.indexOf(b);
        if (index == -1) {
            index = blocks.size();
            b.index = used;
            used += BMP_BLOCKSIZE;
            blocks.append(b);
        }
        blockMap.append(blocks.at(index).index);
    }

    int bmp_blocks = blocks.size();
    Q_ASSERT(blockMap.size() == BMP_END/BMP_BLOCKSIZE);

    for (int block = BMP_END/SMP_BLOCKSIZE; block < SMP_END/SMP_BLOCKSIZE; ++block) {
        PropertyBlock b;
        for (int i = 0; i < SMP_BLOCKSIZE; ++i) {
            int uc = block*SMP_BLOCKSIZE + i;
            UnicodeData d = unicodeData.value(uc, UnicodeData(uc));
            b.properties.append(d.propertyIndex);
        }
        int index = blocks.indexOf(b);
        if (index == -1) {
            index = blocks.size();
            b.index = used;
            used += SMP_BLOCKSIZE;
            blocks.append(b);
        }
        blockMap.append(blocks.at(index).index);
    }

    int bmp_block_data = bmp_blocks*BMP_BLOCKSIZE*2;
    int bmp_trie = BMP_END/BMP_BLOCKSIZE*2;
    int bmp_mem = bmp_block_data + bmp_trie;
    // 'blocks' also holds the SMP blocks by now, so report the count that was
    // saved right after the BMP pass.
    qDebug(" %d unique blocks in BMP.", bmp_blocks);
    qDebug(" block data uses: %d bytes", bmp_block_data);
    qDebug(" trie data uses : %d bytes", bmp_trie);

    int smp_block_data = (blocks.size() - bmp_blocks)*SMP_BLOCKSIZE*2;
    int smp_trie = (SMP_END-BMP_END)/SMP_BLOCKSIZE*2;
    int smp_mem = smp_block_data + smp_trie;
    qDebug(" %d unique blocks in SMP.", blocks.size()-bmp_blocks);
    qDebug(" block data uses: %d bytes", smp_block_data);
    qDebug(" trie data uses : %d bytes", smp_trie);

    qDebug("\n properties use : %d bytes", uniqueProperties.size()*20);
    qDebug(" memory usage: %d bytes", bmp_mem+smp_mem + uniqueProperties.size()*20);

    QByteArray out;
    out += "static const unsigned short uc_property_trie[] = {\n";

    // first write the map
    // (every entry is offset by the map size because the block data is
    // written into the same array, right after the map)
    out += " // 0 - 0x" + QByteArray::number(BMP_END, 16);
    for (int i = 0; i < BMP_END/BMP_BLOCKSIZE; ++i) {
        if (!(i % 8)) {
            if (out.endsWith(' '))
                out.chop(1);
            if (!((i*BMP_BLOCKSIZE) % 0x1000))
                out += "\n";
            out += "\n ";
        }
        out += QByteArray::number(blockMap.at(i) + blockMap.size());
        out += ", ";
    }
    if (out.endsWith(' '))
        out.chop(1);
    out += "\n\n // 0x" + QByteArray::number(BMP_END, 16) + " - 0x" + QByteArray::number(SMP_END, 16) + "\n";
    for (int i = BMP_END/BMP_BLOCKSIZE; i < blockMap.size(); ++i) {
        if (!(i % 8)) {
            if (out.endsWith(' '))
                out.chop(1);
            if (!(i % (0x10000/SMP_BLOCKSIZE)))
                out += "\n";
            out += "\n ";
        }
        out += QByteArray::number(blockMap.at(i) + blockMap.size());
        out += ", ";
    }
    if (out.endsWith(' '))
        out.chop(1);
    out += "\n";
    // write the data
    for (int i = 0; i < blocks.size(); ++i) {
        if (out.endsWith(' '))
            out.chop(1);
        out += "\n";
        const PropertyBlock &b = blocks.at(i);
        for (int j = 0; j < b.properties.size(); ++j) {
            if (!(j % 8)) {
                if (out.endsWith(' '))
                    out.chop(1);
                out += "\n ";
            }
            out += QByteArray::number(b.properties.at(j));
            out += ", ";
        }
    }

    // we reserve one bit more than in the assert below for the sign
    Q_ASSERT(maxMirroredDiff < (1<<12));
    Q_ASSERT(maxLowerCaseDiff < (1<<14));
    Q_ASSERT(maxUpperCaseDiff < (1<<14));
    Q_ASSERT(maxTitleCaseDiff < (1<<14));
    Q_ASSERT(maxCaseFoldDiff < (1<<14));

    if (out.endsWith(' '))
        out.chop(1);
    out += "\n};\n\n"

           "#define GET_PROP_INDEX(ucs4) \\\n"
           " (ucs4 < 0x" + QByteArray::number(BMP_END, 16) + " \\\n"
           " ? (uc_property_trie[uc_property_trie[ucs4>>" + QByteArray::number(BMP_SHIFT) +
           "] + (ucs4 & 0x" + QByteArray::number(BMP_BLOCKSIZE-1, 16)+ ")]) \\\n"
           " : (uc_property_trie[uc_property_trie[((ucs4 - 0x" + QByteArray::number(BMP_END, 16) +
           ")>>" + QByteArray::number(SMP_SHIFT) + ") + 0x" + QByteArray::number(BMP_END/BMP_BLOCKSIZE, 16) + "]"
           " + (ucs4 & 0x" + QByteArray::number(SMP_BLOCKSIZE-1, 16) + ")]))\n\n"
           "#define GET_PROP_INDEX_UCS2(ucs2) \\\n"
           "(uc_property_trie[uc_property_trie[ucs2>>" + QByteArray::number(BMP_SHIFT) +
           "] + (ucs2 & 0x" + QByteArray::number(BMP_BLOCKSIZE-1, 16)+ ")])\n\n"


           "static const QUnicodeTables::Properties uc_properties[] = {\n";

    // keep in sync with the property declaration
    for (int i = 0; i < uniqueProperties.size(); ++i) {
        PropertyFlags p = uniqueProperties.at(i);
        out += " { ";
        // " ushort category : 8;\n"
        out += QByteArray::number( p.category );
        out += ", ";
        // " ushort line_break_class : 8;\n"
        out += QByteArray::number( p.line_break_class );
        out += ", ";
        // " ushort direction : 8;\n"
        out += QByteArray::number( p.direction );
        out += ", ";
        // " ushort combiningClass :8;\n"
        out += QByteArray::number( p.combiningClass );
        out += ", ";
        // " ushort joining : 2;\n"
        out += QByteArray::number( p.joining );
        out += ", ";
        // " signed short digitValue : 6;\n /* 5 needed */"
        out += QByteArray::number( p.digitValue );
        out += ", ";
        // " ushort unicodeVersion : 4;\n"
        out += QByteArray::number( p.age );
        out += ", ";
        // " ushort lowerCaseSpecial : 1;\n"
        // " ushort upperCaseSpecial : 1;\n"
        // " ushort titleCaseSpecial : 1;\n"
        // " ushort caseFoldSpecial : 1;\n"
        out += QByteArray::number( p.lowerCaseSpecial );
        out += ", ";
        out += QByteArray::number( p.upperCaseSpecial );
        out += ", ";
        out += QByteArray::number( p.titleCaseSpecial );
        out += ", ";
        out += QByteArray::number( p.caseFoldSpecial );
        out += ", ";
        // " signed short mirrorDiff : 16;\n"
        // " signed short lowerCaseDiff : 16;\n"
        // " signed short upperCaseDiff : 16;\n"
        // " signed short titleCaseDiff : 16;\n"
        // " signed short caseFoldDiff : 16;\n"
        out += QByteArray::number( p.mirrorDiff );
        out += ", ";
        out += QByteArray::number( p.lowerCaseDiff );
        out += ", ";
        out += QByteArray::number( p.upperCaseDiff );
        out += ", ";
        out += QByteArray::number( p.titleCaseDiff );
        out += ", ";
        out += QByteArray::number( p.caseFoldDiff );
        out += ", ";
        out += QByteArray::number( p.graphemeBreak );
        out += ", ";
        out += QByteArray::number( p.wordBreak );
        out += ", ";
        out += QByteArray::number( p.sentenceBreak );
        out += " },\n";
    }
    out += "};\n\n";

    out += "static inline const QUnicodeTables::Properties *qGetProp(uint ucs4)\n"
           "{\n"
           " int index = GET_PROP_INDEX(ucs4);\n"
           " return uc_properties + index;\n"
           "}\n"
           "\n"
           "static inline const QUnicodeTables::Properties *qGetProp(ushort ucs2)\n"
           "{\n"
           " int index = GET_PROP_INDEX_UCS2(ucs2);\n"
           " return uc_properties + index;\n"
           "}\n"
           "\n"
           "Q_CORE_EXPORT const QUnicodeTables::Properties * QT_FASTCALL QUnicodeTables::properties(uint ucs4)\n"
           "{\n"
           " int index = GET_PROP_INDEX(ucs4);\n"
           " return uc_properties + index;\n"
           "}\n"
           "\n"
           "Q_CORE_EXPORT const QUnicodeTables::Properties * QT_FASTCALL QUnicodeTables::properties(ushort ucs2)\n"
           "{\n"
           " int index = GET_PROP_INDEX_UCS2(ucs2);\n"
           " return uc_properties + index;\n"
           "}\n\n";

    out += "static const ushort specialCaseMap[] = {\n ";
    for (int i = 0; i < specialCaseMap.size(); ++i) {
        out += QByteArray(" 0x") + QByteArray::number(specialCaseMap.at(i), 16);
        if (i < specialCaseMap.size() - 1)
            out += ",";
        // start a new output line after each zero entry
        if (!specialCaseMap.at(i))
            out += "\n ";
    }
    out += "\n};\n";
    out += "#define SPECIAL_CASE_MAX_LEN " + QByteArray::number(specialCaseMaxLen) + "\n\n";

    qDebug("Special case map uses : %d bytes", specialCaseMap.size()*2);

    return out;
}
|
2186 |
||
2187 |
||
2188 |
struct DecompositionBlock { |
|
2189 |
DecompositionBlock() { index = -1; } |
|
2190 |
int index; |
|
2191 |
QList<int> decompositionPositions; |
|
2192 |
bool operator ==(const DecompositionBlock &other) |
|
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
2193 |
{ return decompositionPositions == other.decompositionPositions; } |
0 | 2194 |
}; |
2195 |
||
2196 |
static QByteArray createCompositionInfo() |
|
2197 |
{ |
|
2198 |
qDebug("createCompositionInfo:"); |
|
2199 |
||
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
2200 |
const int BMP_BLOCKSIZE = 16; |
0 | 2201 |
const int BMP_SHIFT = 4; |
2202 |
const int BMP_END = 0x3400; // start of Han |
|
2203 |
const int SMP_END = 0x30000; |
|
2204 |
const int SMP_BLOCKSIZE = 256; |
|
2205 |
const int SMP_SHIFT = 8; |
|
2206 |
||
2207 |
if(SMP_END <= highestComposedCharacter) |
|
2208 |
qFatal("end of table smaller than highest composed character at %x", highestComposedCharacter); |
|
2209 |
||
2210 |
QList<DecompositionBlock> blocks; |
|
2211 |
QList<int> blockMap; |
|
2212 |
QList<unsigned short> decompositions; |
|
2213 |
||
2214 |
int used = 0; |
|
2215 |
int tableIndex = 0; |
|
2216 |
||
2217 |
for (int block = 0; block < BMP_END/BMP_BLOCKSIZE; ++block) { |
|
2218 |
DecompositionBlock b; |
|
2219 |
for (int i = 0; i < BMP_BLOCKSIZE; ++i) { |
|
2220 |
int uc = block*BMP_BLOCKSIZE + i; |
|
2221 |
UnicodeData d = unicodeData.value(uc, UnicodeData(uc)); |
|
2222 |
if (!d.decomposition.isEmpty()) { |
|
2223 |
int utf16Chars = 0; |
|
2224 |
for (int j = 0; j < d.decomposition.size(); ++j) |
|
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
2225 |
utf16Chars += d.decomposition.at(j) >= 0x10000 ? 2 : 1; |
0 | 2226 |
decompositions.append(d.decompositionType + (utf16Chars<<8)); |
2227 |
for (int j = 0; j < d.decomposition.size(); ++j) { |
|
2228 |
int code = d.decomposition.at(j); |
|
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
2229 |
if (code >= 0x10000) { |
0 | 2230 |
// save as surrogate pair |
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
2231 |
ushort high = QChar::highSurrogate(code); |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
2232 |
ushort low = QChar::lowSurrogate(code); |
0 | 2233 |
decompositions.append(high); |
2234 |
decompositions.append(low); |
|
2235 |
} else { |
|
2236 |
decompositions.append(code); |
|
2237 |
} |
|
2238 |
} |
|
2239 |
b.decompositionPositions.append(tableIndex); |
|
2240 |
tableIndex += utf16Chars + 1; |
|
2241 |
} else { |
|
2242 |
b.decompositionPositions.append(0xffff); |
|
2243 |
} |
|
2244 |
} |
|
2245 |
int index = blocks.indexOf(b); |
|
2246 |
if (index == -1) { |
|
2247 |
index = blocks.size(); |
|
2248 |
b.index = used; |
|
2249 |
used += BMP_BLOCKSIZE; |
|
2250 |
blocks.append(b); |
|
2251 |
} |
|
2252 |
blockMap.append(blocks.at(index).index); |
|
2253 |
} |
|
2254 |
||
2255 |
int bmp_blocks = blocks.size(); |
|
2256 |
Q_ASSERT(blockMap.size() == BMP_END/BMP_BLOCKSIZE); |
|
2257 |
||
2258 |
for (int block = BMP_END/SMP_BLOCKSIZE; block < SMP_END/SMP_BLOCKSIZE; ++block) { |
|
2259 |
DecompositionBlock b; |
|
2260 |
for (int i = 0; i < SMP_BLOCKSIZE; ++i) { |
|
2261 |
int uc = block*SMP_BLOCKSIZE + i; |
|
2262 |
UnicodeData d = unicodeData.value(uc, UnicodeData(uc)); |
|
2263 |
if (!d.decomposition.isEmpty()) { |
|
2264 |
int utf16Chars = 0; |
|
2265 |
for (int j = 0; j < d.decomposition.size(); ++j) |
|
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
2266 |
utf16Chars += d.decomposition.at(j) >= 0x10000 ? 2 : 1; |
0 | 2267 |
decompositions.append(d.decompositionType + (utf16Chars<<8)); |
2268 |
for (int j = 0; j < d.decomposition.size(); ++j) { |
|
2269 |
int code = d.decomposition.at(j); |
|
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
2270 |
if (code >= 0x10000) { |
0 | 2271 |
// save as surrogate pair |
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
2272 |
ushort high = QChar::highSurrogate(code); |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
2273 |
ushort low = QChar::lowSurrogate(code); |
0 | 2274 |
decompositions.append(high); |
2275 |
decompositions.append(low); |
|
2276 |
} else { |
|
2277 |
decompositions.append(code); |
|
2278 |
} |
|
2279 |
} |
|
2280 |
b.decompositionPositions.append(tableIndex); |
|
2281 |
tableIndex += utf16Chars + 1; |
|
2282 |
} else { |
|
2283 |
b.decompositionPositions.append(0xffff); |
|
2284 |
} |
|
2285 |
} |
|
2286 |
int index = blocks.indexOf(b); |
|
2287 |
if (index == -1) { |
|
2288 |
index = blocks.size(); |
|
2289 |
b.index = used; |
|
2290 |
used += SMP_BLOCKSIZE; |
|
2291 |
blocks.append(b); |
|
2292 |
} |
|
2293 |
blockMap.append(blocks.at(index).index); |
|
2294 |
} |
|
2295 |
||
2296 |
int bmp_block_data = bmp_blocks*BMP_BLOCKSIZE*2; |
|
2297 |
int bmp_trie = BMP_END/BMP_BLOCKSIZE*2; |
|
2298 |
int bmp_mem = bmp_block_data + bmp_trie; |
|
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
2299 |
qDebug(" %d unique blocks in BMP.", blocks.size()); |
0 | 2300 |
qDebug(" block data uses: %d bytes", bmp_block_data); |
2301 |
qDebug(" trie data uses : %d bytes", bmp_trie); |
|
2302 |
qDebug(" memory usage: %d bytes", bmp_mem); |
|
2303 |
||
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
2304 |
int smp_block_data = (blocks.size() - bmp_blocks)*SMP_BLOCKSIZE*2; |
0 | 2305 |
int smp_trie = (SMP_END-BMP_END)/SMP_BLOCKSIZE*2; |
2306 |
int smp_mem = smp_block_data + smp_trie; |
|
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
2307 |
qDebug(" %d unique blocks in SMP.", blocks.size()-bmp_blocks); |
0 | 2308 |
qDebug(" block data uses: %d bytes", smp_block_data); |
2309 |
qDebug(" trie data uses : %d bytes", smp_trie); |
|
2310 |
||
2311 |
qDebug("\n decomposition table use : %d bytes", decompositions.size()*2); |
|
2312 |
qDebug(" memory usage: %d bytes", bmp_mem+smp_mem + decompositions.size()*2); |
|
2313 |
||
2314 |
QByteArray out; |
|
2315 |
||
2316 |
out += "static const unsigned short uc_decomposition_trie[] = {\n"; |
|
2317 |
||
2318 |
// first write the map |
|
2319 |
out += " // 0 - 0x" + QByteArray::number(BMP_END, 16); |
|
2320 |
for (int i = 0; i < BMP_END/BMP_BLOCKSIZE; ++i) { |
|
2321 |
if (!(i % 8)) { |
|
2322 |
if (out.endsWith(' ')) |
|
2323 |
out.chop(1); |
|
2324 |
if (!((i*BMP_BLOCKSIZE) % 0x1000)) |
|
2325 |
out += "\n"; |
|
2326 |
out += "\n "; |
|
2327 |
} |
|
2328 |
out += QByteArray::number(blockMap.at(i) + blockMap.size()); |
|
2329 |
out += ", "; |
|
2330 |
} |
|
2331 |
if (out.endsWith(' ')) |
|
2332 |
out.chop(1); |
|
2333 |
out += "\n\n // 0x" + QByteArray::number(BMP_END, 16) + " - 0x" + QByteArray::number(SMP_END, 16) + "\n";; |
|
2334 |
for (int i = BMP_END/BMP_BLOCKSIZE; i < blockMap.size(); ++i) { |
|
2335 |
if (!(i % 8)) { |
|
2336 |
if (out.endsWith(' ')) |
|
2337 |
out.chop(1); |
|
2338 |
if (!(i % (0x10000/SMP_BLOCKSIZE))) |
|
2339 |
out += "\n"; |
|
2340 |
out += "\n "; |
|
2341 |
} |
|
2342 |
out += QByteArray::number(blockMap.at(i) + blockMap.size()); |
|
2343 |
out += ", "; |
|
2344 |
} |
|
2345 |
if (out.endsWith(' ')) |
|
2346 |
out.chop(1); |
|
2347 |
out += "\n"; |
|
2348 |
// write the data |
|
2349 |
for (int i = 0; i < blocks.size(); ++i) { |
|
2350 |
if (out.endsWith(' ')) |
|
2351 |
out.chop(1); |
|
2352 |
out += "\n"; |
|
2353 |
const DecompositionBlock &b = blocks.at(i); |
|
2354 |
for (int j = 0; j < b.decompositionPositions.size(); ++j) { |
|
2355 |
if (!(j % 8)) { |
|
2356 |
if (out.endsWith(' ')) |
|
2357 |
out.chop(1); |
|
2358 |
out += "\n "; |
|
2359 |
} |
|
2360 |
out += "0x" + QByteArray::number(b.decompositionPositions.at(j), 16); |
|
2361 |
out += ", "; |
|
2362 |
} |
|
2363 |
} |
|
2364 |
||
2365 |
if (out.endsWith(' ')) |
|
2366 |
out.chop(1); |
|
2367 |
out += "\n};\n\n" |
|
2368 |
||
2369 |
"#define GET_DECOMPOSITION_INDEX(ucs4) \\\n" |
|
2370 |
" (ucs4 < 0x" + QByteArray::number(BMP_END, 16) + " \\\n" |
|
2371 |
" ? (uc_decomposition_trie[uc_decomposition_trie[ucs4>>" + QByteArray::number(BMP_SHIFT) + |
|
2372 |
"] + (ucs4 & 0x" + QByteArray::number(BMP_BLOCKSIZE-1, 16)+ ")]) \\\n" |
|
2373 |
" : (ucs4 < 0x" + QByteArray::number(SMP_END, 16) + "\\\n" |
|
2374 |
" ? uc_decomposition_trie[uc_decomposition_trie[((ucs4 - 0x" + QByteArray::number(BMP_END, 16) + |
|
2375 |
")>>" + QByteArray::number(SMP_SHIFT) + ") + 0x" + QByteArray::number(BMP_END/BMP_BLOCKSIZE, 16) + "]" |
|
2376 |
" + (ucs4 & 0x" + QByteArray::number(SMP_BLOCKSIZE-1, 16) + ")]\\\n" |
|
2377 |
" : 0xffff))\n\n" |
|
2378 |
||
2379 |
"static const unsigned short uc_decomposition_map[] = {\n"; |
|
2380 |
||
2381 |
for (int i = 0; i < decompositions.size(); ++i) { |
|
2382 |
if (!(i % 8)) { |
|
2383 |
if (out.endsWith(' ')) |
|
2384 |
out.chop(1); |
|
2385 |
out += "\n "; |
|
2386 |
} |
|
2387 |
out += "0x" + QByteArray::number(decompositions.at(i), 16); |
|
2388 |
out += ", "; |
|
2389 |
} |
|
2390 |
||
2391 |
if (out.endsWith(' ')) |
|
2392 |
out.chop(1); |
|
2393 |
out += "\n};\n\n"; |
|
2394 |
||
2395 |
return out; |
|
2396 |
} |
|
2397 |
||
2398 |
static QByteArray createLigatureInfo() |
|
2399 |
{ |
|
2400 |
qDebug("createLigatureInfo: numLigatures=%d", numLigatures); |
|
2401 |
||
2402 |
QList<DecompositionBlock> blocks; |
|
2403 |
QList<int> blockMap; |
|
2404 |
QList<unsigned short> ligatures; |
|
2405 |
||
2406 |
const int BMP_BLOCKSIZE = 32; |
|
2407 |
const int BMP_SHIFT = 5; |
|
2408 |
const int BMP_END = 0x3100; |
|
2409 |
Q_ASSERT(highestLigature < BMP_END); |
|
2410 |
||
2411 |
int used = 0; |
|
2412 |
int tableIndex = 0; |
|
2413 |
||
2414 |
for (int block = 0; block < BMP_END/BMP_BLOCKSIZE; ++block) { |
|
2415 |
DecompositionBlock b; |
|
2416 |
for (int i = 0; i < BMP_BLOCKSIZE; ++i) { |
|
2417 |
int uc = block*BMP_BLOCKSIZE + i; |
|
2418 |
QList<Ligature> l = ligatureHashes.value(uc); |
|
2419 |
if (!l.isEmpty()) { |
|
2420 |
b.decompositionPositions.append(tableIndex); |
|
2421 |
qSort(l); |
|
2422 |
||
2423 |
ligatures.append(l.size()); |
|
2424 |
for (int i = 0; i < l.size(); ++i) { |
|
2425 |
Q_ASSERT(l.at(i).u2 == uc); |
|
2426 |
ligatures.append(l.at(i).u1); |
|
2427 |
ligatures.append(l.at(i).ligature); |
|
2428 |
} |
|
2429 |
tableIndex += 2*l.size() + 1; |
|
2430 |
} else { |
|
2431 |
b.decompositionPositions.append(0xffff); |
|
2432 |
} |
|
2433 |
} |
|
2434 |
int index = blocks.indexOf(b); |
|
2435 |
if (index == -1) { |
|
2436 |
index = blocks.size(); |
|
2437 |
b.index = used; |
|
2438 |
used += BMP_BLOCKSIZE; |
|
2439 |
blocks.append(b); |
|
2440 |
} |
|
2441 |
blockMap.append(blocks.at(index).index); |
|
2442 |
} |
|
2443 |
||
2444 |
int bmp_blocks = blocks.size(); |
|
2445 |
Q_ASSERT(blockMap.size() == BMP_END/BMP_BLOCKSIZE); |
|
2446 |
||
2447 |
int bmp_block_data = bmp_blocks*BMP_BLOCKSIZE*2; |
|
2448 |
int bmp_trie = BMP_END/BMP_BLOCKSIZE*2; |
|
2449 |
int bmp_mem = bmp_block_data + bmp_trie; |
|
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
2450 |
qDebug(" %d unique blocks in BMP.", blocks.size()); |
0 | 2451 |
qDebug(" block data uses: %d bytes", bmp_block_data); |
2452 |
qDebug(" trie data uses : %d bytes", bmp_trie); |
|
2453 |
qDebug(" ligature data uses : %d bytes", ligatures.size()*2); |
|
2454 |
qDebug(" memory usage: %d bytes", bmp_mem + ligatures.size() * 2); |
|
2455 |
||
2456 |
QByteArray out; |
|
2457 |
||
2458 |
||
2459 |
out += "static const unsigned short uc_ligature_trie[] = {\n"; |
|
2460 |
||
2461 |
// first write the map |
|
2462 |
out += " // 0 - 0x" + QByteArray::number(BMP_END, 16); |
|
2463 |
for (int i = 0; i < BMP_END/BMP_BLOCKSIZE; ++i) { |
|
2464 |
if (!(i % 8)) { |
|
2465 |
if (out.endsWith(' ')) |
|
2466 |
out.chop(1); |
|
2467 |
if (!((i*BMP_BLOCKSIZE) % 0x1000)) |
|
2468 |
out += "\n"; |
|
2469 |
out += "\n "; |
|
2470 |
} |
|
2471 |
out += QByteArray::number(blockMap.at(i) + blockMap.size()); |
|
2472 |
out += ", "; |
|
2473 |
} |
|
2474 |
if (out.endsWith(' ')) |
|
2475 |
out.chop(1); |
|
2476 |
out += "\n"; |
|
2477 |
// write the data |
|
2478 |
for (int i = 0; i < blocks.size(); ++i) { |
|
2479 |
if (out.endsWith(' ')) |
|
2480 |
out.chop(1); |
|
2481 |
out += "\n"; |
|
2482 |
const DecompositionBlock &b = blocks.at(i); |
|
2483 |
for (int j = 0; j < b.decompositionPositions.size(); ++j) { |
|
2484 |
if (!(j % 8)) { |
|
2485 |
if (out.endsWith(' ')) |
|
2486 |
out.chop(1); |
|
2487 |
out += "\n "; |
|
2488 |
} |
|
2489 |
out += "0x" + QByteArray::number(b.decompositionPositions.at(j), 16); |
|
2490 |
out += ", "; |
|
2491 |
} |
|
2492 |
} |
|
2493 |
if (out.endsWith(' ')) |
|
2494 |
out.chop(1); |
|
2495 |
out += "\n};\n\n" |
|
2496 |
||
2497 |
"#define GET_LIGATURE_INDEX(u2) " |
|
2498 |
"(u2 < 0x" + QByteArray::number(BMP_END, 16) + " ? " |
|
2499 |
"uc_ligature_trie[uc_ligature_trie[u2>>" + QByteArray::number(BMP_SHIFT) + |
|
2500 |
"] + (u2 & 0x" + QByteArray::number(BMP_BLOCKSIZE-1, 16)+ ")] : 0xffff);\n\n" |
|
2501 |
||
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
2502 |
"static const unsigned short uc_ligature_map[] = {\n"; |
0 | 2503 |
|
2504 |
for (int i = 0; i < ligatures.size(); ++i) { |
|
2505 |
if (!(i % 8)) { |
|
2506 |
if (out.endsWith(' ')) |
|
2507 |
out.chop(1); |
|
2508 |
out += "\n "; |
|
2509 |
} |
|
2510 |
out += "0x" + QByteArray::number(ligatures.at(i), 16); |
|
2511 |
out += ", "; |
|
2512 |
} |
|
2513 |
||
2514 |
if (out.endsWith(' ')) |
|
2515 |
out.chop(1); |
|
2516 |
out += "\n};\n\n"; |
|
2517 |
||
2518 |
return out; |
|
2519 |
} |
|
2520 |
||
2521 |
// Produces the source text of the CasingInfo struct declaration that is
// emitted into generated code: three unsigned bit-fields (codePoint: 16 bits,
// flags: 8 bits, offset: 8 bits), followed by a blank line.
QByteArray createCasingInfo()
{
    static const char casingInfoDeclaration[] =
        "struct CasingInfo {\n"
        " uint codePoint : 16;\n"
        " uint flags : 8;\n"
        " uint offset : 8;\n"
        "};\n\n";

    return QByteArray(casingInfoDeclaration);
}
|
2533 |
||
2534 |
int main(int, char **) |
|
2535 |
{ |
|
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
2536 |
initAgeMap(); |
0 | 2537 |
initCategoryMap(); |
2538 |
initDirectionMap(); |
|
2539 |
initDecompositionMap(); |
|
2540 |
initGraphemeBreak(); |
|
2541 |
initWordBreak(); |
|
2542 |
initSentenceBreak(); |
|
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
2543 |
initLineBreak(); |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
2544 |
|
0 | 2545 |
readUnicodeData(); |
2546 |
readBidiMirroring(); |
|
2547 |
readArabicShaping(); |
|
2548 |
readDerivedAge(); |
|
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
2549 |
readDerivedNormalizationProps(); |
0 | 2550 |
readSpecialCasing(); |
2551 |
readCaseFolding(); |
|
2552 |
// readBlocks(); |
|
2553 |
readScripts(); |
|
2554 |
readGraphemeBreak(); |
|
2555 |
readWordBreak(); |
|
2556 |
readSentenceBreak(); |
|
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
2557 |
readLineBreak(); |
0 | 2558 |
|
2559 |
computeUniqueProperties(); |
|
2560 |
QByteArray properties = createPropertyInfo(); |
|
2561 |
QByteArray compositions = createCompositionInfo(); |
|
2562 |
QByteArray ligatures = createLigatureInfo(); |
|
2563 |
QByteArray normalizationCorrections = createNormalizationCorrections(); |
|
2564 |
QByteArray scriptEnumDeclaration = createScriptEnumDeclaration(); |
|
2565 |
QByteArray scriptTableDeclaration = createScriptTableDeclaration(); |
|
2566 |
||
2567 |
QByteArray header = |
|
2568 |
"/****************************************************************************\n" |
|
2569 |
"**\n" |
|
18
2f34d5167611
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
2570 |
"** Copyright (C) 2010 Nokia Corporation and/or its subsidiary(-ies).\n" |
0 | 2571 |
"** All rights reserved.\n" |
2572 |
"** Contact: Nokia Corporation (qt-info@nokia.com)\n" |
|
2573 |
"**\n" |
|
2574 |
"** This file is part of the QtCore module of the Qt Toolkit.\n" |
|
2575 |
"**\n" |
|
2576 |
"** $QT_BEGIN_LICENSE:LGPL$\n" |
|
2577 |
"** No Commercial Usage\n" |
|
2578 |
"** This file contains pre-release code and may not be distributed.\n" |
|
2579 |
"** You may use this file in accordance with the terms and conditions\n" |
|
2580 |
"** contained in the Technology Preview License Agreement accompanying\n" |
|
2581 |
"** this package.\n" |
|
2582 |
"**\n" |
|
2583 |
"** GNU Lesser General Public License Usage\n" |
|
2584 |
"** Alternatively, this file may be used under the terms of the GNU Lesser\n" |
|
2585 |
"** General Public License version 2.1 as published by the Free Software\n" |
|
2586 |
"** Foundation and appearing in the file LICENSE.LGPL included in the\n" |
|
2587 |
"** packaging of this file. Please review the following information to\n" |
|
2588 |
"** ensure the GNU Lesser General Public License version 2.1 requirements\n" |
|
2589 |
"** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.\n" |
|
2590 |
"**\n" |
|
2591 |
"** In addition, as a special exception, Nokia gives you certain additional\n" |
|
2592 |
"** rights. These rights are described in the Nokia Qt LGPL Exception\n" |
|
2593 |
"** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.\n" |
|
2594 |
"**\n" |
|
2595 |
"** If you have questions regarding the use of this file, please contact\n" |
|
2596 |
"** Nokia at qt-info@nokia.com.\n" |
|
2597 |
"**\n" |
|
2598 |
"**\n" |
|
2599 |
"**\n" |
|
2600 |
"**\n" |
|
2601 |
"**\n" |
|
2602 |
"**\n" |
|
2603 |
"**\n" |
|
2604 |
"**\n" |
|
2605 |
"** $QT_END_LICENSE$\n" |
|
2606 |
"**\n" |
|
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
2607 |
"****************************************************************************/\n\n"; |
0 | 2608 |
|
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
2609 |
QByteArray note = |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
2610 |
"/* This file is autogenerated from the Unicode "DATA_VERSION_S" database. Do not edit */\n\n"; |
0 | 2611 |
|
2612 |
QByteArray warning = |
|
2613 |
"//\n" |
|
2614 |
"// W A R N I N G\n" |
|
2615 |
"// -------------\n" |
|
2616 |
"//\n" |
|
2617 |
"// This file is not part of the Qt API. It exists for the convenience\n" |
|
2618 |
"// of internal files. This header file may change from version to version\n" |
|
2619 |
"// without notice, or even be removed.\n" |
|
2620 |
"//\n" |
|
2621 |
"// We mean it.\n" |
|
2622 |
"//\n\n"; |
|
2623 |
||
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
2624 |
QFile f("../../src/corelib/tools/qunicodetables.cpp"); |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
2625 |
f.open(QFile::WriteOnly|QFile::Truncate); |
0 | 2626 |
f.write(header); |
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
2627 |
f.write(note); |
0 | 2628 |
f.write("QT_BEGIN_NAMESPACE\n\n"); |
2629 |
f.write(properties); |
|
2630 |
f.write(compositions); |
|
2631 |
f.write(ligatures); |
|
2632 |
f.write(normalizationCorrections); |
|
2633 |
f.write(scriptTableDeclaration); |
|
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
2634 |
f.write("QT_END_NAMESPACE\n"); |
0 | 2635 |
f.close(); |
2636 |
||
2637 |
f.setFileName("../../src/corelib/tools/qunicodetables_p.h"); |
|
2638 |
f.open(QFile::WriteOnly | QFile::Truncate); |
|
2639 |
f.write(header); |
|
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
2640 |
f.write(note); |
0 | 2641 |
f.write(warning); |
2642 |
f.write("#ifndef QUNICODETABLES_P_H\n" |
|
2643 |
"#define QUNICODETABLES_P_H\n\n" |
|
2644 |
"#include <QtCore/qchar.h>\n\n" |
|
2645 |
"QT_BEGIN_NAMESPACE\n\n"); |
|
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
2646 |
f.write("#define UNICODE_DATA_VERSION "DATA_VERSION_STR"\n\n"); |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
2647 |
f.write("#define UNICODE_LAST_CODEPOINT "LAST_CODEPOINT_STR"\n\n"); |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
2648 |
f.write("namespace QUnicodeTables {\n\n"); |
0 | 2649 |
f.write(property_string); |
2650 |
f.write("\n"); |
|
2651 |
f.write(scriptEnumDeclaration); |
|
2652 |
f.write("\n"); |
|
2653 |
f.write(lineBreakClass); |
|
2654 |
f.write("\n"); |
|
2655 |
f.write(grapheme_break_string); |
|
2656 |
f.write("\n"); |
|
2657 |
f.write(word_break_string); |
|
2658 |
f.write("\n"); |
|
2659 |
f.write(sentence_break_string); |
|
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
2660 |
f.write("\n"); |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
2661 |
f.write(methods); |
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
2662 |
f.write("} // namespace QUnicodeTables\n\n" |
0 | 2663 |
"QT_END_NAMESPACE\n\n" |
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
2664 |
"#endif // QUNICODETABLES_P_H\n"); |
0 | 2665 |
f.close(); |
2666 |
||
2667 |
qDebug() << "maxMirroredDiff = " << hex << maxMirroredDiff; |
|
2668 |
qDebug() << "maxLowerCaseDiff = " << hex << maxLowerCaseDiff; |
|
2669 |
qDebug() << "maxUpperCaseDiff = " << hex << maxUpperCaseDiff; |
|
2670 |
qDebug() << "maxTitleCaseDiff = " << hex << maxTitleCaseDiff; |
|
2671 |
qDebug() << "maxCaseFoldDiff = " << hex << maxCaseFoldDiff; |
|
2672 |
#if 0 |
|
2673 |
// dump(0, 0x7f); |
|
2674 |
// dump(0x620, 0x640); |
|
2675 |
// dump(0x10000, 0x10020); |
|
2676 |
// dump(0x10800, 0x10820); |
|
2677 |
||
2678 |
qDebug("decompositionLength used:"); |
|
2679 |
int totalcompositions = 0; |
|
2680 |
int sum = 0; |
|
2681 |
for (int i = 1; i < 20; ++i) { |
|
2682 |
qDebug(" length %d used %d times", i, decompositionLength.value(i, 0)); |
|
2683 |
totalcompositions += i*decompositionLength.value(i, 0); |
|
2684 |
sum += decompositionLength.value(i, 0); |
|
2685 |
} |
|
2686 |
qDebug(" len decomposition map %d, average length %f, num composed chars %d", |
|
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
18
diff
changeset
|
2687 |
totalcompositions, (float)totalcompositions/(float)sum, sum); |
0 | 2688 |
qDebug("highest composed character %x", highestComposedCharacter); |
2689 |
qDebug("num ligatures = %d highest=%x, maxLength=%d", numLigatures, highestLigature, longestLigature); |
|
2690 |
||
2691 |
qBubbleSort(ligatures); |
|
2692 |
for (int i = 0; i < ligatures.size(); ++i) |
|
2693 |
qDebug("%s", ligatures.at(i).data()); |
|
2694 |
||
2695 |
// qDebug("combiningClass usage:"); |
|
2696 |
// int numClasses = 0; |
|
2697 |
// for (int i = 0; i < 255; ++i) { |
|
2698 |
// int num = combiningClassUsage.value(i, 0); |
|
2699 |
// if (num) { |
|
2700 |
// ++numClasses; |
|
2701 |
// qDebug(" combiningClass %d used %d times", i, num); |
|
2702 |
// } |
|
2703 |
// } |
|
2704 |
// qDebug("total of %d combining classes used", numClasses); |
|
2705 |
||
2706 |
#endif |
|
2707 |
} |