author | Eckhart Koeppen <eckhart.koppen@nokia.com> |
Fri, 16 Apr 2010 11:39:52 +0300 | |
branch | RCL_3 |
changeset 9 | 740e5562c97f |
parent 4 | 3b1da2848fc7 |
permissions | -rw-r--r-- |
0 | 1 |
/**************************************************************************** |
2 |
** |
|
4
3b1da2848fc7
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
3 |
** Copyright (C) 2010 Nokia Corporation and/or its subsidiary(-ies). |
0 | 4 |
** All rights reserved. |
5 |
** Contact: Nokia Corporation (qt-info@nokia.com) |
|
6 |
** |
|
7 |
** This file is part of the plugins of the Qt Toolkit. |
|
8 |
** |
|
9 |
** $QT_BEGIN_LICENSE:LGPL$ |
|
10 |
** No Commercial Usage |
|
11 |
** This file contains pre-release code and may not be distributed. |
|
12 |
** You may use this file in accordance with the terms and conditions |
|
13 |
** contained in the Technology Preview License Agreement accompanying |
|
14 |
** this package. |
|
15 |
** |
|
16 |
** GNU Lesser General Public License Usage |
|
17 |
** Alternatively, this file may be used under the terms of the GNU Lesser |
|
18 |
** General Public License version 2.1 as published by the Free Software |
|
19 |
** Foundation and appearing in the file LICENSE.LGPL included in the |
|
20 |
** packaging of this file. Please review the following information to |
|
21 |
** ensure the GNU Lesser General Public License version 2.1 requirements |
|
22 |
** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html. |
|
23 |
** |
|
24 |
** In addition, as a special exception, Nokia gives you certain additional |
|
25 |
** rights. These rights are described in the Nokia Qt LGPL Exception |
|
26 |
** version 1.1, included in the file LGPL_EXCEPTION.txt in this package. |
|
27 |
** |
|
28 |
** If you have questions regarding the use of this file, please contact |
|
29 |
** Nokia at qt-info@nokia.com. |
|
30 |
** |
|
31 |
** |
|
32 |
** |
|
33 |
** |
|
34 |
** |
|
35 |
** |
|
36 |
** |
|
37 |
** |
|
38 |
** $QT_END_LICENSE$ |
|
39 |
** |
|
40 |
****************************************************************************/ |
|
41 |
||
42 |
// Most of the code here was originally written by Serika Kurusugawa, |
|
43 |
// a.k.a. Junji Takagi, and is included in Qt with the author's permission |
|
44 |
// and the grateful thanks of the Qt team. |
|
45 |
||
46 |
/*! \class QJisCodec |
|
47 |
\reentrant |
|
48 |
\internal |
|
49 |
*/ |
|
50 |
||
51 |
#include "qjiscodec.h" |
|
52 |
#include "qlist.h" |
|
53 |
||
54 |
QT_BEGIN_NAMESPACE |
|
55 |
||
56 |
#ifndef QT_NO_TEXTCODEC |
|
57 |
// Byte values with special meaning in the 7-bit JIS byte stream.
enum {
    // Control characters
    Esc = 0x1b,             // introduces an escape sequence
    So  = 0x0e,             // Shift Out
    Si  = 0x0f,             // Shift In

    // Two names for byte 0x5c
    ReverseSolidus = 0x5c,
    YenSign        = 0x5c,

    // Two names for byte 0x7e
    Tilde    = 0x7e,
    Overline = 0x7e
};
|
67 |
||
68 |
// True when c lies in the closed byte range 0xa1..0xdf.
#define IsKana(c)       ((c) >= 0xa1 && (c) <= 0xdf)
// True when c lies in the closed byte range 0x21..0x7e.
#define IsJisChar(c)    ((c) >= 0x21 && (c) <= 0x7e)

// Wraps a converter result in a QChar; a zero result becomes
// QChar::ReplacementCharacter.
#define QValidChar(u)   ((u) ? QChar(static_cast<ushort>(u)) : QChar(QChar::ReplacementCharacter))
|
72 |
||
73 |
// Character set currently designated in the ISO-2022 stream.
// The enumerators MinState..MaxState index the Esc_SEQ table in this order;
// UnknownState has no escape sequence of its own.
enum Iso2022State {
    Ascii,
    MinState = Ascii,
    JISX0201_Latin,
    JISX0201_Kana,
    JISX0208_1978,
    JISX0208_1983,
    JISX0212,
    MaxState = JISX0212,
    UnknownState
};
|
78 |
||
79 |
// Bytes accepted as an intermediate character right after ESC (besides '$').
static const char Esc_CHARS[] = "()*+-./";

// Designation sequences, one per character set. Each starts with the
// escape byte 0x1b (written in octal so it cannot swallow the next character)
// and is NUL-terminated so it can be appended as a C string.
static const char Esc_Ascii[]          = "\033(B";
static const char Esc_JISX0201_Latin[] = "\033(J";
static const char Esc_JISX0201_Kana[]  = "\033(I";
static const char Esc_JISX0208_1978[]  = "\033$@";
static const char Esc_JISX0208_1983[]  = "\033$B";
static const char Esc_JISX0212[]       = "\033$(D";

// Lookup table: entries follow the Iso2022State order from Ascii to JISX0212.
static const char * const Esc_SEQ[] = {
    Esc_Ascii,
    Esc_JISX0201_Latin,
    Esc_JISX0201_Kana,
    Esc_JISX0208_1978,
    Esc_JISX0208_1983,
    Esc_JISX0212
};
|
93 |
||
94 |
/*! \internal */ |
|
95 |
QJisCodec::QJisCodec()
    : conv(QJpUnicodeConv::newConverter(QJpUnicodeConv::Default))
{
    // All setup happens in the initializer list: the codec obtains its
    // converter from QJpUnicodeConv::newConverter() with the Default rule.
}
|
98 |
||
99 |
||
100 |
/*! \internal */ |
|
101 |
QJisCodec::~QJisCodec()
{
    // Detach the converter from the member first, then destroy it, so the
    // member never holds a pointer to a deleted object.
    QJpUnicodeConv *owned = (QJpUnicodeConv*)conv;
    conv = 0;
    delete owned;
}
|
106 |
||
107 |
// Encodes the len QChars starting at uc as a 7-bit ISO-2022 byte stream.
//
// Each character is tried, in order, as ASCII, JIS X 0201 (Latin or Kana),
// JIS X 0208 and JIS X 0212. Whenever the required character set differs from
// the one used for the previous character, the matching escape sequence from
// Esc_SEQ is written first. Characters that none of the converters accept are
// written as '?' (or as a NUL byte when cs requests ConvertInvalidToNull) in
// ASCII mode and are counted in cs->invalidChars.
//
// Encoding always starts in ASCII and, if the last character left another set
// selected, ends with the ASCII escape sequence; no shift state is carried
// over in cs between calls.
QByteArray QJisCodec::convertFromUnicode(const QChar *uc, int len, ConverterState *cs) const
{
    char replacement = '?';
    if (cs) {
        if (cs->flags & ConvertInvalidToNull)
            replacement = 0;
    }
    int invalid = 0;

    QByteArray result;
    Iso2022State state = Ascii;
    Iso2022State prev = Ascii;
    for (int i = 0; i < len; i++) {
        QChar ch = uc[i];
        uint j;
        if (ch.row() == 0x00 && ch.cell() < 0x80) {
            // Ascii
            // Stay in JIS X 0201 Latin if it is already selected, except for
            // the two bytes that mean something different there.
            if (state != JISX0201_Latin ||
                ch.cell() == ReverseSolidus || ch.cell() == Tilde) {
                state = Ascii;
            }
            j = ch.cell();
        } else if ((j = conv->unicodeToJisx0201(ch.row(), ch.cell())) != 0) {
            if (j < 0x80) {
                // JIS X 0201 Latin
                // Compare the *converted* byte, not the Unicode cell: the
                // Unicode cell of a non-ASCII character is never the JIS byte
                // 0x5c/0x7e, so testing ch.cell() here left the yen sign and
                // overline in ASCII mode, where they read back as '\' and '~'.
                if (state != Ascii ||
                    j == YenSign || j == Overline) {
                    state = JISX0201_Latin;
                }
            } else {
                // JIS X 0201 Kana
                state = JISX0201_Kana;
                j &= 0x7f;  // written as a 7-bit byte
            }
        } else if ((j = conv->unicodeToJisx0208(ch.row(), ch.cell())) != 0) {
            // JIS X 0208
            state = JISX0208_1983;
        } else if ((j = conv->unicodeToJisx0212(ch.row(), ch.cell())) != 0) {
            // JIS X 0212
            state = JISX0212;
        } else {
            // Invalid
            state = UnknownState;
            j = replacement;
            ++invalid;
        }
        if (state != prev) {
            if (state == UnknownState) {
                // The replacement byte is written in ASCII mode.
                result += Esc_Ascii;
            } else {
                result += Esc_SEQ[state - MinState];
            }
            prev = state;
        }
        if (j < 0x0100) {
            // Single-byte code
            result += j & 0xff;
        } else {
            // Double-byte code, high byte first
            result += (j >> 8) & 0xff;
            result += j & 0xff;
        }
    }
    if (prev != Ascii) {
        // Leave the stream in ASCII mode.
        result += Esc_Ascii;
    }

    if (cs) {
        cs->invalidChars += invalid;
    }
    return result;
}
|
177 |
||
178 |
// Decodes len bytes of a 7-bit ISO-2022 byte stream starting at chars.
//
// The decoder is a small state machine:
//   - esc   : true while the bytes following an ESC are being collected;
//   - state : the character set currently selected;
//   - prev  : the set that was active before the last Shift Out, so that
//             Shift In can return to it;
//   - buf / nbuf : bytes of an unfinished escape sequence or of the first
//             half of a double-byte character.
// When cs is given, all of the above is restored from it on entry and saved
// back on exit, so input may be fed in arbitrary chunks. Bytes that cannot be
// interpreted in the current state are replaced (by U+FFFD, or by a null
// QChar when cs requests ConvertInvalidToNull) and counted in
// cs->invalidChars.
QString QJisCodec::convertToUnicode(const char* chars, int len, ConverterState *cs) const
{
    uchar buf[4] = {0, 0, 0, 0};
    int nbuf = 0;
    Iso2022State state = Ascii, prev = Ascii;
    bool esc = false;
    QChar replacement = QChar::ReplacementCharacter;
    if (cs) {
        if (cs->flags & ConvertInvalidToNull)
            replacement = QChar::Null;
        // Restore the decoder state left by the previous call.
        nbuf = cs->remainingChars;
        buf[0] = (cs->state_data[0] >> 24) & 0xff;
        buf[1] = (cs->state_data[0] >> 16) & 0xff;
        buf[2] = (cs->state_data[0] >>  8) & 0xff;
        buf[3] = (cs->state_data[0] >>  0) & 0xff;
        state = (Iso2022State)((cs->state_data[1] >>  0) & 0xff);
        prev = (Iso2022State)((cs->state_data[1] >>  8) & 0xff);
        esc = cs->state_data[2];
    }
    int invalid = 0;

    QString result;
    for (int i=0; i<len; i++) {
        uchar ch = chars[i];
        if (esc) {
            // Escape sequence
            // Until a known final byte is seen the selected set is unknown.
            state = UnknownState;
            switch (nbuf) {
            case 0:
                // strchr() also matches the terminating NUL of Esc_CHARS, so
                // a NUL byte has to be excluded explicitly.
                if (ch == '$' || (ch != 0 && strchr(Esc_CHARS, ch))) {
                    buf[nbuf++] = ch;
                } else {
                    nbuf = 0;
                    esc = false;
                }
                break;
            case 1:
                if (buf[0] == '$') {
                    if (ch != 0 && strchr(Esc_CHARS, ch)) {
                        // Three-byte designation, e.g. Esc $ ( D
                        buf[nbuf++] = ch;
                    } else {
                        switch (ch) {
                        case '@':
                            state = JISX0208_1978;  // Esc $ @
                            break;
                        case 'B':
                            state = JISX0208_1983;  // Esc $ B
                            break;
                        }
                        nbuf = 0;
                        esc = false;
                    }
                } else {
                    if (buf[0] == '(') {
                        switch (ch) {
                        case 'B':
                            state = Ascii;  // Esc ( B
                            break;
                        case 'I':
                            state = JISX0201_Kana;  // Esc ( I
                            break;
                        case 'J':
                            state = JISX0201_Latin; // Esc ( J
                            break;
                        }
                    }
                    nbuf = 0;
                    esc = false;
                }
                break;
            case 2:
                if (buf[1] == '(') {
                    switch (ch) {
                    case 'D':
                        state = JISX0212;   // Esc $ ( D
                        break;
                    }
                }
                nbuf = 0;
                esc = false;
                break;
            }
        } else {
            if (ch == Esc) {
                // Escape sequence
                nbuf = 0;
                esc = true;
            } else if (ch == So) {
                // Shift out
                prev = state;
                state = JISX0201_Kana;
                nbuf = 0;
            } else if (ch == Si) {
                // Shift in
                if (prev == Ascii || prev == JISX0201_Latin) {
                    state = prev;
                } else {
                    state = Ascii;
                }
                nbuf = 0;
            } else {
                uint u;
                switch (nbuf) {
                case 0:
                    switch (state) {
                    case Ascii:
                        if (ch < 0x80) {
                            result += QLatin1Char(ch);
                            break;
                        }
                        /* fall through */
                    case JISX0201_Latin:
                        u = conv->jisx0201ToUnicode(ch);
                        result += QValidChar(u);
                        break;
                    case JISX0201_Kana:
                        u = conv->jisx0201ToUnicode(ch | 0x80);
                        result += QValidChar(u);
                        break;
                    case JISX0208_1978:
                    case JISX0208_1983:
                    case JISX0212:
                        // First half of a double-byte character.
                        buf[nbuf++] = ch;
                        break;
                    default:
                        // Unknown character set: treat the byte like any
                        // other undecodable input, i.e. honour
                        // ConvertInvalidToNull and count it, exactly as the
                        // double-byte branch below does.
                        result += replacement;
                        ++invalid;
                        break;
                    }
                    break;
                case 1:
                    switch (state) {
                    case JISX0208_1978:
                    case JISX0208_1983:
                        u = conv->jisx0208ToUnicode(buf[0] & 0x7f, ch & 0x7f);
                        result += QValidChar(u);
                        break;
                    case JISX0212:
                        u = conv->jisx0212ToUnicode(buf[0] & 0x7f, ch & 0x7f);
                        result += QValidChar(u);
                        break;
                    default:
                        result += replacement;
                        ++invalid;
                        break;
                    }
                    nbuf = 0;
                    break;
                }
            }
        }
    }

    if (cs) {
        // Save the decoder state for the next call.
        cs->remainingChars = nbuf;
        cs->invalidChars += invalid;
        // Shift as unsigned: buf[0] would otherwise be promoted to a signed
        // int and shifted into the sign bit.
        cs->state_data[0] = (uint(buf[0]) << 24) + (uint(buf[1]) << 16)
                          + (uint(buf[2]) << 8) + uint(buf[3]);
        cs->state_data[1] = (prev << 8) + state;
        cs->state_data[2] = esc;
    }

    return result;
}
|
340 |
||
341 |
||
342 |
||
343 |
/*! \internal */ |
|
344 |
int QJisCodec::_mibEnum()
{
    // MIB enum value reported for this codec.
    const int mib = 39;
    return mib;
}
|
348 |
||
349 |
/*! \internal */ |
|
350 |
QByteArray QJisCodec::_name()
{
    // Name reported for this codec.
    return QByteArray("ISO-2022-JP");
}
|
354 |
||
355 |
/*! |
|
356 |
Returns the codec's mime name. |
|
357 |
*/ |
|
358 |
QList<QByteArray> QJisCodec::_aliases() |
|
359 |
{ |
|
360 |
QList<QByteArray> list; |
|
361 |
list << "JIS7"; // Qt 3 compat |
|
362 |
return list; |
|
363 |
} |
|
364 |
||
365 |
#endif // QT_NO_TEXTCODEC |
|
366 |
||
367 |
QT_END_NAMESPACE |