|
1 /**************************************************************************** |
|
2 ** |
|
3 ** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies). |
|
4 ** All rights reserved. |
|
5 ** Contact: Nokia Corporation (qt-info@nokia.com) |
|
6 ** |
|
7 ** This file is part of the plugins of the Qt Toolkit. |
|
8 ** |
|
9 ** $QT_BEGIN_LICENSE:LGPL$ |
|
10 ** No Commercial Usage |
|
11 ** This file contains pre-release code and may not be distributed. |
|
12 ** You may use this file in accordance with the terms and conditions |
|
13 ** contained in the Technology Preview License Agreement accompanying |
|
14 ** this package. |
|
15 ** |
|
16 ** GNU Lesser General Public License Usage |
|
17 ** Alternatively, this file may be used under the terms of the GNU Lesser |
|
18 ** General Public License version 2.1 as published by the Free Software |
|
19 ** Foundation and appearing in the file LICENSE.LGPL included in the |
|
20 ** packaging of this file. Please review the following information to |
|
21 ** ensure the GNU Lesser General Public License version 2.1 requirements |
|
22 ** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html. |
|
23 ** |
|
24 ** In addition, as a special exception, Nokia gives you certain additional |
|
25 ** rights. These rights are described in the Nokia Qt LGPL Exception |
|
26 ** version 1.1, included in the file LGPL_EXCEPTION.txt in this package. |
|
27 ** |
|
28 ** If you have questions regarding the use of this file, please contact |
|
29 ** Nokia at qt-info@nokia.com. |
|
30 ** |
|
31 ** |
|
32 ** |
|
33 ** |
|
34 ** |
|
35 ** |
|
36 ** |
|
37 ** |
|
38 ** $QT_END_LICENSE$ |
|
39 ** |
|
40 ****************************************************************************/ |
|
41 |
|
42 // Most of the code here was originally written by Serika Kurusugawa, |
|
43 // a.k.a. Junji Takagi, and is included in Qt with the author's permission |
|
44 // and the grateful thanks of the Qt team. |
|
45 |
|
46 /*! \class QJisCodec |
|
47 \reentrant |
|
48 \internal |
|
49 */ |
|
50 |
|
51 #include "qjiscodec.h" |
|
52 #include "qlist.h" |
|
53 |
|
54 QT_BEGIN_NAMESPACE |
|
55 |
|
56 #ifndef QT_NO_TEXTCODEC |
|
// Byte values with a special meaning for the ISO-2022-JP codec below.
enum {
    // Control characters recognised by the decoder.
    So  = 0x0e, // Shift Out
    Si  = 0x0f, // Shift In
    Esc = 0x1b, // first byte of every escape sequence

    // The two positions where ASCII and JIS X 0201 Latin are told apart by
    // the encoder: each pair of names shares a single code.
    ReverseSolidus = 0x5c, // name used on the ASCII side
    YenSign        = 0x5c, // name used on the JIS X 0201 Latin side
    Tilde          = 0x7e, // name used on the ASCII side
    Overline       = 0x7e  // name used on the JIS X 0201 Latin side
};
|
67 |
|
// True when c lies in 0xa1..0xdf (inclusive). The argument is evaluated twice.
#define IsKana(c) ((0xa1 <= (c)) && ((c) <= 0xdf))
// True when c lies in 0x21..0x7e (inclusive). The argument is evaluated twice.
#define IsJisChar(c) ((0x21 <= (c)) && ((c) <= 0x7e))

// Wraps a converter result in a QChar; a zero result is turned into
// QChar::ReplacementCharacter.
#define QValidChar(u) ((u) == 0 ? QChar(QChar::ReplacementCharacter) : QChar((ushort)(u)))
|
72 |
|
// Character set currently designated while encoding or decoding.
// The values MinState..MaxState are used (minus MinState) as indices into
// Esc_SEQ, so their numeric order must match the order of that table.
enum Iso2022State {
    Ascii          = 0,
    JISX0201_Latin = 1,
    JISX0201_Kana  = 2,
    JISX0208_1978  = 3,
    JISX0208_1983  = 4,
    JISX0212       = 5,
    UnknownState   = 6, // no recognised character set is selected

    // Range of the states that have an entry in Esc_SEQ.
    MinState = Ascii,
    MaxState = JISX0212
};
|
78 |
|
// Bytes accepted by the decoder as the intermediate byte of an escape sequence
// (in addition to '$', which is tested for separately).
static const char Esc_CHARS[] = "()*+-./";

// Escape sequences emitted by the encoder. "\x1b" is the Esc byte; every
// array is NUL-terminated so it can be appended as a C string.
static const char Esc_Ascii[]          = "\x1b(B";
static const char Esc_JISX0201_Latin[] = "\x1b(J";
static const char Esc_JISX0201_Kana[]  = "\x1b(I";
static const char Esc_JISX0208_1978[]  = "\x1b$@";
static const char Esc_JISX0208_1983[]  = "\x1b$B";
static const char Esc_JISX0212[]       = "\x1b$(D";

// Indexed by (state - MinState) for the states Ascii..JISX0212.
static const char * const Esc_SEQ[] = {
    Esc_Ascii,
    Esc_JISX0201_Latin,
    Esc_JISX0201_Kana,
    Esc_JISX0208_1978,
    Esc_JISX0208_1983,
    Esc_JISX0212
};
|
93 |
|
94 /*! \internal */ |
|
95 QJisCodec::QJisCodec() : conv(QJpUnicodeConv::newConverter(QJpUnicodeConv::Default)) |
|
96 { |
|
97 } |
|
98 |
|
99 |
|
100 /*! \internal */ |
|
101 QJisCodec::~QJisCodec() |
|
102 { |
|
103 delete (QJpUnicodeConv*)conv; |
|
104 conv = 0; |
|
105 } |
|
106 |
|
107 QByteArray QJisCodec::convertFromUnicode(const QChar *uc, int len, ConverterState *cs) const |
|
108 { |
|
109 char replacement = '?'; |
|
110 if (cs) { |
|
111 if (cs->flags & ConvertInvalidToNull) |
|
112 replacement = 0; |
|
113 } |
|
114 int invalid = 0; |
|
115 |
|
116 QByteArray result; |
|
117 Iso2022State state = Ascii; |
|
118 Iso2022State prev = Ascii; |
|
119 for (int i = 0; i < len; i++) { |
|
120 QChar ch = uc[i]; |
|
121 uint j; |
|
122 if (ch.row() == 0x00 && ch.cell() < 0x80) { |
|
123 // Ascii |
|
124 if (state != JISX0201_Latin || |
|
125 ch.cell() == ReverseSolidus || ch.cell() == Tilde) { |
|
126 state = Ascii; |
|
127 } |
|
128 j = ch.cell(); |
|
129 } else if ((j = conv->unicodeToJisx0201(ch.row(), ch.cell())) != 0) { |
|
130 if (j < 0x80) { |
|
131 // JIS X 0201 Latin |
|
132 if (state != Ascii || |
|
133 ch.cell() == YenSign || ch.cell() == Overline) { |
|
134 state = JISX0201_Latin; |
|
135 } |
|
136 } else { |
|
137 // JIS X 0201 Kana |
|
138 state = JISX0201_Kana; |
|
139 j &= 0x7f; |
|
140 } |
|
141 } else if ((j = conv->unicodeToJisx0208(ch.row(), ch.cell())) != 0) { |
|
142 // JIS X 0208 |
|
143 state = JISX0208_1983; |
|
144 } else if ((j = conv->unicodeToJisx0212(ch.row(), ch.cell())) != 0) { |
|
145 // JIS X 0212 |
|
146 state = JISX0212; |
|
147 } else { |
|
148 // Invalid |
|
149 state = UnknownState; |
|
150 j = replacement; |
|
151 ++invalid; |
|
152 } |
|
153 if (state != prev) { |
|
154 if (state == UnknownState) { |
|
155 result += Esc_Ascii; |
|
156 } else { |
|
157 result += Esc_SEQ[state - MinState]; |
|
158 } |
|
159 prev = state; |
|
160 } |
|
161 if (j < 0x0100) { |
|
162 result += j & 0xff; |
|
163 } else { |
|
164 result += (j >> 8) & 0xff; |
|
165 result += j & 0xff; |
|
166 } |
|
167 } |
|
168 if (prev != Ascii) { |
|
169 result += Esc_Ascii; |
|
170 } |
|
171 |
|
172 if (cs) { |
|
173 cs->invalidChars += invalid; |
|
174 } |
|
175 return result; |
|
176 } |
|
177 |
|
178 QString QJisCodec::convertToUnicode(const char* chars, int len, ConverterState *cs) const |
|
179 { |
|
180 uchar buf[4] = {0, 0, 0, 0}; |
|
181 int nbuf = 0; |
|
182 Iso2022State state = Ascii, prev = Ascii; |
|
183 bool esc = false; |
|
184 QChar replacement = QChar::ReplacementCharacter; |
|
185 if (cs) { |
|
186 if (cs->flags & ConvertInvalidToNull) |
|
187 replacement = QChar::Null; |
|
188 nbuf = cs->remainingChars; |
|
189 buf[0] = (cs->state_data[0] >> 24) & 0xff; |
|
190 buf[1] = (cs->state_data[0] >> 16) & 0xff; |
|
191 buf[2] = (cs->state_data[0] >> 8) & 0xff; |
|
192 buf[3] = (cs->state_data[0] >> 0) & 0xff; |
|
193 state = (Iso2022State)((cs->state_data[1] >> 0) & 0xff); |
|
194 prev = (Iso2022State)((cs->state_data[1] >> 8) & 0xff); |
|
195 esc = cs->state_data[2]; |
|
196 } |
|
197 int invalid = 0; |
|
198 |
|
199 QString result; |
|
200 for (int i=0; i<len; i++) { |
|
201 uchar ch = chars[i]; |
|
202 if (esc) { |
|
203 // Escape sequence |
|
204 state = UnknownState; |
|
205 switch (nbuf) { |
|
206 case 0: |
|
207 if (ch == '$' || strchr(Esc_CHARS, ch)) { |
|
208 buf[nbuf++] = ch; |
|
209 } else { |
|
210 nbuf = 0; |
|
211 esc = false; |
|
212 } |
|
213 break; |
|
214 case 1: |
|
215 if (buf[0] == '$') { |
|
216 if (strchr(Esc_CHARS, ch)) { |
|
217 buf[nbuf++] = ch; |
|
218 } else { |
|
219 switch (ch) { |
|
220 case '@': |
|
221 state = JISX0208_1978; // Esc $ @ |
|
222 break; |
|
223 case 'B': |
|
224 state = JISX0208_1983; // Esc $ B |
|
225 break; |
|
226 } |
|
227 nbuf = 0; |
|
228 esc = false; |
|
229 } |
|
230 } else { |
|
231 if (buf[0] == '(') { |
|
232 switch (ch) { |
|
233 case 'B': |
|
234 state = Ascii; // Esc (B |
|
235 break; |
|
236 case 'I': |
|
237 state = JISX0201_Kana; // Esc (I |
|
238 break; |
|
239 case 'J': |
|
240 state = JISX0201_Latin; // Esc (J |
|
241 break; |
|
242 } |
|
243 } |
|
244 nbuf = 0; |
|
245 esc = false; |
|
246 } |
|
247 break; |
|
248 case 2: |
|
249 if (buf[1] == '(') { |
|
250 switch (ch) { |
|
251 case 'D': |
|
252 state = JISX0212; // Esc $ (D |
|
253 break; |
|
254 } |
|
255 } |
|
256 nbuf = 0; |
|
257 esc = false; |
|
258 break; |
|
259 } |
|
260 } else { |
|
261 if (ch == Esc) { |
|
262 // Escape sequence |
|
263 nbuf = 0; |
|
264 esc = true; |
|
265 } else if (ch == So) { |
|
266 // Shift out |
|
267 prev = state; |
|
268 state = JISX0201_Kana; |
|
269 nbuf = 0; |
|
270 } else if (ch == Si) { |
|
271 // Shift in |
|
272 if (prev == Ascii || prev == JISX0201_Latin) { |
|
273 state = prev; |
|
274 } else { |
|
275 state = Ascii; |
|
276 } |
|
277 nbuf = 0; |
|
278 } else { |
|
279 uint u; |
|
280 switch (nbuf) { |
|
281 case 0: |
|
282 switch (state) { |
|
283 case Ascii: |
|
284 if (ch < 0x80) { |
|
285 result += QLatin1Char(ch); |
|
286 break; |
|
287 } |
|
288 /* fall through */ |
|
289 case JISX0201_Latin: |
|
290 u = conv->jisx0201ToUnicode(ch); |
|
291 result += QValidChar(u); |
|
292 break; |
|
293 case JISX0201_Kana: |
|
294 u = conv->jisx0201ToUnicode(ch | 0x80); |
|
295 result += QValidChar(u); |
|
296 break; |
|
297 case JISX0208_1978: |
|
298 case JISX0208_1983: |
|
299 case JISX0212: |
|
300 buf[nbuf++] = ch; |
|
301 break; |
|
302 default: |
|
303 result += QChar::ReplacementCharacter; |
|
304 break; |
|
305 } |
|
306 break; |
|
307 case 1: |
|
308 switch (state) { |
|
309 case JISX0208_1978: |
|
310 case JISX0208_1983: |
|
311 u = conv->jisx0208ToUnicode(buf[0] & 0x7f, ch & 0x7f); |
|
312 result += QValidChar(u); |
|
313 break; |
|
314 case JISX0212: |
|
315 u = conv->jisx0212ToUnicode(buf[0] & 0x7f, ch & 0x7f); |
|
316 result += QValidChar(u); |
|
317 break; |
|
318 default: |
|
319 result += replacement; |
|
320 ++invalid; |
|
321 break; |
|
322 } |
|
323 nbuf = 0; |
|
324 break; |
|
325 } |
|
326 } |
|
327 } |
|
328 } |
|
329 |
|
330 if (cs) { |
|
331 cs->remainingChars = nbuf; |
|
332 cs->invalidChars += invalid; |
|
333 cs->state_data[0] = (buf[0] << 24) + (buf[1] << 16) + (buf[2] << 8) + buf[3]; |
|
334 cs->state_data[1] = (prev << 8) + state; |
|
335 cs->state_data[2] = esc; |
|
336 } |
|
337 |
|
338 return result; |
|
339 } |
|
340 |
|
341 |
|
342 |
|
343 /*! \internal */ |
|
344 int QJisCodec::_mibEnum() |
|
345 { |
|
346 return 39; |
|
347 } |
|
348 |
|
349 /*! \internal */ |
|
350 QByteArray QJisCodec::_name() |
|
351 { |
|
352 return "ISO-2022-JP"; |
|
353 } |
|
354 |
|
/*!
    Returns the codec's aliases, i.e. the additional names under which it
    is known. The only alias is "JIS7".
*/
|
358 QList<QByteArray> QJisCodec::_aliases() |
|
359 { |
|
360 QList<QByteArray> list; |
|
361 list << "JIS7"; // Qt 3 compat |
|
362 return list; |
|
363 } |
|
364 |
|
365 #endif // QT_NO_TEXTCODEC |
|
366 |
|
367 QT_END_NAMESPACE |