// Byte-position predicates for multi-byte sequences. Each expands to a range
// test through InRange (defined outside this view; presumably an inclusive
// lower/upper bound check -- confirm against its definition).
// Second byte of a four-byte sequence: 0x30 - 0x39.
58 #define Is2ndByteIn4Bytes(c) (InRange((c), 0x30, 0x39)) |
58 #define Is2ndByteIn4Bytes(c) (InRange((c), 0x30, 0x39)) |
// Second byte of either form: accepts whatever Is2ndByteIn2Bytes (defined
// outside this view) or Is2ndByteIn4Bytes accepts. The argument is expanded
// in both sub-macros, so it should be free of side effects.
59 #define Is2ndByte(c) (Is2ndByteIn2Bytes(c) || Is2ndByteIn4Bytes(c)) |
59 #define Is2ndByte(c) (Is2ndByteIn2Bytes(c) || Is2ndByteIn4Bytes(c)) |
// Third byte of a four-byte sequence: 0x81 - 0xFE.
60 #define Is3rdByte(c) (InRange((c), 0x81, 0xFE)) |
60 #define Is3rdByte(c) (InRange((c), 0x81, 0xFE)) |
// Fourth byte of a four-byte sequence: 0x30 - 0x39 (same range as the
// second byte of the four-byte form).
61 #define Is4thByte(c) (InRange((c), 0x30, 0x39)) |
61 #define Is4thByte(c) (InRange((c), 0x30, 0x39)) |
62 |
62 |
// QValidChar(u): yields a QChar. A non-zero u is cast to ushort and wrapped
// in QChar; a zero u becomes QChar::ReplacementCharacter. The cast to ushort
// keeps only the low 16 bits of u. The argument is expanded twice.
63 #define QValidChar(u) ((u) ? QChar((ushort)(u)) : QChar(QChar::ReplacementCharacter)) |
// qValidChar(u): yields a plain integer value rather than a QChar. A non-zero
// u is passed through unchanged; a zero u becomes QChar::ReplacementCharacter
// converted to ushort. No narrowing is applied to u inside the macro, so the
// caller decides the width of the stored value. The argument is expanded twice.
63 #define qValidChar(u) ((u) ? (u) : static_cast<ushort>(QChar::ReplacementCharacter)) |
64 |
64 |
65 /* User-defined areas: UDA 1: 0xAAA1 - 0xAFFE (564/0) |
65 /* User-defined areas: UDA 1: 0xAAA1 - 0xAFFE (564/0) |
66 UDA 2: 0xF8A1 - 0xFEFE (658/0) |
66 UDA 2: 0xF8A1 - 0xFEFE (658/0) |
67 UDA 3: 0xA140 - 0xA7A0 (672/0) */ |
67 UDA 3: 0xA140 - 0xA7A0 (672/0) */ |
// IsUDA1(a, b): tests a two-byte pair against the "UDA 1" range listed in the
// comment above (0xAAA1 - 0xAFFE): lead byte a in 0xAA - 0xAF and trail byte b
// in 0xA1 - 0xFE. Relies on InRange, which is defined outside this view.
68 #define IsUDA1(a, b) (InRange((a), 0xAA, 0xAF) && InRange((b), 0xA1, 0xFE)) |
68 #define IsUDA1(a, b) (InRange((a), 0xAA, 0xAF) && InRange((b), 0xA1, 0xFE)) |
158 |
158 |
159 QString QGb18030Codec::convertToUnicode(const char* chars, int len, ConverterState *state) const |
159 QString QGb18030Codec::convertToUnicode(const char* chars, int len, ConverterState *state) const |
160 { |
160 { |
161 uchar buf[4]; |
161 uchar buf[4]; |
162 int nbuf = 0; |
162 int nbuf = 0; |
163 QChar replacement = QChar::ReplacementCharacter; |
163 ushort replacement = QChar::ReplacementCharacter; |
164 if (state) { |
164 if (state) { |
165 if (state->flags & ConvertInvalidToNull) |
165 if (state->flags & ConvertInvalidToNull) |
166 replacement = QChar::Null; |
166 replacement = QChar::Null; |
167 nbuf = state->remainingChars; |
167 nbuf = state->remainingChars; |
168 buf[0] = (state->state_data[0] >> 24) & 0xff; |
168 buf[0] = (state->state_data[0] >> 24) & 0xff; |
171 buf[3] = (state->state_data[0] >> 0) & 0xff; |
171 buf[3] = (state->state_data[0] >> 0) & 0xff; |
172 } |
172 } |
173 int invalid = 0; |
173 int invalid = 0; |
174 |
174 |
175 QString result; |
175 QString result; |
|
176 result.resize(len); |
|
177 int unicodeLen = 0; |
|
178 ushort *const resultData = reinterpret_cast<ushort*>(result.data()); |
176 //qDebug("QGb18030Decoder::toUnicode(const char* chars, int len = %d)", len); |
179 //qDebug("QGb18030Decoder::toUnicode(const char* chars, int len = %d)", len); |
177 for (int i = 0; i < len; i++) { |
180 for (int i = 0; i < len; i++) { |
178 uchar ch = chars[i]; |
181 uchar ch = chars[i]; |
179 switch (nbuf) { |
182 switch (nbuf) { |
180 case 0: |
183 case 0: |
181 if (ch < 0x80) { |
184 if (ch < 0x80) { |
182 // ASCII |
185 // ASCII |
183 result += QLatin1Char(ch); |
186 resultData[unicodeLen] = ch; |
|
187 ++unicodeLen; |
184 } else if (Is1stByte(ch)) { |
188 } else if (Is1stByte(ch)) { |
185 // GB18030? |
189 // GB18030? |
186 buf[0] = ch; |
190 buf[0] = ch; |
187 nbuf = 1; |
191 nbuf = 1; |
188 } else { |
192 } else { |
189 // Invalid |
193 // Invalid |
190 result += replacement; |
194 resultData[unicodeLen] = replacement; |
|
195 ++unicodeLen; |
191 ++invalid; |
196 ++invalid; |
192 } |
197 } |
193 break; |
198 break; |
194 case 1: |
199 case 1: |
195 // GB18030 2 bytes |
200 // GB18030 2 bytes |
196 if (Is2ndByteIn2Bytes(ch)) { |
201 if (Is2ndByteIn2Bytes(ch)) { |
197 buf[1] = ch; |
202 buf[1] = ch; |
198 int clen = 2; |
203 int clen = 2; |
199 uint u = qt_Gb18030ToUnicode(buf, clen); |
204 uint u = qt_Gb18030ToUnicode(buf, clen); |
200 if (clen == 2) { |
205 if (clen == 2) { |
201 result += QValidChar(u); |
206 resultData[unicodeLen] = qValidChar(static_cast<ushort>(u)); |
|
207 ++unicodeLen; |
202 } else { |
208 } else { |
203 result += replacement; |
209 resultData[unicodeLen] = replacement; |
|
210 ++unicodeLen; |
204 ++invalid; |
211 ++invalid; |
205 } |
212 } |
206 nbuf = 0; |
213 nbuf = 0; |
207 } else if (Is2ndByteIn4Bytes(ch)) { |
214 } else if (Is2ndByteIn4Bytes(ch)) { |
208 buf[1] = ch; |
215 buf[1] = ch; |
209 nbuf = 2; |
216 nbuf = 2; |
210 } else { |
217 } else { |
211 // Error |
218 // Error |
212 result += replacement; |
219 resultData[unicodeLen] = replacement; |
|
220 ++unicodeLen; |
213 ++invalid; |
221 ++invalid; |
214 nbuf = 0; |
222 nbuf = 0; |
215 } |
223 } |
216 break; |
224 break; |
217 case 2: |
225 case 2: |
218 // GB18030 3 bytes |
226 // GB18030 3 bytes |
219 if (Is3rdByte(ch)) { |
227 if (Is3rdByte(ch)) { |
220 buf[2] = ch; |
228 buf[2] = ch; |
221 nbuf = 3; |
229 nbuf = 3; |
222 } else { |
230 } else { |
223 result += replacement; |
231 resultData[unicodeLen] = replacement; |
|
232 ++unicodeLen; |
224 ++invalid; |
233 ++invalid; |
225 nbuf = 0; |
234 nbuf = 0; |
226 } |
235 } |
227 break; |
236 break; |
228 case 3: |
237 case 3: |
230 if (Is4thByte(ch)) { |
239 if (Is4thByte(ch)) { |
231 buf[3] = ch; |
240 buf[3] = ch; |
232 int clen = 4; |
241 int clen = 4; |
233 uint u = qt_Gb18030ToUnicode(buf, clen); |
242 uint u = qt_Gb18030ToUnicode(buf, clen); |
234 if (clen == 4) { |
243 if (clen == 4) { |
235 result += QValidChar(u); |
244 resultData[unicodeLen] = qValidChar(u); |
|
245 ++unicodeLen; |
236 } else { |
246 } else { |
237 result += replacement; |
247 resultData[unicodeLen] = replacement; |
|
248 ++unicodeLen; |
238 ++invalid; |
249 ++invalid; |
239 } |
250 } |
240 } else { |
251 } else { |
241 result += replacement; |
252 resultData[unicodeLen] = replacement; |
|
253 ++unicodeLen; |
242 ++invalid; |
254 ++invalid; |
243 } |
255 } |
244 nbuf = 0; |
256 nbuf = 0; |
245 break; |
257 break; |
246 } |
258 } |
247 } |
259 } |
|
260 result.resize(unicodeLen); |
|
261 |
248 if (state) { |
262 if (state) { |
249 state->remainingChars = nbuf; |
263 state->remainingChars = nbuf; |
250 state->state_data[0] = (buf[0] << 24) + (buf[1] << 16) + (buf[2] << 8) + buf[3]; |
264 state->state_data[0] = (buf[0] << 24) + (buf[1] << 16) + (buf[2] << 8) + buf[3]; |
251 state->invalidChars += invalid; |
265 state->invalidChars += invalid; |
252 } |
266 } |
443 |
457 |
444 QString QGb2312Codec::convertToUnicode(const char* chars, int len, ConverterState *state) const |
458 QString QGb2312Codec::convertToUnicode(const char* chars, int len, ConverterState *state) const |
445 { |
459 { |
446 uchar buf[2]; |
460 uchar buf[2]; |
447 int nbuf = 0; |
461 int nbuf = 0; |
448 QChar replacement = QChar::ReplacementCharacter; |
462 ushort replacement = QChar::ReplacementCharacter; |
449 if (state) { |
463 if (state) { |
450 if (state->flags & ConvertInvalidToNull) |
464 if (state->flags & ConvertInvalidToNull) |
451 replacement = QChar::Null; |
465 replacement = QChar::Null; |
452 nbuf = state->remainingChars; |
466 nbuf = state->remainingChars; |
453 buf[0] = state->state_data[0]; |
467 buf[0] = state->state_data[0]; |
454 buf[1] = state->state_data[1]; |
468 buf[1] = state->state_data[1]; |
455 } |
469 } |
456 int invalid = 0; |
470 int invalid = 0; |
457 |
471 |
458 QString result; |
472 QString result; |
|
473 result.resize(len); |
|
474 int unicodeLen = 0; |
|
475 ushort *const resultData = reinterpret_cast<ushort*>(result.data()); |
459 //qDebug("QGb2312Decoder::toUnicode(const char* chars, int len = %d)", len); |
476 //qDebug("QGb2312Decoder::toUnicode(const char* chars, int len = %d)", len); |
460 for (int i=0; i<len; i++) { |
477 for (int i=0; i<len; i++) { |
461 uchar ch = chars[i]; |
478 uchar ch = chars[i]; |
462 switch (nbuf) { |
479 switch (nbuf) { |
463 case 0: |
480 case 0: |
464 if (ch < 0x80) { |
481 if (ch < 0x80) { |
465 // ASCII |
482 // ASCII |
466 result += QLatin1Char(ch); |
483 resultData[unicodeLen] = ch; |
|
484 ++unicodeLen; |
467 } else if (IsByteInGb2312(ch)) { |
485 } else if (IsByteInGb2312(ch)) { |
468 // GB2312 1st byte? |
486 // GB2312 1st byte? |
469 buf[0] = ch; |
487 buf[0] = ch; |
470 nbuf = 1; |
488 nbuf = 1; |
471 } else { |
489 } else { |
472 // Invalid |
490 // Invalid |
473 result += replacement; |
491 resultData[unicodeLen] = replacement; |
|
492 ++unicodeLen; |
474 ++invalid; |
493 ++invalid; |
475 } |
494 } |
476 break; |
495 break; |
477 case 1: |
496 case 1: |
478 // GB2312 2nd byte |
497 // GB2312 2nd byte |
479 if (IsByteInGb2312(ch)) { |
498 if (IsByteInGb2312(ch)) { |
480 buf[1] = ch; |
499 buf[1] = ch; |
481 int clen = 2; |
500 int clen = 2; |
482 uint u = qt_Gb18030ToUnicode(buf, clen); |
501 uint u = qt_Gb18030ToUnicode(buf, clen); |
483 if (clen == 2) { |
502 if (clen == 2) { |
484 result += QValidChar(u); |
503 resultData[unicodeLen] = qValidChar(static_cast<ushort>(u)); |
|
504 ++unicodeLen; |
485 } else { |
505 } else { |
486 result += replacement; |
506 resultData[unicodeLen] = replacement; |
|
507 ++unicodeLen; |
487 ++invalid; |
508 ++invalid; |
488 } |
509 } |
489 nbuf = 0; |
510 nbuf = 0; |
490 } else { |
511 } else { |
491 // Error |
512 // Error |
492 result += replacement; |
513 resultData[unicodeLen] = replacement; |
|
514 ++unicodeLen; |
493 ++invalid; |
515 ++invalid; |
494 nbuf = 0; |
516 nbuf = 0; |
495 } |
517 } |
496 break; |
518 break; |
497 } |
519 } |
498 } |
520 } |
|
521 result.resize(unicodeLen); |
499 |
522 |
500 if (state) { |
523 if (state) { |
501 state->remainingChars = nbuf; |
524 state->remainingChars = nbuf; |
502 state->state_data[0] = buf[0]; |
525 state->state_data[0] = buf[0]; |
503 state->state_data[1] = buf[1]; |
526 state->state_data[1] = buf[1]; |