|
1 // Copyright (c) 1998-2009 Nokia Corporation and/or its subsidiary(-ies). |
|
2 // All rights reserved. |
|
3 // This component and the accompanying materials are made available |
|
4 // under the terms of "Eclipse Public License v1.0" |
|
5 // which accompanies this distribution, and is available |
|
6 // at the URL "http://www.eclipse.org/legal/epl-v10.html". |
|
7 // |
|
8 // Initial Contributors: |
|
9 // Nokia Corporation - initial contribution. |
|
10 // |
|
11 // Contributors: |
|
12 // |
|
13 // Description: |
|
14 // Implementation of the Standard Compression Scheme for Unicode. |
|
15 // This code is compiled only in the Unicode build. |
|
16 // |
|
17 // |
|
18 |
|
19 #ifdef _UNICODE |
|
20 |
|
21 #include <s32ucmp.h> |
|
22 |
|
23 const TUint32 TUnicodeCompressionState::iStaticWindow[EStaticWindows] = |
|
24 { |
|
25 0x0000, // tags |
|
26 0x0080, // Latin-1 supplement |
|
27 0x0100, // Latin Extended-A |
|
28 0x0300, // Combining Diacritics |
|
29 0x2000, // General Punctuation |
|
30 0x2080, // Currency Symbols |
|
31 0x2100, // Letterlike Symbols and Number Forms |
|
32 0x3000 // CJK Symbols and Punctuation |
|
33 }; |
|
34 |
|
35 const TUint32 TUnicodeCompressionState::iDynamicWindowDefault[EDynamicWindows] = |
|
36 { |
|
37 0x0080, // Latin-1 supplement |
|
38 0x00C0, // parts of Latin-1 supplement and Latin Extended-A |
|
39 0x0400, // Cyrillic |
|
40 0x0600, // Arabic |
|
41 0x0900, // Devanagari |
|
42 0x3040, // Hiragana |
|
43 0x30A0, // Katakana |
|
44 0xFF00 // Fullwidth ASCII |
|
45 }; |
|
46 |
|
47 const TUint16 TUnicodeCompressionState::iSpecialBase[ESpecialBases] = |
|
48 { |
|
49 0x00C0, // Latin 1 letters (not symbols) and some of Extended-A |
|
50 0x0250, // IPA extensions |
|
51 0x0370, // Greek |
|
52 0x0530, // Armenian |
|
53 0x3040, // Hiragana |
|
54 0x30A0, // Katakana |
|
55 0xFF60 // Halfwidth katakana |
|
56 }; |
|
57 |
|
58 // Single-byte mode tag values |
|
59 const TUint8 SQ0 = 0x01; // <byte> quote from window 0 |
|
60 const TUint8 SDX = 0x0B; // <hbyte> <lbyte> define window in expansion area |
|
61 const TUint8 SQU = 0x0E; // <hbyte> <lbyte> quote Unicode value |
|
62 const TUint8 SCU = 0x0F; // switch to Unicode mode |
|
63 const TUint8 SC0 = 0x10; // select dynamic window 0 |
|
64 const TUint8 SD0 = 0x18; // <byte> set dynamic window 0 index to <byte> and select it |
|
65 |
|
66 // Unicode mode tag values |
|
67 const TUint8 UC0 = 0xE0; // select dynamic window 0 and switch to single-byte mode |
|
68 const TUint8 UD0 = 0xE8; // <byte> set dynamic window 0 index to <byte>, select it and switch to |
|
69 // single-byte mode |
|
70 const TUint8 UQU = 0xF0; // <hbyte>, <lbyte> quote Unicode value |
|
71 const TUint8 UDX = 0xF1; // <hbyte>, <lbyte> define window in expansion area and switch to single-byte mode |
|
72 |
|
73 TUnicodeCompressionState::TUnicodeCompressionState(): |
|
74 iUnicodeWords(0), |
|
75 iMaxUnicodeWords(0), |
|
76 iCompressedBytes(0), |
|
77 iMaxCompressedBytes(0) |
|
78 { |
|
79 Reset(); |
|
80 } |
|
81 |
|
82 void TUnicodeCompressionState::Reset() |
|
83 { |
|
84 iUnicodeMode = FALSE; |
|
85 iActiveWindowBase = 0x0080; |
|
86 for (int i = 0; i < EDynamicWindows; i++) |
|
87 iDynamicWindow[i] = iDynamicWindowDefault[i]; |
|
88 } |
|
89 |
|
90 |
|
91 // Return the index of the static window that contains this code, if any, or -1 if there is none. |
|
92 TInt TUnicodeCompressionState::StaticWindowIndex(TUint16 aCode) |
|
93 { |
|
94 for (TInt i = 0; i < EStaticWindows; i++) |
|
95 if (aCode >= iStaticWindow[i] && aCode < iStaticWindow[i] + 128) |
|
96 return i; |
|
97 return -1; |
|
98 } |
|
99 |
|
100 /* |
|
101 If aCode can be accommodated in one of the legal dynamic windows, return the index of that window |
|
102 in the offset table. If not return KErrNotFound. |
|
103 */ |
|
104 TInt TUnicodeCompressionState::DynamicWindowOffsetIndex(TUint16 aCode) |
|
105 { |
|
106 if (aCode < 0x0080) |
|
107 return KErrNotFound; |
|
108 if (aCode >= 0x3400 && aCode <= 0xDFFF) |
|
109 return KErrNotFound; |
|
110 |
|
111 /* |
|
112 Prefer sections that cross half-block boundaries. These are better adapted to actual text. |
|
113 They are represented by offset indices 0xf9..0xff. |
|
114 */ |
|
115 for (int i = 0; i < ESpecialBases; i++) |
|
116 if (aCode >= iSpecialBase[i] && aCode < iSpecialBase[i] + 128) |
|
117 return 0xF9 + i; |
|
118 |
|
119 /* |
|
120 Offset indices 0x01..0x67 represent half blocks from 0x0080 to 0x3380 and |
|
121 0x68..0xA7 represent half blocks from 0xE000 to 0xFF80. |
|
122 */ |
|
123 if (aCode >= 0xE000) |
|
124 aCode -= 0xAC00; |
|
125 return aCode / 0x80; |
|
126 } |
|
127 |
|
128 // Return the base of the window represented by offset index <n>. Return 0 if the offset index is illegal. |
|
129 TUint32 TUnicodeCompressionState::DynamicWindowBase(TInt aOffsetIndex) |
|
130 { |
|
131 if (aOffsetIndex >= 0xF9 && aOffsetIndex <= 0xFF) |
|
132 { |
|
133 /* |
|
134 WARNING: don't optimise the following two lines by replacing them with |
|
135 'return iSpecialBase[aOffsetIndex - 0xF9];'. To do so would re-introduce an error |
|
136 in ARM builds caused by optimisation and consequent erroneous fixing up |
|
137 of the array base: see defect EDNGASR-4AGJQX in ER5U defects. |
|
138 */ |
|
139 int special_base_index = aOffsetIndex - 0xF9; |
|
140 return iSpecialBase[special_base_index]; |
|
141 } |
|
142 if (aOffsetIndex >= 0x01 && aOffsetIndex <= 0x67) |
|
143 return aOffsetIndex * 0x80; |
|
144 if (aOffsetIndex >= 0x68 && aOffsetIndex <= 0xA7) |
|
145 return aOffsetIndex * 0x80 + 0xAC00; |
|
146 return 0; |
|
147 } |
|
148 |
|
149 TBool TUnicodeCompressionState::EncodeAsIs(TUint16 aCode) |
|
150 { |
|
151 return aCode == 0x0000 || aCode == 0x0009 || aCode == 0x000A || aCode == 0x000D || |
|
152 (aCode >= 0x0020 && aCode <= 0x007F); |
|
153 } |
|
154 |
|
155 void TUnicodeCompressionState::Panic(TPanic aPanic) |
|
156 { |
|
157 User::Panic(_L("ucmp"),aPanic); |
|
158 } |
|
159 |
|
160 EXPORT_C TUnicodeCompressor::TUnicodeCompressor(): |
|
161 iInputBufferStart(0), |
|
162 iInputBufferSize(0), |
|
163 iOutputBufferStart(0), |
|
164 iOutputBufferSize(0), |
|
165 iDynamicWindowIndex(0), |
|
166 iOutputStream(NULL), |
|
167 iOutputPointer(NULL), |
|
168 iInput(NULL) |
|
169 { |
|
170 } |
|
171 |
|
172 EXPORT_C void TUnicodeCompressor::CompressL(RWriteStream& aOutput,MUnicodeSource& aInput, |
|
173 TInt aMaxOutputBytes,TInt aMaxInputWords, |
|
174 TInt* aOutputBytes,TInt* aInputWords) |
|
175 { |
|
176 DoCompressL(&aOutput,NULL,&aInput,aMaxOutputBytes,aMaxInputWords,aOutputBytes,aInputWords); |
|
177 } |
|
178 |
|
179 EXPORT_C void TUnicodeCompressor::CompressL(TUint8* aOutput,MUnicodeSource& aInput, |
|
180 TInt aMaxOutputBytes,TInt aMaxInputWords, |
|
181 TInt* aOutputBytes,TInt* aInputWords) |
|
182 { |
|
183 DoCompressL(NULL,aOutput,&aInput,aMaxOutputBytes,aMaxInputWords,aOutputBytes,aInputWords); |
|
184 } |
|
185 |
|
186 EXPORT_C TInt TUnicodeCompressor::FlushL(RWriteStream& aOutput,TInt aMaxOutputBytes,TInt& aOutputBytes) |
|
187 { |
|
188 DoCompressL(&aOutput,NULL,NULL,aMaxOutputBytes,0,&aOutputBytes,NULL); |
|
189 return iOutputBufferSize; |
|
190 } |
|
191 |
|
192 EXPORT_C TInt TUnicodeCompressor::FlushL(TUint8* aOutput,TInt aMaxOutputBytes,TInt& aOutputBytes) |
|
193 { |
|
194 DoCompressL(NULL,aOutput,NULL,aMaxOutputBytes,0,&aOutputBytes,NULL); |
|
195 return iOutputBufferSize; |
|
196 } |
|
197 |
|
198 EXPORT_C TInt TUnicodeCompressor::CompressedSizeL(MUnicodeSource& aInput,TInt aInputWords) |
|
199 { |
|
200 TInt bytes; |
|
201 TUnicodeCompressor c; |
|
202 c.DoCompressL(NULL,NULL,&aInput,KMaxTInt,aInputWords,&bytes,NULL); |
|
203 return bytes; |
|
204 } |
|
205 |
|
206 // Compress until input or output is exhausted or an exception occurs. |
|
207 void TUnicodeCompressor::DoCompressL(RWriteStream* aOutputStream,TUint8* aOutputPointer,MUnicodeSource* aInput, |
|
208 TInt aMaxOutputBytes,TInt aMaxInputWords, |
|
209 TInt* aOutputBytes,TInt* aInputWords) |
|
210 { |
|
211 iOutputStream = aOutputStream; |
|
212 iOutputPointer = aOutputPointer; |
|
213 iInput = aInput; |
|
214 iMaxCompressedBytes = aMaxOutputBytes; |
|
215 iMaxUnicodeWords = aMaxInputWords; |
|
216 iCompressedBytes = iUnicodeWords = 0; |
|
217 FlushOutputBufferL(); |
|
218 if (iInput) |
|
219 { |
|
220 while (iUnicodeWords < iMaxUnicodeWords && iCompressedBytes < iMaxCompressedBytes) |
|
221 { |
|
222 TUint16 x = iInput->ReadUnicodeValueL(); |
|
223 TAction action(x); |
|
224 iInputBuffer[(iInputBufferStart + iInputBufferSize) % EMaxInputBufferSize] = action; |
|
225 iInputBufferSize++; |
|
226 iUnicodeWords++; |
|
227 if (iInputBufferSize == EMaxInputBufferSize) |
|
228 WriteRunL(); |
|
229 } |
|
230 } |
|
231 FlushInputBufferL(); |
|
232 if (aOutputBytes) |
|
233 *aOutputBytes = iCompressedBytes; |
|
234 if (aInputWords) |
|
235 *aInputWords = iUnicodeWords; |
|
236 } |
|
237 |
|
238 TUnicodeCompressor::TAction::TAction(TUint16 aCode): |
|
239 iCode(aCode) |
|
240 { |
|
241 if (TUnicodeCompressionState::EncodeAsIs(aCode)) |
|
242 iTreatment = EPlainASCII; |
|
243 else |
|
244 { |
|
245 iTreatment = TUnicodeCompressionState::DynamicWindowOffsetIndex(aCode); |
|
246 if (iTreatment == -1) |
|
247 { |
|
248 iTreatment = TUnicodeCompressionState::StaticWindowIndex(aCode); |
|
249 if (iTreatment == -1) |
|
250 iTreatment = EPlainUnicode; |
|
251 else |
|
252 iTreatment += EFirstStatic; |
|
253 } |
|
254 } |
|
255 } |
|
256 |
|
257 void TUnicodeCompressor::WriteCharacterFromBuffer() |
|
258 { |
|
259 const TAction& action = iInputBuffer[iInputBufferStart]; |
|
260 iInputBufferSize--; |
|
261 iInputBufferStart = (iInputBufferStart + 1) % EMaxInputBufferSize; |
|
262 WriteCharacter(action); |
|
263 } |
|
264 |
|
265 void TUnicodeCompressor::FlushInputBufferL() |
|
266 { |
|
267 while (iInputBufferSize > 0 && iCompressedBytes < iMaxCompressedBytes) |
|
268 WriteRunL(); |
|
269 } |
|
270 |
|
271 void TUnicodeCompressor::WriteRunL() |
|
272 { |
|
273 // Write out any leading characters that can be passed through. |
|
274 if (!iUnicodeMode) |
|
275 while (iInputBufferSize > 0) |
|
276 { |
|
277 const TAction& action = iInputBuffer[iInputBufferStart]; |
|
278 if (action.iTreatment == TAction::EPlainASCII || |
|
279 (action.iCode >= iActiveWindowBase && action.iCode < iActiveWindowBase + 128)) |
|
280 WriteCharacterFromBuffer(); |
|
281 else |
|
282 break; |
|
283 } |
|
284 |
|
285 // Write a run of characters that cannot be passed through. |
|
286 int i; |
|
287 if (iInputBufferSize > 0) |
|
288 { |
|
289 /* |
|
290 Find a run of characters with the same treatment and select that treatment |
|
291 if the run has more than one character. |
|
292 */ |
|
293 int treatment = iInputBuffer[iInputBufferStart].iTreatment; |
|
294 int next_treatment = treatment; |
|
295 int run_size = 1; |
|
296 for (i = 1; i < iInputBufferSize; i++) |
|
297 { |
|
298 int index = (iInputBufferStart + i) % EMaxInputBufferSize; |
|
299 next_treatment = iInputBuffer[index].iTreatment; |
|
300 if (next_treatment != treatment) |
|
301 break; |
|
302 run_size++; |
|
303 } |
|
304 if (run_size > 1) |
|
305 SelectTreatment(treatment); |
|
306 for (i = 0; i < run_size; i++) |
|
307 WriteCharacterFromBuffer(); |
|
308 } |
|
309 |
|
310 FlushOutputBufferL(); |
|
311 } |
|
312 |
|
313 void TUnicodeCompressor::FlushOutputBufferL() |
|
314 { |
|
315 while (iOutputBufferSize > 0 && iCompressedBytes < iMaxCompressedBytes) |
|
316 { |
|
317 TUint8 byte = iOutputBuffer[iOutputBufferStart]; |
|
318 if (iOutputPointer) |
|
319 *iOutputPointer++ = byte; |
|
320 else if (iOutputStream) |
|
321 iOutputStream->WriteUint8L(byte); |
|
322 iCompressedBytes++; |
|
323 iOutputBufferSize--; |
|
324 iOutputBufferStart = (iOutputBufferStart + 1) % EMaxOutputBufferSize; |
|
325 } |
|
326 } |
|
327 |
|
328 void TUnicodeCompressor::SelectTreatment(TInt aTreatment) |
|
329 { |
|
330 if (aTreatment == TAction::EPlainUnicode) |
|
331 { |
|
332 // Switch to Unicode mode if not there already. |
|
333 if (!iUnicodeMode) |
|
334 { |
|
335 WriteByte(SCU); |
|
336 iUnicodeMode = TRUE; |
|
337 } |
|
338 return; |
|
339 } |
|
340 |
|
341 if (aTreatment == TAction::EPlainASCII) |
|
342 { |
|
343 // Switch to single-byte mode, using the current dynamic window, if not there already. |
|
344 if (iUnicodeMode) |
|
345 { |
|
346 WriteByte(UC0 + iDynamicWindowIndex); |
|
347 iUnicodeMode = FALSE; |
|
348 } |
|
349 return; |
|
350 } |
|
351 |
|
352 if (aTreatment >= TAction::EFirstDynamic && aTreatment <= TAction::ELastDynamic) |
|
353 { |
|
354 TUint32 base = DynamicWindowBase(aTreatment); |
|
355 |
|
356 // Switch to the appropriate dynamic window if it is available; if not, redefine and select dynamic window 4. |
|
357 for (int i = 0; i < EDynamicWindows; i++) |
|
358 if (base == iDynamicWindow[i]) |
|
359 { |
|
360 if (iUnicodeMode) |
|
361 WriteByte(UC0 + i); |
|
362 else if (i != iDynamicWindowIndex) |
|
363 WriteByte(SC0 + i); |
|
364 iUnicodeMode = FALSE; |
|
365 iDynamicWindowIndex = i; |
|
366 iActiveWindowBase = base; |
|
367 return; |
|
368 } |
|
369 if (iUnicodeMode) |
|
370 WriteByte(UD0 + 4); |
|
371 else |
|
372 WriteByte(SD0 + 4); |
|
373 iDynamicWindowIndex = 4; |
|
374 iUnicodeMode = FALSE; |
|
375 WriteByte(aTreatment); |
|
376 iDynamicWindow[4] = base; |
|
377 iActiveWindowBase = base; |
|
378 return; |
|
379 } |
|
380 } |
|
381 |
|
382 // Write a character without changing mode or window. |
|
383 void TUnicodeCompressor::WriteCharacter(const TAction& aAction) |
|
384 { |
|
385 if (iUnicodeMode) |
|
386 WriteUCharacter(aAction.iCode); |
|
387 else |
|
388 WriteSCharacter(aAction); |
|
389 } |
|
390 |
|
391 void TUnicodeCompressor::WriteUCharacter(TUint16 aCode) |
|
392 { |
|
393 // Emit the 'quote Unicode' tag if the character would conflict with a tag. |
|
394 if (aCode >= 0xE000 && aCode <= 0xF2FF) |
|
395 WriteByte(UQU); |
|
396 |
|
397 // Write the Unicode value big-end first. |
|
398 WriteByte((aCode >> 8) & 0xFF); |
|
399 WriteByte(aCode & 0xFF); |
|
400 } |
|
401 |
|
402 void TUnicodeCompressor::WriteByte(TUint aByte) |
|
403 { |
|
404 if (iOutputBufferSize >= EMaxOutputBufferSize) |
|
405 Panic(EOutputBufferOverflow); //Panic here is ok as this is a programming error |
|
406 iOutputBuffer[(iOutputBufferStart + iOutputBufferSize) % EMaxOutputBufferSize] = (TUint8)aByte; |
|
407 iOutputBufferSize++; |
|
408 } |
|
409 |
|
410 void TUnicodeCompressor::WriteSCharacter(const TAction& aAction) |
|
411 { |
|
412 // Characters in the range 0x0020..0x007F, plus nul, tab, cr, and lf, can be emitted as their low bytes. |
|
413 if (aAction.iTreatment == TAction::EPlainASCII) |
|
414 { |
|
415 WriteByte(aAction.iCode); |
|
416 return; |
|
417 } |
|
418 |
|
419 // Characters in a static window can be written using SQ<n> plus a byte in the range 0x00-0x7F |
|
420 if (aAction.iTreatment >= TAction::EFirstStatic && aAction.iTreatment <= TAction::ELastStatic) |
|
421 { |
|
422 int window = aAction.iTreatment - TAction::EFirstStatic; |
|
423 WriteByte(SQ0 + window); |
|
424 WriteByte(aAction.iCode); |
|
425 return; |
|
426 } |
|
427 |
|
428 // Characters in the current dynamic window can be written as a byte in the range 0x80-0xFF. |
|
429 if (aAction.iCode >= iActiveWindowBase && aAction.iCode < iActiveWindowBase + 128) |
|
430 { |
|
431 WriteByte(aAction.iCode - iActiveWindowBase + 0x80); |
|
432 return; |
|
433 } |
|
434 |
|
435 // Characters in another dynamic window can be written using SQ<n> plus a byte in the range 0x80-0xFF |
|
436 int i; |
|
437 for (i = 0; i < EDynamicWindows; i++) |
|
438 if (aAction.iCode >= iDynamicWindow[i] && aAction.iCode < iDynamicWindow[i] + 128) |
|
439 { |
|
440 WriteByte(SQ0 + i); |
|
441 WriteByte(aAction.iCode - iDynamicWindow[i] + 0x80); |
|
442 return; |
|
443 } |
|
444 |
|
445 // Other characters can be quoted. |
|
446 WriteByte(SQU); |
|
447 WriteByte((aAction.iCode >> 8) & 0xFF); |
|
448 WriteByte(aAction.iCode & 0xFF); |
|
449 return; |
|
450 } |
|
451 |
|
452 EXPORT_C TUnicodeExpander::TUnicodeExpander(): |
|
453 iInputBufferStart(0), |
|
454 iInputBufferSize(0), |
|
455 iOutputBufferStart(0), |
|
456 iOutputBufferSize(0), |
|
457 iOutput(NULL), |
|
458 iInputStream(NULL), |
|
459 iInputPointer(NULL) |
|
460 { |
|
461 } |
|
462 |
|
463 EXPORT_C void TUnicodeExpander::ExpandL(MUnicodeSink& aOutput,RReadStream& aInput, |
|
464 TInt aMaxOutputWords,TInt aMaxInputBytes, |
|
465 TInt* aOutputWords,TInt* aInputBytes) |
|
466 { |
|
467 DoExpandL(&aOutput,&aInput,NULL,aMaxOutputWords,aMaxInputBytes,aOutputWords,aInputBytes); |
|
468 } |
|
469 |
|
470 EXPORT_C void TUnicodeExpander::ExpandL(MUnicodeSink& aOutput,const TUint8* aInput, |
|
471 TInt aMaxOutputWords,TInt aMaxInputBytes, |
|
472 TInt* aOutputWords,TInt* aInputBytes) |
|
473 { |
|
474 DoExpandL(&aOutput,NULL,aInput,aMaxOutputWords,aMaxInputBytes,aOutputWords,aInputBytes); |
|
475 } |
|
476 |
|
477 EXPORT_C TInt TUnicodeExpander::FlushL(MUnicodeSink& aOutput,TInt aMaxOutputWords,TInt& aOutputWords) |
|
478 { |
|
479 DoExpandL(&aOutput,NULL,NULL,aMaxOutputWords,0,&aOutputWords,NULL); |
|
480 return iOutputBufferSize; |
|
481 } |
|
482 |
|
483 EXPORT_C TInt TUnicodeExpander::ExpandedSizeL(RReadStream& aInput,TInt aInputBytes) |
|
484 { |
|
485 TInt words; |
|
486 TUnicodeExpander e; |
|
487 e.DoExpandL(NULL,&aInput,NULL,KMaxTInt,aInputBytes,&words,NULL); |
|
488 return words; |
|
489 } |
|
490 |
|
491 EXPORT_C TInt TUnicodeExpander::ExpandedSizeL(const TUint8* aInput,TInt aInputBytes) |
|
492 { |
|
493 TInt words; |
|
494 TUnicodeExpander e; |
|
495 e.DoExpandL(NULL,NULL,aInput,KMaxTInt,aInputBytes,&words,NULL); |
|
496 return words; |
|
497 } |
|
498 |
|
499 // Expand until input or output is exhausted or an exception occurs. |
|
500 void TUnicodeExpander::DoExpandL(MUnicodeSink* aOutput,RReadStream* aInputStream,const TUint8* aInputPointer, |
|
501 TInt aMaxOutputWords,TInt aMaxInputBytes, |
|
502 TInt* aOutputWords,TInt* aInputBytes) |
|
503 { |
|
504 iOutput = aOutput; |
|
505 iInputStream = aInputStream; |
|
506 iInputPointer = aInputPointer; |
|
507 iMaxUnicodeWords = aMaxOutputWords; |
|
508 iMaxCompressedBytes = aMaxInputBytes; |
|
509 iUnicodeWords = iCompressedBytes = 0; |
|
510 iInputBufferStart = 0; |
|
511 FlushOutputBufferL(); |
|
512 if (iInputPointer || iInputStream) |
|
513 { |
|
514 while (iUnicodeWords + iOutputBufferSize < iMaxUnicodeWords && iCompressedBytes < iMaxCompressedBytes) |
|
515 HandleByteL(); |
|
516 } |
|
517 if (aOutputWords) |
|
518 *aOutputWords = iUnicodeWords; |
|
519 if (aInputBytes) |
|
520 *aInputBytes = iCompressedBytes; |
|
521 } |
|
522 |
|
523 void TUnicodeExpander::HandleByteL() |
|
524 { |
|
525 TUint8 byte; |
|
526 TBool handled = FALSE; |
|
527 if (ReadByteL(byte)) |
|
528 { |
|
529 if (iUnicodeMode) |
|
530 handled = HandleUByteL(byte); |
|
531 else |
|
532 handled = HandleSByteL(byte); |
|
533 } |
|
534 iInputBufferStart = 0; |
|
535 if (handled) |
|
536 iInputBufferSize = 0; |
|
537 FlushOutputBufferL(); |
|
538 } |
|
539 |
|
540 void TUnicodeExpander::FlushOutputBufferL() |
|
541 { |
|
542 while (iOutputBufferSize > 0 && iUnicodeWords < iMaxUnicodeWords) |
|
543 { |
|
544 if (iOutput) |
|
545 iOutput->WriteUnicodeValueL(iOutputBuffer[iOutputBufferStart]); |
|
546 iUnicodeWords++; |
|
547 iOutputBufferSize--; |
|
548 iOutputBufferStart = (iOutputBufferStart + 1) % EMaxOutputBufferSize; |
|
549 } |
|
550 } |
|
551 |
|
552 TBool TUnicodeExpander::HandleSByteL(TUint8 aByte) |
|
553 { |
|
554 // 'Pass-through' codes. |
|
555 if (TUnicodeCompressionState::EncodeAsIs(aByte)) |
|
556 { |
|
557 WriteChar(aByte); |
|
558 return TRUE; |
|
559 } |
|
560 |
|
561 // Codes 0x80-0xFF select a character from the active window. |
|
562 if (aByte >= 0x80) |
|
563 { |
|
564 WriteChar32(iActiveWindowBase + aByte - 0x80); |
|
565 return TRUE; |
|
566 } |
|
567 |
|
568 // SQU: quote a Unicode character. |
|
569 if (aByte == SQU) |
|
570 return QuoteUnicodeL(); |
|
571 |
|
572 // SCU: switch to Unicode mode. |
|
573 if (aByte == SCU) |
|
574 { |
|
575 iUnicodeMode = TRUE; |
|
576 return TRUE; |
|
577 } |
|
578 |
|
579 // SQn: quote from window n. |
|
580 if (aByte >= SQ0 && aByte <= SQ0 + 7) |
|
581 { |
|
582 int window = aByte - SQ0; |
|
583 TUint8 byte; |
|
584 if (ReadByteL(byte)) |
|
585 { |
|
586 TUint32 c = byte; |
|
587 if (c <= 0x7F) |
|
588 c += iStaticWindow[window]; |
|
589 else |
|
590 c += iDynamicWindow[window] - 0x80; |
|
591 WriteChar32(c); |
|
592 return TRUE; |
|
593 } |
|
594 else |
|
595 return FALSE; |
|
596 } |
|
597 |
|
598 // SCn: switch to dynamic window n. |
|
599 if (aByte >= SC0 && aByte <= SC0 + 7) |
|
600 { |
|
601 iActiveWindowBase = iDynamicWindow[aByte - SC0]; |
|
602 return TRUE; |
|
603 } |
|
604 |
|
605 // SDn: define dynamic window n and switch to it. |
|
606 if (aByte >= SD0 && aByte <= SD0 + 7) |
|
607 return DefineWindowL(aByte - SD0); |
|
608 |
|
609 // SDX: define window in the expansion space. |
|
610 if (aByte == SDX) |
|
611 return DefineExpansionWindowL(); |
|
612 |
|
613 User::Leave(KErrCorrupt); |
|
614 return FALSE; |
|
615 } |
|
616 |
|
617 TBool TUnicodeExpander::HandleUByteL(TUint8 aByte) |
|
618 { |
|
619 // Plain Unicode; get the low byte and emit the Unicode value. |
|
620 if (aByte <= 0xDF || aByte >= 0xF3) |
|
621 { |
|
622 TUint8 lo; |
|
623 if (ReadByteL(lo)) |
|
624 { |
|
625 TUint16 c = (TUint16)((aByte << 8) | lo); |
|
626 WriteChar(c); |
|
627 return TRUE; |
|
628 } |
|
629 else |
|
630 return FALSE; |
|
631 } |
|
632 |
|
633 // Quote a Unicode character that would otherwise conflict with a tag. |
|
634 if (aByte == UQU) |
|
635 return QuoteUnicodeL(); |
|
636 |
|
637 // UCn: change to single byte mode and select window n. |
|
638 if (aByte >= UC0 && aByte <= UC0 + 7) |
|
639 { |
|
640 iUnicodeMode = FALSE; |
|
641 iActiveWindowBase = iDynamicWindow[aByte - UC0]; |
|
642 return TRUE; |
|
643 } |
|
644 |
|
645 // UDn: define dynamic window n and switch to it. |
|
646 if (aByte >= UD0 && aByte <= UD0 + 7) |
|
647 return DefineWindowL(aByte - UD0); |
|
648 |
|
649 // UDX: define window in the expansion space. |
|
650 if (aByte == UDX) |
|
651 return DefineExpansionWindowL(); |
|
652 |
|
653 User::Leave(KErrCorrupt); |
|
654 return FALSE; |
|
655 } |
|
656 |
|
657 TBool TUnicodeExpander::QuoteUnicodeL() |
|
658 { |
|
659 TUint8 hi, lo; |
|
660 if (ReadByteL(hi) && ReadByteL(lo)) |
|
661 { |
|
662 TUint16 c = (TUint16)((hi << 8) | lo); |
|
663 WriteChar(c); |
|
664 return TRUE; |
|
665 } |
|
666 else |
|
667 return FALSE; |
|
668 } |
|
669 |
|
670 TBool TUnicodeExpander::DefineWindowL(TInt aIndex) |
|
671 { |
|
672 TUint8 window; |
|
673 if (ReadByteL(window)) |
|
674 { |
|
675 iUnicodeMode = FALSE; |
|
676 iActiveWindowBase = DynamicWindowBase(window); |
|
677 iDynamicWindow[aIndex] = iActiveWindowBase; |
|
678 return TRUE; |
|
679 } |
|
680 else |
|
681 return FALSE; |
|
682 } |
|
683 |
|
684 TBool TUnicodeExpander::DefineExpansionWindowL() |
|
685 { |
|
686 TUint8 hi, lo; |
|
687 if (ReadByteL(hi) && ReadByteL(lo)) |
|
688 { |
|
689 iUnicodeMode = FALSE; |
|
690 iActiveWindowBase = 0x10000 + (0x80 * ((hi & 0x1F) * 0x100 + lo)); |
|
691 iDynamicWindow[hi >> 5] = iActiveWindowBase; |
|
692 return TRUE; |
|
693 } |
|
694 else |
|
695 return FALSE; |
|
696 } |
|
697 |
|
698 // Read either from the buffer (in the case of restarting after source finished in mid-operation) or from the source. |
|
699 TBool TUnicodeExpander::ReadByteL(TUint8& aByte) |
|
700 { |
|
701 if (iInputBufferStart < iInputBufferSize) |
|
702 { |
|
703 aByte = iInputBuffer[iInputBufferStart++]; |
|
704 return TRUE; |
|
705 } |
|
706 else if (iCompressedBytes < iMaxCompressedBytes) |
|
707 { |
|
708 if (iInputPointer) |
|
709 aByte = *iInputPointer++; |
|
710 else |
|
711 aByte = iInputStream->ReadUint8L(); |
|
712 iInputBuffer[iInputBufferStart++] = aByte; |
|
713 iInputBufferSize = iInputBufferStart; |
|
714 iCompressedBytes++; |
|
715 return TRUE; |
|
716 } |
|
717 else |
|
718 return FALSE; |
|
719 } |
|
720 |
|
721 void TUnicodeExpander::WriteChar(TUint16 aChar) |
|
722 { |
|
723 if (iOutputBufferSize >= EMaxOutputBufferSize) |
|
724 Panic(EOutputBufferOverflow); //Panic here is ok since this is a programming error |
|
725 iOutputBuffer[(iOutputBufferStart + iOutputBufferSize) % EMaxOutputBufferSize] = aChar; |
|
726 iOutputBufferSize++; |
|
727 } |
|
728 |
|
729 // Write a Unicode character; write using surrogates if in the range 0x10000..0x10FFFF. |
|
730 void TUnicodeExpander::WriteChar32(TUint aChar) |
|
731 { |
|
732 if (aChar <= 0xFFFF) |
|
733 WriteChar((TUint16)aChar); |
|
734 else if (aChar <= 0x10FFFF) |
|
735 { |
|
736 aChar -= 0x10000; // reduce to 20-bit value in the range 0x0..0xFFFFF |
|
737 WriteChar((TUint16)(0xD800 + (aChar >> 10))); // first high surrogate + high 10 bits |
|
738 WriteChar((TUint16)(0xDC00 + (aChar & 0x03FF))); // first low surrogate + low 10 bits |
|
739 } |
|
740 else |
|
741 //Panic to be kept here as impossible to test this case (nor the one before). Biggest value that can be passed is 0xFFFFF |
|
742 Panic(ENotUnicode); |
|
743 } |
|
744 |
|
745 #endif // _UNICODE |