|
/*
* Copyright (c) 2006-2009 Nokia Corporation and/or its subsidiary(-ies).
* All rights reserved.
* This component and the accompanying materials are made available
* under the terms of the License "Eclipse Public License v1.0"
* which accompanies this distribution, and is available
* at the URL "http://www.eclipse.org/legal/epl-v10.html".
*
* Initial Contributors:
* Nokia Corporation - initial contribution.
*
* Contributors:
*
* Description:
* Ported from us_ucmp.cpp.
* Implementation of the Standard Compression Scheme for Unicode.
* This code is compiled only in the Unicode build.
*
*/
|
20 |
|
21 |
|
22 #include "ucmp.h" |
|
23 #include <stdexcept> |
|
24 #include "deserialiser.h" |
|
25 #include "serialiser.h" |
|
26 |
|
namespace
{
// File-local "not found" result; DynamicWindowOffsetIndex() returns it when
// a code cannot be placed in any legal dynamic window.
const int KErrNotFound = -1;
}
|
30 |
|
31 const TUint32 TUnicodeCompressionState::iStaticWindow[EStaticWindows] = |
|
32 { |
|
33 0x0000, // tags |
|
34 0x0080, // Latin-1 supplement |
|
35 0x0100, // Latin Extended-A |
|
36 0x0300, // Combining Diacritics |
|
37 0x2000, // General Punctuation |
|
38 0x2080, // Currency Symbols |
|
39 0x2100, // Letterlike Symbols and Number Forms |
|
40 0x3000 // CJK Symbols and Punctuation |
|
41 }; |
|
42 |
|
43 const TUint32 TUnicodeCompressionState::iDynamicWindowDefault[EDynamicWindows] = |
|
44 { |
|
45 0x0080, // Latin-1 supplement |
|
46 0x00C0, // parts of Latin-1 supplement and Latin Extended-A |
|
47 0x0400, // Cyrillic |
|
48 0x0600, // Arabic |
|
49 0x0900, // Devanagari |
|
50 0x3040, // Hiragana |
|
51 0x30A0, // Katakana |
|
52 0xFF00 // Fullwidth ASCII |
|
53 }; |
|
54 |
|
55 const TUint16 TUnicodeCompressionState::iSpecialBase[ESpecialBases] = |
|
56 { |
|
57 0x00C0, // Latin 1 letters (not symbols) and some of Extended-A |
|
58 0x0250, // IPA extensions |
|
59 0x0370, // Greek |
|
60 0x0530, // Armenian |
|
61 0x3040, // Hiragana |
|
62 0x30A0, // Katakana |
|
63 0xFF60 // Halfwidth katakana |
|
64 }; |
|
65 |
|
66 // Single-byte mode tag values |
|
67 const TUint8 SQ0 = 0x01; // <byte> quote from window 0 |
|
68 const TUint8 SDX = 0x0B; // <hbyte> <lbyte> define window in expansion area |
|
69 const TUint8 SQU = 0x0E; // <hbyte> <lbyte> quote Unicode value |
|
70 const TUint8 SCU = 0x0F; // switch to Unicode mode |
|
71 const TUint8 SC0 = 0x10; // select dynamic window 0 |
|
72 const TUint8 SD0 = 0x18; // <byte> set dynamic window 0 index to <byte> and select it |
|
73 |
|
74 // Unicode mode tag values |
|
75 const TUint8 UC0 = 0xE0; // select dynamic window 0 and switch to single-byte mode |
|
76 const TUint8 UD0 = 0xE8; // <byte> set dynamic window 0 index to <byte>, select it and switch to |
|
77 // single-byte mode |
|
78 const TUint8 UQU = 0xF0; // <hbyte>, <lbyte> quote Unicode value |
|
79 const TUint8 UDX = 0xF1; // <hbyte>, <lbyte> define window in expansion area and switch to single-byte mode |
|
80 |
|
81 |
|
82 TUnicodeCompressionState::TUnicodeCompressionState(): |
|
83 iUnicodeWords(0), |
|
84 iMaxUnicodeWords(0), |
|
85 iCompressedBytes(0), |
|
86 iMaxCompressedBytes(0) |
|
87 { |
|
88 Reset(); |
|
89 } |
|
90 |
|
91 void TUnicodeCompressionState::Reset() |
|
92 { |
|
93 iUnicodeMode = false; |
|
94 iActiveWindowBase = 0x0080; |
|
95 for (int i = 0; i < EDynamicWindows; i++) |
|
96 iDynamicWindow[i] = iDynamicWindowDefault[i]; |
|
97 } |
|
98 |
|
99 |
|
100 // Return the index of the static window that contains this code, if any, or -1 if there is none. |
|
101 TInt TUnicodeCompressionState::StaticWindowIndex(TUint16 aCode) |
|
102 { |
|
103 for (TInt i = 0; i < EStaticWindows; i++) |
|
104 if (aCode >= iStaticWindow[i] && aCode < iStaticWindow[i] + 128) |
|
105 return i; |
|
106 return -1; |
|
107 } |
|
108 |
|
109 /* |
|
110 If aCode can be accommodated in one of the legal dynamic windows, return the index of that window |
|
111 in the offset table. If not return KErrNotFound. |
|
112 */ |
|
113 TInt TUnicodeCompressionState::DynamicWindowOffsetIndex(TUint16 aCode) |
|
114 { |
|
115 if (aCode < 0x0080) |
|
116 return KErrNotFound; |
|
117 if (aCode >= 0x3400 && aCode <= 0xDFFF) |
|
118 return KErrNotFound; |
|
119 |
|
120 /* |
|
121 Prefer sections that cross half-->WriteUint8L(block boundaries. These are better adapted to actual text. |
|
122 They are represented by offset indices 0xf9..0xff. |
|
123 */ |
|
124 for (int i = 0; i < ESpecialBases; i++) |
|
125 if (aCode >= iSpecialBase[i] && aCode < iSpecialBase[i] + 128) |
|
126 return 0xF9 + i; |
|
127 |
|
128 /* |
|
129 Offset indices 0x01..0x67 represent half blocks from 0x0080 to 0x3380 and |
|
130 0x68..0xA7 represent half blocks from 0xE000 to 0xFF80. |
|
131 */ |
|
132 if (aCode >= 0xE000) |
|
133 aCode -= 0xAC00; |
|
134 return aCode / 0x80; |
|
135 } |
|
136 |
|
137 // Return the base of the window represented by offset index <n>. Return 0 if the offset index is illegal. |
|
138 TUint32 TUnicodeCompressionState::DynamicWindowBase(TInt aOffsetIndex) |
|
139 { |
|
140 if (aOffsetIndex >= 0xF9 && aOffsetIndex <= 0xFF) |
|
141 { |
|
142 /*->WriteUint8L( |
|
143 WARNING: don't optimise the following two lines by replacing them with |
|
144 'return iSpecialBase[aOffsetIndex - 0xF9];'. To do so would re-introduce a defect |
|
145 in ARM builds caused by optimisation and consequent erroneous fixing up |
|
146 of the array base: see defect EDNGASR-4AGJQX in ER5U defects. |
|
147 */ |
|
148 int special_base_index = aOffsetIndex - 0xF9; |
|
149 return iSpecialBase[special_base_index]; |
|
150 } |
|
151 if (aOffsetIndex >= 0x01 && aOffsetIndex <= 0x67) |
|
152 return aOffsetIndex * 0x80; |
|
153 if (aOffsetIndex >= 0x68 && aOffsetIndex <= 0xA7) |
|
154 return aOffsetIndex * 0x80 + 0xAC00; |
|
155 return 0; |
|
156 } |
|
157 |
|
158 TBool TUnicodeCompressionState::EncodeAsIs(TUint16 aCode) |
|
159 { |
|
160 return aCode == 0x0000 || aCode == 0x0009 || aCode == 0x000A || aCode == 0x000D || |
|
161 (aCode >= 0x0020 && aCode <= 0x007F); |
|
162 } |
|
163 |
|
164 void TUnicodeCompressionState::Panic(TPanic /*aPanic*/) |
|
165 { |
|
166 throw std::runtime_error("ucmp"); |
|
167 } |
|
168 |
|
169 TUnicodeCompressor::TUnicodeCompressor(): |
|
170 iInputBufferStart(0), |
|
171 iInputBufferSize(0), |
|
172 iOutputBufferStart(0), |
|
173 iOutputBufferSize(0), |
|
174 iDynamicWindowIndex(0), |
|
175 iOutputStream(NULL), |
|
176 iOutputPointer(NULL), |
|
177 iInput(NULL) |
|
178 { |
|
179 } |
|
180 |
|
181 void TUnicodeCompressor::CompressL(Serialiser& aOutput,MUnicodeSource& aInput, |
|
182 TInt aMaxOutputBytes,TInt aMaxInputWords, |
|
183 TInt* aOutputBytes,TInt* aInputWords) |
|
184 { |
|
185 DoCompressL(&aOutput,NULL,&aInput,aMaxOutputBytes,aMaxInputWords,aOutputBytes,aInputWords); |
|
186 } |
|
187 |
|
188 void TUnicodeCompressor::CompressL(TUint8* aOutput,MUnicodeSource& aInput, |
|
189 TInt aMaxOutputBytes,TInt aMaxInputWords, |
|
190 TInt* aOutputBytes,TInt* aInputWords) |
|
191 { |
|
192 DoCompressL(NULL,aOutput,&aInput,aMaxOutputBytes,aMaxInputWords,aOutputBytes,aInputWords); |
|
193 } |
|
194 |
|
195 TInt TUnicodeCompressor::FlushL(Serialiser& aOutput,TInt aMaxOutputBytes,TInt& aOutputBytes) |
|
196 { |
|
197 DoCompressL(&aOutput,NULL,NULL,aMaxOutputBytes,0,&aOutputBytes,NULL); |
|
198 return iOutputBufferSize; |
|
199 } |
|
200 |
|
201 TInt TUnicodeCompressor::FlushL(TUint8* aOutput,TInt aMaxOutputBytes,TInt& aOutputBytes) |
|
202 { |
|
203 DoCompressL(NULL,aOutput,NULL,aMaxOutputBytes,0,&aOutputBytes,NULL); |
|
204 return iOutputBufferSize; |
|
205 } |
|
206 |
|
207 TInt TUnicodeCompressor::CompressedSizeL(MUnicodeSource& aInput,TInt aInputWords) |
|
208 { |
|
209 TInt bytes; |
|
210 TUnicodeCompressor c; |
|
211 c.DoCompressL(NULL,NULL,&aInput,KMaxTInt,aInputWords,&bytes,NULL); |
|
212 return bytes; |
|
213 } |
|
214 |
|
215 // Compress until input or output is exhausted or an exception occurs. |
|
216 void TUnicodeCompressor::DoCompressL(Serialiser* aOutputStream,TUint8* aOutputPointer,MUnicodeSource* aInput, |
|
217 TInt aMaxOutputBytes,TInt aMaxInputWords, |
|
218 TInt* aOutputBytes,TInt* aInputWords) |
|
219 { |
|
220 iOutputStream = aOutputStream; |
|
221 iOutputPointer = aOutputPointer; |
|
222 iInput = aInput; |
|
223 iMaxCompressedBytes = aMaxOutputBytes; |
|
224 iMaxUnicodeWords = aMaxInputWords; |
|
225 iCompressedBytes = iUnicodeWords = 0; |
|
226 FlushOutputBufferL(); |
|
227 if (iInput) |
|
228 { |
|
229 while (iUnicodeWords < iMaxUnicodeWords && iCompressedBytes < iMaxCompressedBytes) |
|
230 { |
|
231 TUint16 x = iInput->ReadUnicodeValueL(); |
|
232 TAction action(x); |
|
233 iInputBuffer[(iInputBufferStart + iInputBufferSize) % EMaxInputBufferSize] = action; |
|
234 iInputBufferSize++; |
|
235 iUnicodeWords++; |
|
236 if (iInputBufferSize == EMaxInputBufferSize) |
|
237 WriteRunL(); |
|
238 } |
|
239 } |
|
240 FlushInputBufferL(); |
|
241 if (aOutputBytes) |
|
242 *aOutputBytes = iCompressedBytes; |
|
243 if (aInputWords) |
|
244 *aInputWords = iUnicodeWords; |
|
245 } |
|
246 |
|
247 TUnicodeCompressor::TAction::TAction(TUint16 aCode): |
|
248 iCode(aCode) |
|
249 { |
|
250 if (TUnicodeCompressionState::EncodeAsIs(aCode)) |
|
251 iTreatment = EPlainASCII; |
|
252 else |
|
253 { |
|
254 iTreatment = TUnicodeCompressionState::DynamicWindowOffsetIndex(aCode); |
|
255 if (iTreatment == -1) |
|
256 { |
|
257 iTreatment = TUnicodeCompressionState::StaticWindowIndex(aCode); |
|
258 if (iTreatment == -1) |
|
259 iTreatment = EPlainUnicode; |
|
260 else |
|
261 iTreatment += EFirstStatic; |
|
262 } |
|
263 } |
|
264 } |
|
265 |
|
266 void TUnicodeCompressor::WriteCharacterFromBuffer() |
|
267 { |
|
268 const TAction& action = iInputBuffer[iInputBufferStart]; |
|
269 iInputBufferSize--; |
|
270 iInputBufferStart = (iInputBufferStart + 1) % EMaxInputBufferSize; |
|
271 WriteCharacter(action); |
|
272 } |
|
273 |
|
274 void TUnicodeCompressor::FlushInputBufferL() |
|
275 { |
|
276 while (iInputBufferSize > 0 && iCompressedBytes < iMaxCompressedBytes) |
|
277 WriteRunL(); |
|
278 } |
|
279 |
|
280 void TUnicodeCompressor::WriteRunL() |
|
281 { |
|
282 // Write out any leading characters that can be passed through. |
|
283 if (!iUnicodeMode) |
|
284 while (iInputBufferSize > 0) |
|
285 { |
|
286 const TAction& action = iInputBuffer[iInputBufferStart]; |
|
287 if (action.iTreatment == TAction::EPlainASCII || |
|
288 (action.iCode >= iActiveWindowBase && action.iCode < iActiveWindowBase + 128)) |
|
289 WriteCharacterFromBuffer(); |
|
290 else |
|
291 break; |
|
292 } |
|
293 |
|
294 // Write a run of characters that cannot be passed through. |
|
295 int i; |
|
296 if (iInputBufferSize > 0) |
|
297 { |
|
298 /* |
|
299 Find a run of characters with the same treatment and select that treatment |
|
300 if the run has more than one character. |
|
301 */ |
|
302 int treatment = iInputBuffer[iInputBufferStart].iTreatment; |
|
303 int next_treatment = treatment; |
|
304 int run_size = 1; |
|
305 for (i = 1; i < iInputBufferSize; i++) |
|
306 { |
|
307 int index = (iInputBufferStart + i) % EMaxInputBufferSize; |
|
308 next_treatment = iInputBuffer[index].iTreatment; |
|
309 if (next_treatment != treatment) |
|
310 break; |
|
311 run_size++; |
|
312 } |
|
313 if (run_size > 1) |
|
314 SelectTreatment(treatment); |
|
315 for (i = 0; i < run_size; i++) |
|
316 WriteCharacterFromBuffer(); |
|
317 } |
|
318 |
|
319 FlushOutputBufferL(); |
|
320 } |
|
321 |
|
322 void TUnicodeCompressor::FlushOutputBufferL() |
|
323 { |
|
324 while (iOutputBufferSize > 0 && iCompressedBytes < iMaxCompressedBytes) |
|
325 { |
|
326 TUint8 byte = iOutputBuffer[iOutputBufferStart]; |
|
327 if (iOutputPointer) |
|
328 *iOutputPointer++ = byte; |
|
329 else if (iOutputStream) |
|
330 *iOutputStream << byte; |
|
331 iCompressedBytes++; |
|
332 iOutputBufferSize--; |
|
333 iOutputBufferStart = (iOutputBufferStart + 1) % EMaxOutputBufferSize; |
|
334 } |
|
335 } |
|
336 |
|
337 void TUnicodeCompressor::SelectTreatment(TInt aTreatment) |
|
338 { |
|
339 if (aTreatment == TAction::EPlainUnicode) |
|
340 { |
|
341 // Switch to Unicode mode if not there already. |
|
342 if (!iUnicodeMode) |
|
343 { |
|
344 WriteByte(SCU); |
|
345 iUnicodeMode = true; |
|
346 } |
|
347 return; |
|
348 } |
|
349 |
|
350 if (aTreatment == TAction::EPlainASCII) |
|
351 { |
|
352 // Switch to single-byte mode, using the current dynamic window, if not there already. |
|
353 if (iUnicodeMode) |
|
354 { |
|
355 WriteByte(UC0 + iDynamicWindowIndex); |
|
356 iUnicodeMode = false; |
|
357 } |
|
358 return; |
|
359 } |
|
360 |
|
361 if (aTreatment >= TAction::EFirstDynamic && aTreatment <= TAction::ELastDynamic) |
|
362 { |
|
363 TUint32 base = DynamicWindowBase(aTreatment); |
|
364 |
|
365 // Switch to the appropriate dynamic window if it is available; if not, redefine and select dynamic window 4. |
|
366 for (int i = 0; i < EDynamicWindows; i++) |
|
367 if (base == iDynamicWindow[i]) |
|
368 { |
|
369 if (iUnicodeMode) |
|
370 WriteByte(UC0 + i); |
|
371 else if (i != iDynamicWindowIndex) |
|
372 WriteByte(SC0 + i); |
|
373 iUnicodeMode = false; |
|
374 iDynamicWindowIndex = i; |
|
375 iActiveWindowBase = base; |
|
376 return; |
|
377 } |
|
378 if (iUnicodeMode) |
|
379 WriteByte(UD0 + 4); |
|
380 else |
|
381 WriteByte(SD0 + 4); |
|
382 iDynamicWindowIndex = 4; |
|
383 iUnicodeMode = false; |
|
384 WriteByte(aTreatment); |
|
385 iDynamicWindow[4] = base; |
|
386 iActiveWindowBase = base; |
|
387 return; |
|
388 } |
|
389 } |
|
390 |
|
391 // Write a character without changing mode or window. |
|
392 void TUnicodeCompressor::WriteCharacter(const TAction& aAction) |
|
393 { |
|
394 if (iUnicodeMode) |
|
395 WriteUCharacter(aAction.iCode); |
|
396 else |
|
397 WriteSCharacter(aAction); |
|
398 } |
|
399 |
|
400 void TUnicodeCompressor::WriteUCharacter(TUint16 aCode) |
|
401 { |
|
402 // Emit the 'quote Unicode' tag if the character would conflict with a tag. |
|
403 if (aCode >= 0xE000 && aCode <= 0xF2FF) |
|
404 WriteByte(UQU); |
|
405 |
|
406 // Write the Unicode value big-end first. |
|
407 WriteByte((aCode >> 8) & 0xFF); |
|
408 WriteByte(aCode & 0xFF); |
|
409 } |
|
410 |
|
411 void TUnicodeCompressor::WriteByte(TUint32 aByte) |
|
412 { |
|
413 if (iOutputBufferSize >= EMaxOutputBufferSize) |
|
414 Panic(EOutputBufferOverflow); |
|
415 iOutputBuffer[(iOutputBufferStart + iOutputBufferSize) % EMaxOutputBufferSize] = (TUint8)aByte; |
|
416 iOutputBufferSize++; |
|
417 } |
|
418 |
|
419 void TUnicodeCompressor::WriteSCharacter(const TAction& aAction) |
|
420 { |
|
421 // Characters in the range 0x0020..0x007F, plus nul, tab, cr, and lf, can be emitted as their low bytes. |
|
422 if (aAction.iTreatment == TAction::EPlainASCII) |
|
423 { |
|
424 WriteByte(aAction.iCode); |
|
425 return; |
|
426 } |
|
427 |
|
428 // Characters in a static window can be written using SQ<n> plus a byte in the range 0x00-0x7F |
|
429 if (aAction.iTreatment >= TAction::EFirstStatic && aAction.iTreatment <= TAction::ELastStatic) |
|
430 { |
|
431 int window = aAction.iTreatment - TAction::EFirstStatic; |
|
432 WriteByte(SQ0 + window); |
|
433 WriteByte(aAction.iCode); |
|
434 return; |
|
435 } |
|
436 |
|
437 // Characters in the current dynamic window can be written as a byte in the range 0x80-0xFF. |
|
438 if (aAction.iCode >= iActiveWindowBase && aAction.iCode < iActiveWindowBase + 128) |
|
439 { |
|
440 WriteByte(aAction.iCode - iActiveWindowBase + 0x80); |
|
441 return; |
|
442 } |
|
443 |
|
444 // Characters in another dynamic window can be written using SQ<n> plus a byte in the range 0x80-0xFF |
|
445 int i; |
|
446 for (i = 0; i < EDynamicWindows; i++) |
|
447 if (aAction.iCode >= iDynamicWindow[i] && aAction.iCode < iDynamicWindow[i] + 128) |
|
448 { |
|
449 WriteByte(SQ0 + i); |
|
450 WriteByte(aAction.iCode - iDynamicWindow[i] + 0x80); |
|
451 return; |
|
452 } |
|
453 |
|
454 // Other characters can be quoted. |
|
455 WriteByte(SQU); |
|
456 WriteByte((aAction.iCode >> 8) & 0xFF); |
|
457 WriteByte(aAction.iCode & 0xFF); |
|
458 return; |
|
459 } |
|
460 |
|
461 TUnicodeExpander::TUnicodeExpander(): |
|
462 iInputBufferStart(0), |
|
463 iInputBufferSize(0), |
|
464 iOutputBufferStart(0), |
|
465 iOutputBufferSize(0), |
|
466 iOutput(NULL), |
|
467 iInputStream(NULL), |
|
468 iInputPointer(NULL) |
|
469 { |
|
470 } |
|
471 |
|
472 void TUnicodeExpander::ExpandL(MUnicodeSink& aOutput,Deserialiser& aInput, |
|
473 TInt aMaxOutputWords,TInt aMaxInputBytes, |
|
474 TInt* aOutputWords,TInt* aInputBytes) |
|
475 { |
|
476 DoExpandL(&aOutput,&aInput,NULL,aMaxOutputWords,aMaxInputBytes,aOutputWords,aInputBytes); |
|
477 } |
|
478 |
|
479 void TUnicodeExpander::ExpandL(MUnicodeSink& aOutput,const TUint8* aInput, |
|
480 TInt aMaxOutputWords,TInt aMaxInputBytes, |
|
481 TInt* aOutputWords,TInt* aInputBytes) |
|
482 { |
|
483 DoExpandL(&aOutput,NULL,aInput,aMaxOutputWords,aMaxInputBytes,aOutputWords,aInputBytes); |
|
484 } |
|
485 |
|
486 TInt TUnicodeExpander::FlushL(MUnicodeSink& aOutput,TInt aMaxOutputWords,TInt& aOutputWords) |
|
487 { |
|
488 DoExpandL(&aOutput,NULL,NULL,aMaxOutputWords,0,&aOutputWords,NULL); |
|
489 return iOutputBufferSize; |
|
490 } |
|
491 |
|
492 TInt TUnicodeExpander::ExpandedSizeL(Deserialiser& aInput,TInt aInputBytes) |
|
493 { |
|
494 TInt words; |
|
495 TUnicodeExpander e; |
|
496 e.DoExpandL(NULL,&aInput,NULL,KMaxTInt,aInputBytes,&words,NULL); |
|
497 return words; |
|
498 } |
|
499 |
|
500 TInt TUnicodeExpander::ExpandedSizeL(const TUint8* aInput,TInt aInputBytes) |
|
501 { |
|
502 TInt words; |
|
503 TUnicodeExpander e; |
|
504 e.DoExpandL(NULL,NULL,aInput,KMaxTInt,aInputBytes,&words,NULL); |
|
505 return words; |
|
506 } |
|
507 |
|
508 // Expand until input or output is exhausted or an exception occurs. |
|
509 void TUnicodeExpander::DoExpandL(MUnicodeSink* aOutput,Deserialiser* aInputStream,const TUint8* aInputPointer, |
|
510 TInt aMaxOutputWords,TInt aMaxInputBytes, |
|
511 TInt* aOutputWords,TInt* aInputBytes) |
|
512 { |
|
513 iOutput = aOutput; |
|
514 iInputStream = aInputStream; |
|
515 iInputPointer = aInputPointer; |
|
516 iMaxUnicodeWords = aMaxOutputWords; |
|
517 iMaxCompressedBytes = aMaxInputBytes; |
|
518 iUnicodeWords = iCompressedBytes = 0; |
|
519 iInputBufferStart = 0; |
|
520 FlushOutputBufferL(); |
|
521 if (iInputPointer || iInputStream) |
|
522 { |
|
523 while (iUnicodeWords + iOutputBufferSize < iMaxUnicodeWords && iCompressedBytes < iMaxCompressedBytes) |
|
524 HandleByteL(); |
|
525 } |
|
526 if (aOutputWords) |
|
527 *aOutputWords = iUnicodeWords; |
|
528 if (aInputBytes) |
|
529 *aInputBytes = iCompressedBytes; |
|
530 } |
|
531 |
|
532 void TUnicodeExpander::HandleByteL() |
|
533 { |
|
534 TUint8 byte; |
|
535 TBool handled = false; |
|
536 if (ReadByteL(byte)) |
|
537 { |
|
538 if (iUnicodeMode) |
|
539 handled = HandleUByteL(byte); |
|
540 else |
|
541 handled = HandleSByteL(byte); |
|
542 } |
|
543 iInputBufferStart = 0; |
|
544 if (handled) |
|
545 iInputBufferSize = 0; |
|
546 FlushOutputBufferL(); |
|
547 } |
|
548 |
|
549 void TUnicodeExpander::FlushOutputBufferL() |
|
550 { |
|
551 while (iOutputBufferSize > 0 && iUnicodeWords < iMaxUnicodeWords) |
|
552 { |
|
553 if (iOutput) |
|
554 iOutput->WriteUnicodeValueL(iOutputBuffer[iOutputBufferStart]); |
|
555 iUnicodeWords++; |
|
556 iOutputBufferSize--; |
|
557 iOutputBufferStart = (iOutputBufferStart + 1) % EMaxOutputBufferSize; |
|
558 } |
|
559 } |
|
560 |
|
561 TBool TUnicodeExpander::HandleSByteL(TUint8 aByte) |
|
562 { |
|
563 // 'Pass-through' codes. |
|
564 if (TUnicodeCompressionState::EncodeAsIs(aByte)) |
|
565 { |
|
566 WriteChar(aByte); |
|
567 return true; |
|
568 } |
|
569 |
|
570 // Codes 0x80-0xFF select a character from the active window. |
|
571 if (aByte >= 0x80) |
|
572 { |
|
573 WriteChar32(iActiveWindowBase + aByte - 0x80); |
|
574 return true; |
|
575 } |
|
576 |
|
577 // SQU: quote a Unicode character. |
|
578 if (aByte == SQU) |
|
579 return QuoteUnicodeL(); |
|
580 |
|
581 // SCU: switch to Unicode mode. |
|
582 if (aByte == SCU) |
|
583 { |
|
584 iUnicodeMode = true; |
|
585 return true; |
|
586 } |
|
587 |
|
588 // SQn: quote from window n. |
|
589 if (aByte >= SQ0 && aByte <= SQ0 + 7) |
|
590 { |
|
591 int window = aByte - SQ0; |
|
592 TUint8 byte; |
|
593 if (ReadByteL(byte)) |
|
594 { |
|
595 TUint32 c = byte; |
|
596 if (c <= 0x7F) |
|
597 c += iStaticWindow[window]; |
|
598 else |
|
599 c += iDynamicWindow[window] - 0x80; |
|
600 WriteChar32(c); |
|
601 return true; |
|
602 } |
|
603 else |
|
604 return false; |
|
605 } |
|
606 |
|
607 // SCn: switch to dynamic window n. |
|
608 if (aByte >= SC0 && aByte <= SC0 + 7) |
|
609 { |
|
610 iActiveWindowBase = iDynamicWindow[aByte - SC0]; |
|
611 return true; |
|
612 } |
|
613 |
|
614 // SDn: define dynamic window n and switch to it. |
|
615 if (aByte >= SD0 && aByte <= SD0 + 7) |
|
616 return DefineWindowL(aByte - SD0); |
|
617 |
|
618 // SDX: define window in the expansion space. |
|
619 if (aByte == SDX) |
|
620 return DefineExpansionWindowL(); |
|
621 |
|
622 Panic(EUnhandledByte); |
|
623 return false; |
|
624 } |
|
625 |
|
626 TBool TUnicodeExpander::HandleUByteL(TUint8 aByte) |
|
627 { |
|
628 // Plain Unicode; get the low byte and emit the Unicode value. |
|
629 if (aByte <= 0xDF || aByte >= 0xF3) |
|
630 { |
|
631 TUint8 lo; |
|
632 if (ReadByteL(lo)) |
|
633 { |
|
634 TUint16 c = (TUint16)((aByte << 8) | lo); |
|
635 WriteChar(c); |
|
636 return true; |
|
637 } |
|
638 else |
|
639 return false; |
|
640 } |
|
641 |
|
642 // Quote a Unicode character that would otherwise conflict with a tag. |
|
643 if (aByte == UQU) |
|
644 return QuoteUnicodeL(); |
|
645 |
|
646 // UCn: change to single byte mode and select window n. |
|
647 if (aByte >= UC0 && aByte <= UC0 + 7) |
|
648 { |
|
649 iUnicodeMode = false; |
|
650 iActiveWindowBase = iDynamicWindow[aByte - UC0]; |
|
651 return true; |
|
652 } |
|
653 |
|
654 // UDn: define dynamic window n and switch to it. |
|
655 if (aByte >= UD0 && aByte <= UD0 + 7) |
|
656 return DefineWindowL(aByte - UD0); |
|
657 |
|
658 // UDX: define window in the expansion space. |
|
659 if (aByte == UDX) |
|
660 return DefineExpansionWindowL(); |
|
661 |
|
662 Panic(EUnhandledByte); |
|
663 return false; |
|
664 } |
|
665 |
|
666 TBool TUnicodeExpander::QuoteUnicodeL() |
|
667 { |
|
668 TUint8 hi, lo; |
|
669 if (ReadByteL(hi) && ReadByteL(lo)) |
|
670 { |
|
671 TUint16 c = (TUint16)((hi << 8) | lo); |
|
672 WriteChar(c); |
|
673 return true; |
|
674 } |
|
675 else |
|
676 return false; |
|
677 } |
|
678 |
|
679 TBool TUnicodeExpander::DefineWindowL(TInt aIndex) |
|
680 { |
|
681 TUint8 window; |
|
682 if (ReadByteL(window)) |
|
683 { |
|
684 iUnicodeMode = false; |
|
685 iActiveWindowBase = DynamicWindowBase(window); |
|
686 iDynamicWindow[aIndex] = iActiveWindowBase; |
|
687 return true; |
|
688 } |
|
689 else |
|
690 return false; |
|
691 } |
|
692 |
|
693 TBool TUnicodeExpander::DefineExpansionWindowL() |
|
694 { |
|
695 TUint8 hi, lo; |
|
696 if (ReadByteL(hi) && ReadByteL(lo)) |
|
697 { |
|
698 iUnicodeMode = false; |
|
699 iActiveWindowBase = 0x10000 + (0x80 * ((hi & 0x1F) * 0x100 + lo)); |
|
700 iDynamicWindow[hi >> 5] = iActiveWindowBase; |
|
701 return true; |
|
702 } |
|
703 else |
|
704 return false; |
|
705 } |
|
706 |
|
707 // Read either from the buffer (in the case of restarting after source finished in mid-operation) or from the source. |
|
708 TBool TUnicodeExpander::ReadByteL(TUint8& aByte) |
|
709 { |
|
710 if (iInputBufferStart < iInputBufferSize) |
|
711 { |
|
712 aByte = iInputBuffer[iInputBufferStart++]; |
|
713 return true; |
|
714 } |
|
715 else if (iCompressedBytes < iMaxCompressedBytes) |
|
716 { |
|
717 if (iInputPointer) |
|
718 aByte = *iInputPointer++; |
|
719 else |
|
720 *iInputStream >> aByte; |
|
721 iInputBuffer[iInputBufferStart++] = aByte; |
|
722 iInputBufferSize = iInputBufferStart; |
|
723 iCompressedBytes++; |
|
724 return true; |
|
725 } |
|
726 else |
|
727 return false; |
|
728 } |
|
729 |
|
730 void TUnicodeExpander::WriteChar(TUint16 aChar) |
|
731 { |
|
732 if (iOutputBufferSize >= EMaxOutputBufferSize) |
|
733 Panic(EOutputBufferOverflow); |
|
734 iOutputBuffer[(iOutputBufferStart + iOutputBufferSize) % EMaxOutputBufferSize] = aChar; |
|
735 iOutputBufferSize++; |
|
736 } |
|
737 |
|
738 // Write a Unicode character; write using surrogates if in the range 0x10000..0x10FFFF. |
|
739 void TUnicodeExpander::WriteChar32(TUint32 aChar) |
|
740 { |
|
741 if (aChar <= 0xFFFF) |
|
742 WriteChar((TUint16)aChar); |
|
743 else if (aChar <= 0x10FFFF) |
|
744 { |
|
745 aChar -= 0x10000; // reduce to 20-bit value in the range 0x0..0xFFFFF |
|
746 WriteChar((TUint16)(0xD800 + (aChar >> 10))); // first high surrogate + high 10 bits |
|
747 WriteChar((TUint16)(0xDC00 + (aChar & 0x03FF))); // first low surrogate + low 10 bits |
|
748 } |
|
749 else |
|
750 Panic(ENotUnicode); |
|
751 } |
|
752 |
|
753 |
|
754 void TMemoryStreamUnicodeSink::WriteUnicodeValueL(TUint16 aValue) |
|
755 { |
|
756 iStream.write((TUint8*)&aValue,sizeof(TUint16)); |
|
757 } |
|
758 |
|
759 TUint16 TMemoryStreamUnicodeSource::ReadUnicodeValueL() |
|
760 { |
|
761 TUint16 x; |
|
762 iStream.read((TUint8*)&x,sizeof(TUint16)); |
|
763 return x; |
|
764 } |