|
1 /* |
|
2 * Copyright (c) 2006-2009 Nokia Corporation and/or its subsidiary(-ies). |
|
3 * All rights reserved. |
|
4 * This component and the accompanying materials are made available |
|
5 * under the terms of the License "Eclipse Public License v1.0" |
|
6 * which accompanies this distribution, and is available |
|
7 * at the URL "http://www.eclipse.org/legal/epl-v10.html". |
|
8 * |
|
9 * Initial Contributors: |
|
10 * Nokia Corporation - initial contribution. |
|
11 * |
|
12 * Contributors: |
|
13 * |
|
14 * Description: |
|
15 * Ported from us_ucmp.cpp. |
|
16 * Implementation of the Standard Compression Scheme for Unicode. |
|
17 * This code is compiled only in the Unicode build. |
|
18 * |
|
19 */ |
|
20 |
|
21 |
|
22 #include "ucmp.h" |
|
23 #include <stdexcept> |
|
24 |
|
// File-local sentinel meaning "no matching window/index was found".
namespace
{
    const int KErrNotFound = -1;
}
|
28 |
|
29 const TUint32 TUnicodeCompressionState::iStaticWindow[EStaticWindows] = |
|
30 { |
|
31 0x0000, // tags |
|
32 0x0080, // Latin-1 supplement |
|
33 0x0100, // Latin Extended-A |
|
34 0x0300, // Combining Diacritics |
|
35 0x2000, // General Punctuation |
|
36 0x2080, // Currency Symbols |
|
37 0x2100, // Letterlike Symbols and Number Forms |
|
38 0x3000 // CJK Symbols and Punctuation |
|
39 }; |
|
40 |
|
41 const TUint32 TUnicodeCompressionState::iDynamicWindowDefault[EDynamicWindows] = |
|
42 { |
|
43 0x0080, // Latin-1 supplement |
|
44 0x00C0, // parts of Latin-1 supplement and Latin Extended-A |
|
45 0x0400, // Cyrillic |
|
46 0x0600, // Arabic |
|
47 0x0900, // Devanagari |
|
48 0x3040, // Hiragana |
|
49 0x30A0, // Katakana |
|
50 0xFF00 // Fullwidth ASCII |
|
51 }; |
|
52 |
|
53 const TUint16 TUnicodeCompressionState::iSpecialBase[ESpecialBases] = |
|
54 { |
|
55 0x00C0, // Latin 1 letters (not symbols) and some of Extended-A |
|
56 0x0250, // IPA extensions |
|
57 0x0370, // Greek |
|
58 0x0530, // Armenian |
|
59 0x3040, // Hiragana |
|
60 0x30A0, // Katakana |
|
61 0xFF60 // Halfwidth katakana |
|
62 }; |
|
63 |
|
64 // Single-byte mode tag values |
|
65 const TUint8 SQ0 = 0x01; // <byte> quote from window 0 |
|
66 const TUint8 SDX = 0x0B; // <hbyte> <lbyte> define window in expansion area |
|
67 const TUint8 SQU = 0x0E; // <hbyte> <lbyte> quote Unicode value |
|
68 const TUint8 SCU = 0x0F; // switch to Unicode mode |
|
69 const TUint8 SC0 = 0x10; // select dynamic window 0 |
|
70 const TUint8 SD0 = 0x18; // <byte> set dynamic window 0 index to <byte> and select it |
|
71 |
|
72 // Unicode mode tag values |
|
73 const TUint8 UC0 = 0xE0; // select dynamic window 0 and switch to single-byte mode |
|
74 const TUint8 UD0 = 0xE8; // <byte> set dynamic window 0 index to <byte>, select it and switch to |
|
75 // single-byte mode |
|
76 const TUint8 UQU = 0xF0; // <hbyte>, <lbyte> quote Unicode value |
|
77 const TUint8 UDX = 0xF1; // <hbyte>, <lbyte> define window in expansion area and switch to single-byte mode |
|
78 |
|
79 |
|
80 TUnicodeCompressionState::TUnicodeCompressionState(): |
|
81 iUnicodeWords(0), |
|
82 iMaxUnicodeWords(0), |
|
83 iCompressedBytes(0), |
|
84 iMaxCompressedBytes(0) |
|
85 { |
|
86 Reset(); |
|
87 } |
|
88 |
|
89 void TUnicodeCompressionState::Reset() |
|
90 { |
|
91 iUnicodeMode = false; |
|
92 iActiveWindowBase = 0x0080; |
|
93 for (int i = 0; i < EDynamicWindows; i++) |
|
94 iDynamicWindow[i] = iDynamicWindowDefault[i]; |
|
95 } |
|
96 |
|
97 |
|
98 // Return the index of the static window that contains this code, if any, or -1 if there is none. |
|
99 TInt32 TUnicodeCompressionState::StaticWindowIndex(TUint16 aCode) |
|
100 { |
|
101 for (TInt32 i = 0; i < EStaticWindows; i++) |
|
102 if (aCode >= iStaticWindow[i] && aCode < iStaticWindow[i] + 128) |
|
103 return i; |
|
104 return -1; |
|
105 } |
|
106 |
|
107 /* |
|
108 If aCode can be accommodated in one of the legal dynamic windows, return the index of that window |
|
109 in the offset table. If not return KErrNotFound. |
|
110 */ |
|
111 TInt32 TUnicodeCompressionState::DynamicWindowOffsetIndex(TUint16 aCode) |
|
112 { |
|
113 if (aCode < 0x0080) |
|
114 return KErrNotFound; |
|
115 if (aCode >= 0x3400 && aCode <= 0xDFFF) |
|
116 return KErrNotFound; |
|
117 |
|
118 /* |
|
119 Prefer sections that cross half-->WriteUint8(block boundaries. These are better adapted to actual text. |
|
120 They are represented by offset indices 0xf9..0xff. |
|
121 */ |
|
122 for (int i = 0; i < ESpecialBases; i++) |
|
123 if (aCode >= iSpecialBase[i] && aCode < iSpecialBase[i] + 128) |
|
124 return 0xF9 + i; |
|
125 |
|
126 /* |
|
127 Offset indices 0x01..0x67 represent half blocks from 0x0080 to 0x3380 and |
|
128 0x68..0xA7 represent half blocks from 0xE000 to 0xFF80. |
|
129 */ |
|
130 if (aCode >= 0xE000) |
|
131 aCode -= 0xAC00; |
|
132 return aCode / 0x80; |
|
133 } |
|
134 |
|
135 // Return the base of the window represented by offset index <n>. Return 0 if the offset index is illegal. |
|
136 TUint32 TUnicodeCompressionState::DynamicWindowBase(TInt32 aOffsetIndex) |
|
137 { |
|
138 if (aOffsetIndex >= 0xF9 && aOffsetIndex <= 0xFF) |
|
139 { |
|
140 /*->WriteUint8( |
|
141 WARNING: don't optimise the following two lines by replacing them with |
|
142 'return iSpecialBase[aOffsetIndex - 0xF9];'. To do so would re-introduce a problem |
|
143 in ARM builds caused by optimisation and consequent erroneous fixing up |
|
144 of the array base: see defect EDNGASR-4AGJQX in ER5U defects. |
|
145 */ |
|
146 int special_base_index = aOffsetIndex - 0xF9; |
|
147 return iSpecialBase[special_base_index]; |
|
148 } |
|
149 if (aOffsetIndex >= 0x01 && aOffsetIndex <= 0x67) |
|
150 return aOffsetIndex * 0x80; |
|
151 if (aOffsetIndex >= 0x68 && aOffsetIndex <= 0xA7) |
|
152 return aOffsetIndex * 0x80 + 0xAC00; |
|
153 return 0; |
|
154 } |
|
155 |
|
156 TBool TUnicodeCompressionState::EncodeAsIs(TUint16 aCode) |
|
157 { |
|
158 return aCode == 0x0000 || aCode == 0x0009 || aCode == 0x000A || aCode == 0x000D || |
|
159 (aCode >= 0x0020 && aCode <= 0x007F); |
|
160 } |
|
161 |
|
162 void TUnicodeCompressionState::Panic(TPanic /*aPanic*/) |
|
163 { |
|
164 throw std::runtime_error("ucmp"); |
|
165 } |
|
166 |
|
167 TUnicodeCompressor::TUnicodeCompressor(): |
|
168 iInputBufferStart(0), |
|
169 iInputBufferSize(0), |
|
170 iOutputBufferStart(0), |
|
171 iOutputBufferSize(0), |
|
172 iDynamicWindowIndex(0), |
|
173 iOutputStream(NULL), |
|
174 iOutputPointer(NULL), |
|
175 iInput(NULL) |
|
176 { |
|
177 } |
|
178 |
|
179 void TUnicodeCompressor::Compress(Serialiser& aOutput,MUnicodeSource& aInput, |
|
180 TInt32 aMaxOutputBytes,TInt32 aMaxInputWords, |
|
181 TInt32* aOutputBytes,TInt32* aInputWords) |
|
182 { |
|
183 DoCompress(&aOutput,NULL,&aInput,aMaxOutputBytes,aMaxInputWords,aOutputBytes,aInputWords); |
|
184 } |
|
185 |
|
186 void TUnicodeCompressor::Compress(TUint8* aOutput,MUnicodeSource& aInput, |
|
187 TInt32 aMaxOutputBytes,TInt32 aMaxInputWords, |
|
188 TInt32* aOutputBytes,TInt32* aInputWords) |
|
189 { |
|
190 DoCompress(NULL,aOutput,&aInput,aMaxOutputBytes,aMaxInputWords,aOutputBytes,aInputWords); |
|
191 } |
|
192 |
|
193 TInt32 TUnicodeCompressor::Flush(Serialiser& aOutput,TInt32 aMaxOutputBytes,TInt32& aOutputBytes) |
|
194 { |
|
195 DoCompress(&aOutput,NULL,NULL,aMaxOutputBytes,0,&aOutputBytes,NULL); |
|
196 return iOutputBufferSize; |
|
197 } |
|
198 |
|
199 TInt32 TUnicodeCompressor::Flush(TUint8* aOutput,TInt32 aMaxOutputBytes,TInt32& aOutputBytes) |
|
200 { |
|
201 DoCompress(NULL,aOutput,NULL,aMaxOutputBytes,0,&aOutputBytes,NULL); |
|
202 return iOutputBufferSize; |
|
203 } |
|
204 |
|
205 TInt32 TUnicodeCompressor::CompressedSize(MUnicodeSource& aInput,TInt32 aInputWords) |
|
206 { |
|
207 TInt32 bytes; |
|
208 TUnicodeCompressor c; |
|
209 c.DoCompress(NULL,NULL,&aInput,KMaxTInt,aInputWords,&bytes,NULL); |
|
210 return bytes; |
|
211 } |
|
212 |
|
213 // Compress until input or output is exhausted or an exception occurs. |
|
214 void TUnicodeCompressor::DoCompress(Serialiser* aOutputStream,TUint8* aOutputPointer,MUnicodeSource* aInput, |
|
215 TInt32 aMaxOutputBytes,TInt32 aMaxInputWords, |
|
216 TInt32* aOutputBytes,TInt32* aInputWords) |
|
217 { |
|
218 iOutputStream = aOutputStream; |
|
219 iOutputPointer = aOutputPointer; |
|
220 iInput = aInput; |
|
221 iMaxCompressedBytes = aMaxOutputBytes; |
|
222 iMaxUnicodeWords = aMaxInputWords; |
|
223 iCompressedBytes = iUnicodeWords = 0; |
|
224 FlushOutputBuffer(); |
|
225 if (iInput) |
|
226 { |
|
227 while (iUnicodeWords < iMaxUnicodeWords && iCompressedBytes < iMaxCompressedBytes) |
|
228 { |
|
229 TUint16 x = iInput->ReadUnicodeValue(); |
|
230 TAction action(x); |
|
231 iInputBuffer[(iInputBufferStart + iInputBufferSize) % EMaxInputBufferSize] = action; |
|
232 iInputBufferSize++; |
|
233 iUnicodeWords++; |
|
234 if (iInputBufferSize == EMaxInputBufferSize) |
|
235 WriteRun(); |
|
236 } |
|
237 } |
|
238 FlushInputBuffer(); |
|
239 if (aOutputBytes) |
|
240 *aOutputBytes = iCompressedBytes; |
|
241 if (aInputWords) |
|
242 *aInputWords = iUnicodeWords; |
|
243 } |
|
244 |
|
245 TUnicodeCompressor::TAction::TAction(TUint16 aCode): |
|
246 iCode(aCode) |
|
247 { |
|
248 if (TUnicodeCompressionState::EncodeAsIs(aCode)) |
|
249 iTreatment = EPlainASCII; |
|
250 else |
|
251 { |
|
252 iTreatment = TUnicodeCompressionState::DynamicWindowOffsetIndex(aCode); |
|
253 if (iTreatment == -1) |
|
254 { |
|
255 iTreatment = TUnicodeCompressionState::StaticWindowIndex(aCode); |
|
256 if (iTreatment == -1) |
|
257 iTreatment = EPlainUnicode; |
|
258 else |
|
259 iTreatment += EFirstStatic; |
|
260 } |
|
261 } |
|
262 } |
|
263 |
|
264 void TUnicodeCompressor::WriteCharacterFromBuffer() |
|
265 { |
|
266 const TAction& action = iInputBuffer[iInputBufferStart]; |
|
267 iInputBufferSize--; |
|
268 iInputBufferStart = (iInputBufferStart + 1) % EMaxInputBufferSize; |
|
269 WriteCharacter(action); |
|
270 } |
|
271 |
|
272 void TUnicodeCompressor::FlushInputBuffer() |
|
273 { |
|
274 while (iInputBufferSize > 0 && iCompressedBytes < iMaxCompressedBytes) |
|
275 WriteRun(); |
|
276 } |
|
277 |
|
278 void TUnicodeCompressor::WriteRun() |
|
279 { |
|
280 // Write out any leading characters that can be passed through. |
|
281 if (!iUnicodeMode) |
|
282 while (iInputBufferSize > 0) |
|
283 { |
|
284 const TAction& action = iInputBuffer[iInputBufferStart]; |
|
285 if (action.iTreatment == TAction::EPlainASCII || |
|
286 (action.iCode >= iActiveWindowBase && action.iCode < iActiveWindowBase + 128)) |
|
287 WriteCharacterFromBuffer(); |
|
288 else |
|
289 break; |
|
290 } |
|
291 |
|
292 // Write a run of characters that cannot be passed through. |
|
293 int i; |
|
294 if (iInputBufferSize > 0) |
|
295 { |
|
296 /* |
|
297 Find a run of characters with the same treatment and select that treatment |
|
298 if the run has more than one character. |
|
299 */ |
|
300 int treatment = iInputBuffer[iInputBufferStart].iTreatment; |
|
301 int next_treatment = treatment; |
|
302 int run_size = 1; |
|
303 for (i = 1; i < iInputBufferSize; i++) |
|
304 { |
|
305 int index = (iInputBufferStart + i) % EMaxInputBufferSize; |
|
306 next_treatment = iInputBuffer[index].iTreatment; |
|
307 if (next_treatment != treatment) |
|
308 break; |
|
309 run_size++; |
|
310 } |
|
311 if (run_size > 1) |
|
312 SelectTreatment(treatment); |
|
313 for (i = 0; i < run_size; i++) |
|
314 WriteCharacterFromBuffer(); |
|
315 } |
|
316 |
|
317 FlushOutputBuffer(); |
|
318 } |
|
319 |
|
320 void TUnicodeCompressor::FlushOutputBuffer() |
|
321 { |
|
322 while (iOutputBufferSize > 0 && iCompressedBytes < iMaxCompressedBytes) |
|
323 { |
|
324 TUint8 byte = iOutputBuffer[iOutputBufferStart]; |
|
325 if (iOutputPointer) |
|
326 *iOutputPointer++ = byte; |
|
327 else if (iOutputStream) |
|
328 *iOutputStream << byte; |
|
329 iCompressedBytes++; |
|
330 iOutputBufferSize--; |
|
331 iOutputBufferStart = (iOutputBufferStart + 1) % EMaxOutputBufferSize; |
|
332 } |
|
333 } |
|
334 |
|
335 void TUnicodeCompressor::SelectTreatment(TInt32 aTreatment) |
|
336 { |
|
337 if (aTreatment == TAction::EPlainUnicode) |
|
338 { |
|
339 // Switch to Unicode mode if not there already. |
|
340 if (!iUnicodeMode) |
|
341 { |
|
342 WriteByte(SCU); |
|
343 iUnicodeMode = true; |
|
344 } |
|
345 return; |
|
346 } |
|
347 |
|
348 if (aTreatment == TAction::EPlainASCII) |
|
349 { |
|
350 // Switch to single-byte mode, using the current dynamic window, if not there already. |
|
351 if (iUnicodeMode) |
|
352 { |
|
353 WriteByte(UC0 + iDynamicWindowIndex); |
|
354 iUnicodeMode = false; |
|
355 } |
|
356 return; |
|
357 } |
|
358 |
|
359 if (aTreatment >= TAction::EFirstDynamic && aTreatment <= TAction::ELastDynamic) |
|
360 { |
|
361 TUint32 base = DynamicWindowBase(aTreatment); |
|
362 |
|
363 // Switch to the appropriate dynamic window if it is available; if not, redefine and select dynamic window 4. |
|
364 for (int i = 0; i < EDynamicWindows; i++) |
|
365 if (base == iDynamicWindow[i]) |
|
366 { |
|
367 if (iUnicodeMode) |
|
368 WriteByte(UC0 + i); |
|
369 else if (i != iDynamicWindowIndex) |
|
370 WriteByte(SC0 + i); |
|
371 iUnicodeMode = false; |
|
372 iDynamicWindowIndex = i; |
|
373 iActiveWindowBase = base; |
|
374 return; |
|
375 } |
|
376 if (iUnicodeMode) |
|
377 WriteByte(UD0 + 4); |
|
378 else |
|
379 WriteByte(SD0 + 4); |
|
380 iDynamicWindowIndex = 4; |
|
381 iUnicodeMode = false; |
|
382 WriteByte(aTreatment); |
|
383 iDynamicWindow[4] = base; |
|
384 iActiveWindowBase = base; |
|
385 return; |
|
386 } |
|
387 } |
|
388 |
|
389 // Write a character without changing mode or window. |
|
390 void TUnicodeCompressor::WriteCharacter(const TAction& aAction) |
|
391 { |
|
392 if (iUnicodeMode) |
|
393 WriteUCharacter(aAction.iCode); |
|
394 else |
|
395 WriteSCharacter(aAction); |
|
396 } |
|
397 |
|
398 void TUnicodeCompressor::WriteUCharacter(TUint16 aCode) |
|
399 { |
|
400 // Emit the 'quote Unicode' tag if the character would conflict with a tag. |
|
401 if (aCode >= 0xE000 && aCode <= 0xF2FF) |
|
402 WriteByte(UQU); |
|
403 |
|
404 // Write the Unicode value big-end first. |
|
405 WriteByte((aCode >> 8) & 0xFF); |
|
406 WriteByte(aCode & 0xFF); |
|
407 } |
|
408 |
|
409 void TUnicodeCompressor::WriteByte(TUint32 aByte) |
|
410 { |
|
411 if (iOutputBufferSize >= EMaxOutputBufferSize) |
|
412 Panic(EOutputBufferOverflow); |
|
413 iOutputBuffer[(iOutputBufferStart + iOutputBufferSize) % EMaxOutputBufferSize] = (TUint8)aByte; |
|
414 iOutputBufferSize++; |
|
415 } |
|
416 |
|
417 void TUnicodeCompressor::WriteSCharacter(const TAction& aAction) |
|
418 { |
|
419 // Characters in the range 0x0020..0x007F, plus nul, tab, cr, and lf, can be emitted as their low bytes. |
|
420 if (aAction.iTreatment == TAction::EPlainASCII) |
|
421 { |
|
422 WriteByte(aAction.iCode); |
|
423 return; |
|
424 } |
|
425 |
|
426 // Characters in a static window can be written using SQ<n> plus a byte in the range 0x00-0x7F |
|
427 if (aAction.iTreatment >= TAction::EFirstStatic && aAction.iTreatment <= TAction::ELastStatic) |
|
428 { |
|
429 int window = aAction.iTreatment - TAction::EFirstStatic; |
|
430 WriteByte(SQ0 + window); |
|
431 WriteByte(aAction.iCode); |
|
432 return; |
|
433 } |
|
434 |
|
435 // Characters in the current dynamic window can be written as a byte in the range 0x80-0xFF. |
|
436 if (aAction.iCode >= iActiveWindowBase && aAction.iCode < iActiveWindowBase + 128) |
|
437 { |
|
438 WriteByte(aAction.iCode - iActiveWindowBase + 0x80); |
|
439 return; |
|
440 } |
|
441 |
|
442 // Characters in another dynamic window can be written using SQ<n> plus a byte in the range 0x80-0xFF |
|
443 int i; |
|
444 for (i = 0; i < EDynamicWindows; i++) |
|
445 if (aAction.iCode >= iDynamicWindow[i] && aAction.iCode < iDynamicWindow[i] + 128) |
|
446 { |
|
447 WriteByte(SQ0 + i); |
|
448 WriteByte(aAction.iCode - iDynamicWindow[i] + 0x80); |
|
449 return; |
|
450 } |
|
451 |
|
452 // Other characters can be quoted. |
|
453 WriteByte(SQU); |
|
454 WriteByte((aAction.iCode >> 8) & 0xFF); |
|
455 WriteByte(aAction.iCode & 0xFF); |
|
456 return; |
|
457 } |
|
458 |
|
459 TUnicodeExpander::TUnicodeExpander(): |
|
460 iInputBufferStart(0), |
|
461 iInputBufferSize(0), |
|
462 iOutputBufferStart(0), |
|
463 iOutputBufferSize(0), |
|
464 iOutput(NULL), |
|
465 iInputStream(NULL), |
|
466 iInputPointer(NULL) |
|
467 { |
|
468 } |
|
469 |
|
470 void TUnicodeExpander::Expand(MUnicodeSink& aOutput,Deserialiser& aInput, |
|
471 TInt32 aMaxOutputWords,TInt32 aMaxInputBytes, |
|
472 TInt32* aOutputWords,TInt32* aInputBytes) |
|
473 { |
|
474 DoExpand(&aOutput,&aInput,NULL,aMaxOutputWords,aMaxInputBytes,aOutputWords,aInputBytes); |
|
475 } |
|
476 |
|
477 void TUnicodeExpander::Expand(MUnicodeSink& aOutput,const TUint8* aInput, |
|
478 TInt32 aMaxOutputWords,TInt32 aMaxInputBytes, |
|
479 TInt32* aOutputWords,TInt32* aInputBytes) |
|
480 { |
|
481 DoExpand(&aOutput,NULL,aInput,aMaxOutputWords,aMaxInputBytes,aOutputWords,aInputBytes); |
|
482 } |
|
483 |
|
484 TInt32 TUnicodeExpander::Flush(MUnicodeSink& aOutput,TInt32 aMaxOutputWords,TInt32& aOutputWords) |
|
485 { |
|
486 DoExpand(&aOutput,NULL,NULL,aMaxOutputWords,0,&aOutputWords,NULL); |
|
487 return iOutputBufferSize; |
|
488 } |
|
489 |
|
490 TInt32 TUnicodeExpander::ExpandedSize(Deserialiser& aInput,TInt32 aInputBytes) |
|
491 { |
|
492 TInt32 words; |
|
493 TUnicodeExpander e; |
|
494 e.DoExpand(NULL,&aInput,NULL,KMaxTInt,aInputBytes,&words,NULL); |
|
495 return words; |
|
496 } |
|
497 |
|
498 TInt32 TUnicodeExpander::ExpandedSize(const TUint8* aInput,TInt32 aInputBytes) |
|
499 { |
|
500 TInt32 words; |
|
501 TUnicodeExpander e; |
|
502 e.DoExpand(NULL,NULL,aInput,KMaxTInt,aInputBytes,&words,NULL); |
|
503 return words; |
|
504 } |
|
505 |
|
506 // Expand until input or output is exhausted or an exception occurs. |
|
507 void TUnicodeExpander::DoExpand(MUnicodeSink* aOutput,Deserialiser* aInputStream,const TUint8* aInputPointer, |
|
508 TInt32 aMaxOutputWords,TInt32 aMaxInputBytes, |
|
509 TInt32* aOutputWords,TInt32* aInputBytes) |
|
510 { |
|
511 iOutput = aOutput; |
|
512 iInputStream = aInputStream; |
|
513 iInputPointer = aInputPointer; |
|
514 iMaxUnicodeWords = aMaxOutputWords; |
|
515 iMaxCompressedBytes = aMaxInputBytes; |
|
516 iUnicodeWords = iCompressedBytes = 0; |
|
517 iInputBufferStart = 0; |
|
518 FlushOutputBuffer(); |
|
519 if (iInputPointer || iInputStream) |
|
520 { |
|
521 while (iUnicodeWords + iOutputBufferSize < iMaxUnicodeWords && iCompressedBytes < iMaxCompressedBytes) |
|
522 HandleByte(); |
|
523 } |
|
524 if (aOutputWords) |
|
525 *aOutputWords = iUnicodeWords; |
|
526 if (aInputBytes) |
|
527 *aInputBytes = iCompressedBytes; |
|
528 } |
|
529 |
|
530 void TUnicodeExpander::HandleByte() |
|
531 { |
|
532 TUint8 byte; |
|
533 TBool handled = false; |
|
534 if (ReadByte(byte)) |
|
535 { |
|
536 if (iUnicodeMode) |
|
537 handled = HandleUByte(byte); |
|
538 else |
|
539 handled = HandleSByte(byte); |
|
540 } |
|
541 iInputBufferStart = 0; |
|
542 if (handled) |
|
543 iInputBufferSize = 0; |
|
544 FlushOutputBuffer(); |
|
545 } |
|
546 |
|
547 void TUnicodeExpander::FlushOutputBuffer() |
|
548 { |
|
549 while (iOutputBufferSize > 0 && iUnicodeWords < iMaxUnicodeWords) |
|
550 { |
|
551 if (iOutput) |
|
552 iOutput->WriteUnicodeValue(iOutputBuffer[iOutputBufferStart]); |
|
553 iUnicodeWords++; |
|
554 iOutputBufferSize--; |
|
555 iOutputBufferStart = (iOutputBufferStart + 1) % EMaxOutputBufferSize; |
|
556 } |
|
557 } |
|
558 |
|
559 TBool TUnicodeExpander::HandleSByte(TUint8 aByte) |
|
560 { |
|
561 // 'Pass-through' codes. |
|
562 if (TUnicodeCompressionState::EncodeAsIs(aByte)) |
|
563 { |
|
564 WriteChar(aByte); |
|
565 return true; |
|
566 } |
|
567 |
|
568 // Codes 0x80-0xFF select a character from the active window. |
|
569 if (aByte >= 0x80) |
|
570 { |
|
571 WriteChar32(iActiveWindowBase + aByte - 0x80); |
|
572 return true; |
|
573 } |
|
574 |
|
575 // SQU: quote a Unicode character. |
|
576 if (aByte == SQU) |
|
577 return QuoteUnicode(); |
|
578 |
|
579 // SCU: switch to Unicode mode. |
|
580 if (aByte == SCU) |
|
581 { |
|
582 iUnicodeMode = true; |
|
583 return true; |
|
584 } |
|
585 |
|
586 // SQn: quote from window n. |
|
587 if (aByte >= SQ0 && aByte <= SQ0 + 7) |
|
588 { |
|
589 int window = aByte - SQ0; |
|
590 TUint8 byte; |
|
591 if (ReadByte(byte)) |
|
592 { |
|
593 TUint32 c = byte; |
|
594 if (c <= 0x7F) |
|
595 c += iStaticWindow[window]; |
|
596 else |
|
597 c += iDynamicWindow[window] - 0x80; |
|
598 WriteChar32(c); |
|
599 return true; |
|
600 } |
|
601 else |
|
602 return false; |
|
603 } |
|
604 |
|
605 // SCn: switch to dynamic window n. |
|
606 if (aByte >= SC0 && aByte <= SC0 + 7) |
|
607 { |
|
608 iActiveWindowBase = iDynamicWindow[aByte - SC0]; |
|
609 return true; |
|
610 } |
|
611 |
|
612 // SDn: define dynamic window n and switch to it. |
|
613 if (aByte >= SD0 && aByte <= SD0 + 7) |
|
614 return DefineWindow(aByte - SD0); |
|
615 |
|
616 // SDX: define window in the expansion space. |
|
617 if (aByte == SDX) |
|
618 return DefineExpansionWindow(); |
|
619 |
|
620 Panic(EUnhandledByte); |
|
621 return false; |
|
622 } |
|
623 |
|
624 TBool TUnicodeExpander::HandleUByte(TUint8 aByte) |
|
625 { |
|
626 // Plain Unicode; get the low byte and emit the Unicode value. |
|
627 if (aByte <= 0xDF || aByte >= 0xF3) |
|
628 { |
|
629 TUint8 lo; |
|
630 if (ReadByte(lo)) |
|
631 { |
|
632 TUint16 c = (TUint16)((aByte << 8) | lo); |
|
633 WriteChar(c); |
|
634 return true; |
|
635 } |
|
636 else |
|
637 return false; |
|
638 } |
|
639 |
|
640 // Quote a Unicode character that would otherwise conflict with a tag. |
|
641 if (aByte == UQU) |
|
642 return QuoteUnicode(); |
|
643 |
|
644 // UCn: change to single byte mode and select window n. |
|
645 if (aByte >= UC0 && aByte <= UC0 + 7) |
|
646 { |
|
647 iUnicodeMode = false; |
|
648 iActiveWindowBase = iDynamicWindow[aByte - UC0]; |
|
649 return true; |
|
650 } |
|
651 |
|
652 // UDn: define dynamic window n and switch to it. |
|
653 if (aByte >= UD0 && aByte <= UD0 + 7) |
|
654 return DefineWindow(aByte - UD0); |
|
655 |
|
656 // UDX: define window in the expansion space. |
|
657 if (aByte == UDX) |
|
658 return DefineExpansionWindow(); |
|
659 |
|
660 Panic(EUnhandledByte); |
|
661 return false; |
|
662 } |
|
663 |
|
664 TBool TUnicodeExpander::QuoteUnicode() |
|
665 { |
|
666 TUint8 hi, lo; |
|
667 if (ReadByte(hi) && ReadByte(lo)) |
|
668 { |
|
669 TUint16 c = (TUint16)((hi << 8) | lo); |
|
670 WriteChar(c); |
|
671 return true; |
|
672 } |
|
673 else |
|
674 return false; |
|
675 } |
|
676 |
|
677 TBool TUnicodeExpander::DefineWindow(TInt32 aIndex) |
|
678 { |
|
679 TUint8 window; |
|
680 if (ReadByte(window)) |
|
681 { |
|
682 iUnicodeMode = false; |
|
683 iActiveWindowBase = DynamicWindowBase(window); |
|
684 iDynamicWindow[aIndex] = iActiveWindowBase; |
|
685 return true; |
|
686 } |
|
687 else |
|
688 return false; |
|
689 } |
|
690 |
|
691 TBool TUnicodeExpander::DefineExpansionWindow() |
|
692 { |
|
693 TUint8 hi, lo; |
|
694 if (ReadByte(hi) && ReadByte(lo)) |
|
695 { |
|
696 iUnicodeMode = false; |
|
697 iActiveWindowBase = 0x10000 + (0x80 * ((hi & 0x1F) * 0x100 + lo)); |
|
698 iDynamicWindow[hi >> 5] = iActiveWindowBase; |
|
699 return true; |
|
700 } |
|
701 else |
|
702 return false; |
|
703 } |
|
704 |
|
705 // Read either from the buffer (in the case of restarting after source finished in mid-operation) or from the source. |
|
706 TBool TUnicodeExpander::ReadByte(TUint8& aByte) |
|
707 { |
|
708 if (iInputBufferStart < iInputBufferSize) |
|
709 { |
|
710 aByte = iInputBuffer[iInputBufferStart++]; |
|
711 return true; |
|
712 } |
|
713 else if (iCompressedBytes < iMaxCompressedBytes) |
|
714 { |
|
715 if (iInputPointer) |
|
716 aByte = *iInputPointer++; |
|
717 else |
|
718 *iInputStream >> aByte; |
|
719 iInputBuffer[iInputBufferStart++] = aByte; |
|
720 iInputBufferSize = iInputBufferStart; |
|
721 iCompressedBytes++; |
|
722 return true; |
|
723 } |
|
724 else |
|
725 return false; |
|
726 } |
|
727 |
|
728 void TUnicodeExpander::WriteChar(TUint16 aChar) |
|
729 { |
|
730 if (iOutputBufferSize >= EMaxOutputBufferSize) |
|
731 Panic(EOutputBufferOverflow); |
|
732 iOutputBuffer[(iOutputBufferStart + iOutputBufferSize) % EMaxOutputBufferSize] = aChar; |
|
733 iOutputBufferSize++; |
|
734 } |
|
735 |
|
736 // Write a Unicode character; write using surrogates if in the range 0x10000..0x10FFFF. |
|
737 void TUnicodeExpander::WriteChar32(TUint32 aChar) |
|
738 { |
|
739 if (aChar <= 0xFFFF) |
|
740 WriteChar((TUint16)aChar); |
|
741 else if (aChar <= 0x10FFFF) |
|
742 { |
|
743 aChar -= 0x10000; // reduce to 20-bit value in the range 0x0..0xFFFFF |
|
744 WriteChar((TUint16)(0xD800 + (aChar >> 10))); // first high surrogate + high 10 bits |
|
745 WriteChar((TUint16)(0xDC00 + (aChar & 0x03FF))); // first low surrogate + low 10 bits |
|
746 } |
|
747 else |
|
748 Panic(ENotUnicode); |
|
749 } |
|
750 |
|
751 |
|
752 void TMemoryStreamUnicodeSink::WriteUnicodeValue(TUint16 aValue) |
|
753 { |
|
754 iStream.write((const char*)&aValue,sizeof(TUint16)); |
|
755 } |
|
756 |
|
757 TUint16 TMemoryStreamUnicodeSource::ReadUnicodeValue() |
|
758 { |
|
759 TUint16 x; |
|
760 iStream.read((char *)&x,sizeof(TUint16)); |
|
761 return x; |
|
762 } |
|
763 |