|
/*
* Copyright (c) 2001-2009 Nokia Corporation and/or its subsidiary(-ies).
* All rights reserved.
* This component and the accompanying materials are made available
* under the terms of the License "Eclipse Public License v1.0"
* which accompanies this distribution, and is available
* at the URL "http://www.eclipse.org/legal/epl-v10.html".
*
* Initial Contributors:
* Nokia Corporation - initial contribution.
*
* Contributors:
*
* Description:
*
*/
|
17 |
|
18 |
|
19 #include "UNICODE_COMPRESSOR.H" |
|
20 |
|
21 void CompressUnicode(unsigned char* aOutputBuffer, int& aOutputLength, int aMaximumOutputLength, const UTF16* aInputBuffer, int aInputLength) |
|
22 { |
|
23 TUnicodeCompressor unicodeCompressor; |
|
24 TMemoryUnicodeSource decompressedUnicode(aInputBuffer); |
|
25 TInt numberOfInputElementsConsumed; |
|
26 unicodeCompressor.CompressL(aOutputBuffer, decompressedUnicode, aMaximumOutputLength, aInputLength, &aOutputLength, &numberOfInputElementsConsumed); |
|
27 TInt temp; |
|
28 unicodeCompressor.FlushL(aOutputBuffer, aMaximumOutputLength, temp); |
|
29 aOutputLength+=temp; |
|
30 if (aOutputLength<aMaximumOutputLength && numberOfInputElementsConsumed!=aInputLength) |
|
31 { |
|
32 ::Panic(1); |
|
33 } |
|
34 } |
|
35 |
|
// The rest of this file is a selective copy of base\store\ustrm\US_UCMP.CPP.
|
37 |
|
38 const TUint32 TUnicodeCompressionState::iStaticWindow[EStaticWindows] = |
|
39 { |
|
40 0x0000, // tags |
|
41 0x0080, // Latin-1 supplement |
|
42 0x0100, // Latin Extended-A |
|
43 0x0300, // Combining Diacritics |
|
44 0x2000, // General Punctuation |
|
45 0x2080, // Currency Symbols |
|
46 0x2100, // Letterlike Symbols and Number Forms |
|
47 0x3000 // CJK Symbols and Punctuation |
|
48 }; |
|
49 |
|
50 const TUint32 TUnicodeCompressionState::iDynamicWindowDefault[EDynamicWindows] = |
|
51 { |
|
52 0x0080, // Latin-1 supplement |
|
53 0x00C0, // parts of Latin-1 supplement and Latin Extended-A |
|
54 0x0400, // Cyrillic |
|
55 0x0600, // Arabic |
|
56 0x0900, // Devanagari |
|
57 0x3040, // Hiragana |
|
58 0x30A0, // Katakana |
|
59 0xFF00 // Fullwidth ASCII |
|
60 }; |
|
61 |
|
62 const TUint16 TUnicodeCompressionState::iSpecialBase[ESpecialBases] = |
|
63 { |
|
64 0x00C0, // Latin 1 letters (not symbols) and some of Extended-A |
|
65 0x0250, // IPA extensions |
|
66 0x0370, // Greek |
|
67 0x0530, // Armenian |
|
68 0x3040, // Hiragana |
|
69 0x30A0, // Katakana |
|
70 0xFF60 // Halfwidth katakana |
|
71 }; |
|
72 |
|
73 // Single-byte mode tag values |
|
74 const TUint8 SQ0 = 0x01; // <byte> quote from window 0 |
|
75 const TUint8 SDX = 0x0B; // <hbyte> <lbyte> define window in expansion area |
|
76 const TUint8 SQU = 0x0E; // <hbyte> <lbyte> quote Unicode value |
|
77 const TUint8 SCU = 0x0F; // switch to Unicode mode |
|
78 const TUint8 SC0 = 0x10; // select dynamic window 0 |
|
79 const TUint8 SD0 = 0x18; // <byte> set dynamic window 0 index to <byte> and select it |
|
80 |
|
81 // Unicode mode tag values |
|
82 const TUint8 UC0 = 0xE0; // select dynamic window 0 and switch to single-byte mode |
|
83 const TUint8 UD0 = 0xE8; // <byte> set dynamic window 0 index to <byte>, select it and switch to |
|
84 // single-byte mode |
|
85 const TUint8 UQU = 0xF0; // <hbyte>, <lbyte> quote Unicode value |
|
86 const TUint8 UDX = 0xF1; // <hbyte>, <lbyte> define window in expansion area and switch to single-byte mode |
|
87 |
|
88 TUnicodeCompressionState::TUnicodeCompressionState(): |
|
89 iUnicodeWords(0), |
|
90 iMaxUnicodeWords(0), |
|
91 iCompressedBytes(0), |
|
92 iMaxCompressedBytes(0) |
|
93 { |
|
94 Reset(); |
|
95 } |
|
96 |
|
97 void TUnicodeCompressionState::Reset() |
|
98 { |
|
99 iUnicodeMode = FALSE; |
|
100 iActiveWindowBase = 0x0080; |
|
101 for (int i = 0; i < EDynamicWindows; i++) |
|
102 iDynamicWindow[i] = iDynamicWindowDefault[i]; |
|
103 } |
|
104 |
|
105 |
|
106 // Return the index of the static window that contains this code, if any, or -1 if there is none. |
|
107 TInt TUnicodeCompressionState::StaticWindowIndex(TUint16 aCode) |
|
108 { |
|
109 for (TInt i = 0; i < EStaticWindows; i++) |
|
110 if (aCode >= iStaticWindow[i] && aCode < iStaticWindow[i] + 128) |
|
111 return i; |
|
112 return -1; |
|
113 } |
|
114 |
|
115 /* |
|
116 If aCode can be accommodated in one of the legal dynamic windows, return the index of that window |
|
117 in the offset table. If not return KErrNotFound. |
|
118 */ |
|
119 TInt TUnicodeCompressionState::DynamicWindowOffsetIndex(TUint16 aCode) |
|
120 { |
|
121 if (aCode < 0x0080) |
|
122 return KErrNotFound; |
|
123 if (aCode >= 0x3400 && aCode <= 0xDFFF) |
|
124 return KErrNotFound; |
|
125 |
|
126 /* |
|
127 Prefer sections that cross half-block boundaries. These are better adapted to actual text. |
|
128 They are represented by offset indices 0xf9..0xff. |
|
129 */ |
|
130 for (int i = 0; i < ESpecialBases; i++) |
|
131 if (aCode >= iSpecialBase[i] && aCode < iSpecialBase[i] + 128) |
|
132 return 0xF9 + i; |
|
133 |
|
134 /* |
|
135 Offset indices 0x01..0x67 represent half blocks from 0x0080 to 0x3380 and |
|
136 0x68..0xA7 represent half blocks from 0xE000 to 0xFF80. |
|
137 */ |
|
138 if (aCode >= 0xE000) |
|
139 aCode -= 0xAC00; |
|
140 return aCode / 0x80; |
|
141 } |
|
142 |
|
143 // Return the base of the window represented by offset index <n>. Return 0 if the offset index is illegal. |
|
144 TUint32 TUnicodeCompressionState::DynamicWindowBase(TInt aOffsetIndex) |
|
145 { |
|
146 if (aOffsetIndex >= 0xF9 && aOffsetIndex <= 0xFF) |
|
147 { |
|
148 /* |
|
149 WARNING: don't optimise the following two lines by replacing them with |
|
150 'return iSpecialBase[aOffsetIndex - 0xF9];'. To do so would re-introduce a defect |
|
151 in ARM builds caused by optimisation and consequent erroneous fixing up |
|
152 of the array base: see defect EDNGASR-4AGJQX in ER5U defects. |
|
153 */ |
|
154 int special_base_index = aOffsetIndex - 0xF9; |
|
155 return iSpecialBase[special_base_index]; |
|
156 } |
|
157 if (aOffsetIndex >= 0x01 && aOffsetIndex <= 0x67) |
|
158 return aOffsetIndex * 0x80; |
|
159 if (aOffsetIndex >= 0x68 && aOffsetIndex <= 0xA7) |
|
160 return aOffsetIndex * 0x80 + 0xAC00; |
|
161 return 0; |
|
162 } |
|
163 |
|
164 TBool TUnicodeCompressionState::EncodeAsIs(TUint16 aCode) |
|
165 { |
|
166 return aCode == 0x0000 || aCode == 0x0009 || aCode == 0x000A || aCode == 0x000D || |
|
167 (aCode >= 0x0020 && aCode <= 0x007F); |
|
168 } |
|
169 |
|
170 void TUnicodeCompressionState::Panic(TPanic aPanic) |
|
171 { |
|
172 ::Panic(100+aPanic); |
|
173 } |
|
174 |
|
175 EXPORT_C TUnicodeCompressor::TUnicodeCompressor(): |
|
176 iInputBufferStart(0), |
|
177 iInputBufferSize(0), |
|
178 iOutputBufferStart(0), |
|
179 iOutputBufferSize(0), |
|
180 iDynamicWindowIndex(0), |
|
181 iOutputStream(NULL), |
|
182 iOutputPointer(NULL), |
|
183 iInput(NULL) |
|
184 { |
|
185 } |
|
186 |
|
187 EXPORT_C void TUnicodeCompressor::CompressL(TUint8* aOutput,MUnicodeSource& aInput, |
|
188 TInt aMaxOutputBytes,TInt aMaxInputWords, |
|
189 TInt* aOutputBytes,TInt* aInputWords) |
|
190 { |
|
191 DoCompressL(NULL,aOutput,&aInput,aMaxOutputBytes,aMaxInputWords,aOutputBytes,aInputWords); |
|
192 } |
|
193 |
|
194 EXPORT_C TInt TUnicodeCompressor::FlushL(TUint8* aOutput,TInt aMaxOutputBytes,TInt& aOutputBytes) |
|
195 { |
|
196 DoCompressL(NULL,aOutput,NULL,aMaxOutputBytes,0,&aOutputBytes,NULL); |
|
197 return iOutputBufferSize; |
|
198 } |
|
199 |
|
200 EXPORT_C TInt TUnicodeCompressor::CompressedSizeL(MUnicodeSource& aInput,TInt aInputWords) |
|
201 { |
|
202 TInt bytes; |
|
203 TUnicodeCompressor c; |
|
204 c.DoCompressL(NULL,NULL,&aInput,KMaxTInt,aInputWords,&bytes,NULL); |
|
205 return bytes; |
|
206 } |
|
207 |
|
208 // Compress until input or output is exhausted or an exception occurs. |
|
209 void TUnicodeCompressor::DoCompressL(RWriteStream* aOutputStream,TUint8* aOutputPointer,MUnicodeSource* aInput, |
|
210 TInt aMaxOutputBytes,TInt aMaxInputWords, |
|
211 TInt* aOutputBytes,TInt* aInputWords) |
|
212 { |
|
213 iOutputStream = aOutputStream; |
|
214 iOutputPointer = aOutputPointer; |
|
215 iInput = aInput; |
|
216 iMaxCompressedBytes = aMaxOutputBytes; |
|
217 iMaxUnicodeWords = aMaxInputWords; |
|
218 iCompressedBytes = iUnicodeWords = 0; |
|
219 FlushOutputBufferL(); |
|
220 if (iInput) |
|
221 { |
|
222 while (iUnicodeWords < iMaxUnicodeWords && iCompressedBytes < iMaxCompressedBytes) |
|
223 { |
|
224 TUint16 x = iInput->ReadUnicodeValueL(); |
|
225 TAction action(x); |
|
226 iInputBuffer[(iInputBufferStart + iInputBufferSize) % EMaxInputBufferSize] = action; |
|
227 iInputBufferSize++; |
|
228 iUnicodeWords++; |
|
229 if (iInputBufferSize == EMaxInputBufferSize) |
|
230 WriteRunL(); |
|
231 } |
|
232 } |
|
233 FlushInputBufferL(); |
|
234 if (aOutputBytes) |
|
235 *aOutputBytes = iCompressedBytes; |
|
236 if (aInputWords) |
|
237 *aInputWords = iUnicodeWords; |
|
238 } |
|
239 |
|
240 TUnicodeCompressor::TAction::TAction(TUint16 aCode): |
|
241 iCode(aCode) |
|
242 { |
|
243 if (TUnicodeCompressionState::EncodeAsIs(aCode)) |
|
244 iTreatment = EPlainASCII; |
|
245 else |
|
246 { |
|
247 iTreatment = TUnicodeCompressionState::DynamicWindowOffsetIndex(aCode); |
|
248 if (iTreatment == -1) |
|
249 { |
|
250 iTreatment = TUnicodeCompressionState::StaticWindowIndex(aCode); |
|
251 if (iTreatment == -1) |
|
252 iTreatment = EPlainUnicode; |
|
253 else |
|
254 iTreatment += EFirstStatic; |
|
255 } |
|
256 } |
|
257 } |
|
258 |
|
259 void TUnicodeCompressor::WriteCharacterFromBuffer() |
|
260 { |
|
261 const TAction& action = iInputBuffer[iInputBufferStart]; |
|
262 iInputBufferSize--; |
|
263 iInputBufferStart = (iInputBufferStart + 1) % EMaxInputBufferSize; |
|
264 WriteCharacter(action); |
|
265 } |
|
266 |
|
267 void TUnicodeCompressor::FlushInputBufferL() |
|
268 { |
|
269 while (iInputBufferSize > 0 && iCompressedBytes < iMaxCompressedBytes) |
|
270 WriteRunL(); |
|
271 } |
|
272 |
|
273 void TUnicodeCompressor::WriteRunL() |
|
274 { |
|
275 // Write out any leading characters that can be passed through. |
|
276 if (!iUnicodeMode) |
|
277 while (iInputBufferSize > 0) |
|
278 { |
|
279 const TAction& action = iInputBuffer[iInputBufferStart]; |
|
280 if (action.iTreatment == TAction::EPlainASCII || |
|
281 (action.iCode >= iActiveWindowBase && action.iCode < iActiveWindowBase + 128)) |
|
282 WriteCharacterFromBuffer(); |
|
283 else |
|
284 break; |
|
285 } |
|
286 |
|
287 // Write a run of characters that cannot be passed through. |
|
288 int i; |
|
289 if (iInputBufferSize > 0) |
|
290 { |
|
291 /* |
|
292 Find a run of characters with the same treatment and select that treatment |
|
293 if the run has more than one character. |
|
294 */ |
|
295 int treatment = iInputBuffer[iInputBufferStart].iTreatment; |
|
296 int next_treatment = treatment; |
|
297 int run_size = 1; |
|
298 for (i = 1; i < iInputBufferSize; i++) |
|
299 { |
|
300 int index = (iInputBufferStart + i) % EMaxInputBufferSize; |
|
301 next_treatment = iInputBuffer[index].iTreatment; |
|
302 if (next_treatment != treatment) |
|
303 break; |
|
304 run_size++; |
|
305 } |
|
306 if (run_size > 1) |
|
307 SelectTreatment(treatment); |
|
308 for (i = 0; i < run_size; i++) |
|
309 WriteCharacterFromBuffer(); |
|
310 } |
|
311 |
|
312 FlushOutputBufferL(); |
|
313 } |
|
314 |
|
315 void TUnicodeCompressor::FlushOutputBufferL() |
|
316 { |
|
317 while (iOutputBufferSize > 0 && iCompressedBytes < iMaxCompressedBytes) |
|
318 { |
|
319 TUint8 byte = iOutputBuffer[iOutputBufferStart]; |
|
320 if (iOutputPointer) |
|
321 *iOutputPointer++ = byte; |
|
322 else if (iOutputStream) |
|
323 Panic(ECannotUseStreams); |
|
324 iCompressedBytes++; |
|
325 iOutputBufferSize--; |
|
326 iOutputBufferStart = (iOutputBufferStart + 1) % EMaxOutputBufferSize; |
|
327 } |
|
328 } |
|
329 |
|
330 void TUnicodeCompressor::SelectTreatment(TInt aTreatment) |
|
331 { |
|
332 if (aTreatment == TAction::EPlainUnicode) |
|
333 { |
|
334 // Switch to Unicode mode if not there already. |
|
335 if (!iUnicodeMode) |
|
336 { |
|
337 WriteByte(SCU); |
|
338 iUnicodeMode = TRUE; |
|
339 } |
|
340 return; |
|
341 } |
|
342 |
|
343 if (aTreatment == TAction::EPlainASCII) |
|
344 { |
|
345 // Switch to single-byte mode, using the current dynamic window, if not there already. |
|
346 if (iUnicodeMode) |
|
347 { |
|
348 WriteByte(UC0 + iDynamicWindowIndex); |
|
349 iUnicodeMode = FALSE; |
|
350 } |
|
351 return; |
|
352 } |
|
353 |
|
354 if (aTreatment >= TAction::EFirstDynamic && aTreatment <= TAction::ELastDynamic) |
|
355 { |
|
356 TUint32 base = DynamicWindowBase(aTreatment); |
|
357 |
|
358 // Switch to the appropriate dynamic window if it is available; if not, redefine and select dynamic window 4. |
|
359 for (int i = 0; i < EDynamicWindows; i++) |
|
360 if (base == iDynamicWindow[i]) |
|
361 { |
|
362 if (iUnicodeMode) |
|
363 WriteByte(UC0 + i); |
|
364 else if (i != iDynamicWindowIndex) |
|
365 WriteByte(SC0 + i); |
|
366 iUnicodeMode = FALSE; |
|
367 iDynamicWindowIndex = i; |
|
368 iActiveWindowBase = base; |
|
369 return; |
|
370 } |
|
371 if (iUnicodeMode) |
|
372 WriteByte(UD0 + 4); |
|
373 else |
|
374 WriteByte(SD0 + 4); |
|
375 iDynamicWindowIndex = 4; |
|
376 iUnicodeMode = FALSE; |
|
377 WriteByte(aTreatment); |
|
378 iDynamicWindow[4] = base; |
|
379 iActiveWindowBase = base; |
|
380 return; |
|
381 } |
|
382 } |
|
383 |
|
384 // Write a character without changing mode or window. |
|
385 void TUnicodeCompressor::WriteCharacter(const TAction& aAction) |
|
386 { |
|
387 if (iUnicodeMode) |
|
388 WriteUCharacter(aAction.iCode); |
|
389 else |
|
390 WriteSCharacter(aAction); |
|
391 } |
|
392 |
|
393 void TUnicodeCompressor::WriteUCharacter(TUint16 aCode) |
|
394 { |
|
395 // Emit the 'quote Unicode' tag if the character would conflict with a tag. |
|
396 if (aCode >= 0xE000 && aCode <= 0xF2FF) |
|
397 WriteByte(UQU); |
|
398 |
|
399 // Write the Unicode value big-end first. |
|
400 WriteByte((aCode >> 8) & 0xFF); |
|
401 WriteByte(aCode & 0xFF); |
|
402 } |
|
403 |
|
404 void TUnicodeCompressor::WriteByte(TUint aByte) |
|
405 { |
|
406 if (iOutputBufferSize >= EMaxOutputBufferSize) |
|
407 Panic(EOutputBufferOverflow); |
|
408 iOutputBuffer[(iOutputBufferStart + iOutputBufferSize) % EMaxOutputBufferSize] = (TUint8)aByte; |
|
409 iOutputBufferSize++; |
|
410 } |
|
411 |
|
412 void TUnicodeCompressor::WriteSCharacter(const TAction& aAction) |
|
413 { |
|
414 // Characters in the range 0x0020..0x007F, plus nul, tab, cr, and lf, can be emitted as their low bytes. |
|
415 if (aAction.iTreatment == TAction::EPlainASCII) |
|
416 { |
|
417 WriteByte(aAction.iCode); |
|
418 return; |
|
419 } |
|
420 |
|
421 // Characters in a static window can be written using SQ<n> plus a byte in the range 0x00-0x7F |
|
422 if (aAction.iTreatment >= TAction::EFirstStatic && aAction.iTreatment <= TAction::ELastStatic) |
|
423 { |
|
424 int window = aAction.iTreatment - TAction::EFirstStatic; |
|
425 WriteByte(SQ0 + window); |
|
426 WriteByte(aAction.iCode); |
|
427 return; |
|
428 } |
|
429 |
|
430 // Characters in the current dynamic window can be written as a byte in the range 0x80-0xFF. |
|
431 if (aAction.iCode >= iActiveWindowBase && aAction.iCode < iActiveWindowBase + 128) |
|
432 { |
|
433 WriteByte(aAction.iCode - iActiveWindowBase + 0x80); |
|
434 return; |
|
435 } |
|
436 |
|
437 // Characters in another dynamic window can be written using SQ<n> plus a byte in the range 0x80-0xFF |
|
438 int i; |
|
439 for (i = 0; i < EDynamicWindows; i++) |
|
440 if (aAction.iCode >= iDynamicWindow[i] && aAction.iCode < iDynamicWindow[i] + 128) |
|
441 { |
|
442 WriteByte(SQ0 + i); |
|
443 WriteByte(aAction.iCode - iDynamicWindow[i] + 0x80); |
|
444 return; |
|
445 } |
|
446 |
|
447 // Other characters can be quoted. |
|
448 WriteByte(SQU); |
|
449 WriteByte((aAction.iCode >> 8) & 0xFF); |
|
450 WriteByte(aAction.iCode & 0xFF); |
|
451 return; |
|
452 } |
|
453 |