|
1 /* |
|
2 * Copyright (c) 2006-2009 Nokia Corporation and/or its subsidiary(-ies). |
|
3 * All rights reserved. |
|
4 * This component and the accompanying materials are made available |
|
5 * under the terms of the License "Eclipse Public License v1.0" |
|
6 * which accompanies this distribution, and is available |
|
7 * at the URL "http://www.eclipse.org/legal/epl-v10.html". |
|
8 * |
|
9 * Initial Contributors: |
|
10 * Nokia Corporation - initial contribution. |
|
11 * |
|
12 * Contributors: |
|
13 * |
|
14 * Description: |
|
15 * Ported from ucmp.h |
|
16 * Header for the Standard Compression Scheme for Unicode. |
|
17 * This code is compiled only in the Unicode build. |
|
18 * |
|
19 */ |
|
20 |
|
21 |
|
22 /** |
|
23 @file |
|
24 @internalComponent |
|
25 */ |
|
26 |
|
27 #ifndef __UCMP_H__ |
|
28 #define __UCMP_H__ 1 |
|
29 |
|
30 #include "symbiantypes.h" |
|
31 #include <iostream> |
|
32 |
|
33 |
|
// Stream aliases used throughout this header: compressed or expanded data is
// written to a Serialiser (std::ostream) and read from a Deserialiser
// (std::istream).
typedef std::ostream Serialiser;
typedef std::istream Deserialiser;

// Largest positive 32-bit signed value; used below as the default for the
// "maximum bytes / maximum words" arguments of Compress and Expand.
#define KMaxTInt 0x7FFFFFFF
|
38 /** |
|
39 * @internalComponent |
|
40 */ |
|
41 |
|
42 class TUnicodeCompressionState |
|
43 { |
|
44 public: |
|
45 TUnicodeCompressionState(); |
|
46 void Reset(); |
|
47 static TInt32 StaticWindowIndex(TUint16 aCode); |
|
48 static TInt32 DynamicWindowOffsetIndex(TUint16 aCode); |
|
49 static TUint32 DynamicWindowBase(TInt32 aOffsetIndex); |
|
50 static TBool EncodeAsIs(TUint16 aCode); |
|
51 |
|
52 enum TPanic |
|
53 { |
|
54 EUnhandledByte, // expander code fails to handle all possible byte codes |
|
55 ENotUnicode, // expander can't handle Unicode values outside range 0x0..0x10FFFF; |
|
56 // that is, 16-bit codes plus 32-bit codes that can be expressed using |
|
57 // 16-bit surrogates |
|
58 EOutputBufferOverflow // output buffer is not big enough |
|
59 }; |
|
60 |
|
61 static void Panic(TPanic aPanic); |
|
62 |
|
63 protected: |
|
64 |
|
65 enum |
|
66 { |
|
67 EStaticWindows = 8, |
|
68 EDynamicWindows = 8, |
|
69 ESpecialBases = 7 |
|
70 }; |
|
71 |
|
72 TBool iUnicodeMode; // TRUE if in Unicode mode as opposed to single-byte mode |
|
73 TUint32 iActiveWindowBase; // base of the active window - bases are 32-bit because they |
|
74 // can be set to the surrogate area, which represents codes |
|
75 // from 0x00010000 to 0x0010FFFF - planes 1-16 of ISO-10646. |
|
76 static const TUint32 iStaticWindow[EStaticWindows]; // bases of the static windows |
|
77 static const TUint32 iDynamicWindowDefault[EDynamicWindows]; // default bases of the dynamic windows |
|
78 static const TUint16 iSpecialBase[ESpecialBases]; // bases for window offsets F9..FF |
|
79 |
|
80 TUint32 iDynamicWindow[EDynamicWindows]; // bases of the dynamic windows |
|
81 TInt32 iUnicodeWords; // Unicode words processed; read by compressor, written by expander |
|
82 TInt32 iMaxUnicodeWords; // maximum number of Unicode words to read or write |
|
83 TInt32 iCompressedBytes; // compressed bytes processed: read by expander, written by compressor |
|
84 TInt32 iMaxCompressedBytes; // maximum number of compressed bytes to read or write |
|
85 }; |
|
86 |
|
87 /** |
|
88 * @internalComponent |
|
89 */ |
|
90 |
|
91 class MUnicodeSource |
|
92 { |
|
93 public: |
|
94 virtual TUint16 ReadUnicodeValue() = 0; |
|
95 }; |
|
96 |
|
97 /** |
|
98 * @internalComponent |
|
99 A class to read Unicode values directly from memory. |
|
100 */ |
|
101 class TMemoryUnicodeSource: public MUnicodeSource |
|
102 { |
|
103 public: |
|
104 inline TMemoryUnicodeSource(const TUint16* aPtr); |
|
105 inline TUint16 ReadUnicodeValue(); |
|
106 |
|
107 private: |
|
108 const TUint16* iPtr; |
|
109 }; |
|
110 |
|
/**
 * @internalComponent
 A class to read Unicode values from a stream built on a memory object.
 */
|
115 class TMemoryStreamUnicodeSource: public MUnicodeSource |
|
116 { |
|
117 public: |
|
118 inline TMemoryStreamUnicodeSource(Deserialiser& aStream); |
|
119 inline TUint16 ReadUnicodeValue(); |
|
120 |
|
121 private: |
|
122 Deserialiser& iStream; |
|
123 }; |
|
124 |
|
125 /** |
|
126 * @internalComponent |
|
127 */ |
|
128 class MUnicodeSink |
|
129 { |
|
130 public: |
|
131 virtual void WriteUnicodeValue(TUint16 aValue) = 0; |
|
132 }; |
|
133 |
|
134 /** |
|
135 * @internalComponent |
|
136 A class to write Unicode values directly to memory. |
|
137 */ |
|
138 class TMemoryUnicodeSink: public MUnicodeSink |
|
139 { |
|
140 public: |
|
141 inline TMemoryUnicodeSink(TUint16* aPtr); |
|
142 inline void WriteUnicodeValue(TUint16 aValue); |
|
143 |
|
144 private: |
|
145 TUint16* iPtr; |
|
146 }; |
|
147 |
|
148 /** |
|
149 * @internalComponent |
|
150 A class to write Unicode values to a stream built on a memory object. |
|
151 */ |
|
152 class TMemoryStreamUnicodeSink: public MUnicodeSink |
|
153 { |
|
154 public: |
|
155 inline TMemoryStreamUnicodeSink(Serialiser& aStream); |
|
156 inline void WriteUnicodeValue(TUint16 aValue); |
|
157 |
|
158 private: |
|
159 Serialiser& iStream; |
|
160 }; |
|
161 |
|
162 /** |
|
163 * @internalComponent |
|
164 A class to hold functions to compress text using the Standard Compression Scheme for Unicode. |
|
165 |
|
166 A note on error handling and leaving. |
|
167 |
|
168 Although all the public functions except the constructor can leave, it is possible to guarantee success: that is, |
|
169 guarantee that a call will not leave, and that compression will be completed. To do this, (i) supply a MUnicodeSource |
|
170 object with a non-leaving ReadUnicodeValue function, such as a TMemoryUnicodeSource; (ii) write output to a |
|
171 Serialiser with a non-leaving Write function, or to a buffer that you already know to be big enough, which can be |
|
172 found out using CompressedSize. |
|
173 |
|
174 This guarantee of success is particularly useful when compressing from one memory buffer to another. |
|
175 */ |
|
176 class TUnicodeCompressor: public TUnicodeCompressionState |
|
177 { |
|
178 public: |
|
179 TUnicodeCompressor(); |
|
180 void Compress(Serialiser& aOutput,MUnicodeSource& aInput, |
|
181 TInt32 aMaxOutputBytes = KMaxTInt,TInt32 aMaxInputWords = KMaxTInt, |
|
182 TInt32* aOutputBytes = 0,TInt32* aInputWords = 0); |
|
183 void Compress(TUint8* aOutput,MUnicodeSource& aInput, |
|
184 TInt32 aMaxOutputBytes = KMaxTInt,TInt32 aMaxInputWords = KMaxTInt, |
|
185 TInt32* aOutputBytes = 0,TInt32* aInputWords = 0); |
|
186 TInt32 Flush(Serialiser& aOutput,TInt32 aMaxOutputBytes,TInt32& aOutputBytes); |
|
187 TInt32 Flush(TUint8* aOutput,TInt32 aMaxOutputBytes,TInt32& aOutputBytes); |
|
188 static TInt32 CompressedSize(MUnicodeSource& aInput,TInt32 aInputWords); |
|
189 |
|
190 private: |
|
191 |
|
192 // A structure to store a character and its treatment code |
|
193 struct TAction |
|
194 { |
|
195 // Treatment codes: static and dynamic window numbers, plain ASCII or plain Unicode |
|
196 enum |
|
197 { |
|
198 EPlainUnicode = -2, // character cannot be expressed as ASCII or using static or dynamic windows |
|
199 EPlainASCII = -1, // character can be emitted as an ASCII code |
|
200 EFirstDynamic = 0, // values 0..255 are for dynamic windows with offsets at these places in the offset table |
|
201 ELastDynamic = 255, |
|
202 EFirstStatic = 256, // values 256..263 are for static windows 0..7 |
|
203 ELastStatic = 263 |
|
204 }; |
|
205 |
|
206 inline TAction(); |
|
207 TAction(TUint16 aCode); |
|
208 |
|
209 TUint16 iCode; // Unicode value of the character |
|
210 TInt32 iTreatment; // treatment code: see above |
|
211 }; |
|
212 |
|
213 void DoCompress(Serialiser* aOutputStream,TUint8* aOutputPointer,MUnicodeSource* aInput, |
|
214 TInt32 aMaxCompressedBytes,TInt32 aMaxUnicodeWords, |
|
215 TInt32* aCompressedBytes,TInt32* aUnicodeWords); |
|
216 void FlushInputBuffer(); |
|
217 void FlushOutputBuffer(); |
|
218 void WriteRun(); |
|
219 void WriteCharacter(const TAction& aAction); |
|
220 void WriteSCharacter( const TAction& aAction); |
|
221 void WriteUCharacter(TUint16 aCode); |
|
222 void WriteByte(TUint32 aByte); |
|
223 void WriteCharacterFromBuffer(); |
|
224 void SelectTreatment(TInt32 aTreatment); |
|
225 |
|
226 enum |
|
227 { |
|
228 EMaxInputBufferSize = 4, |
|
229 EMaxOutputBufferSize = EMaxInputBufferSize * 3 // no Unicode character can be encoded as more than three bytes |
|
230 }; |
|
231 TAction iInputBuffer[EMaxInputBufferSize]; // circular buffer; queue of Unicode characters to be processed |
|
232 TInt32 iInputBufferStart; // position of first Unicode character to be processed |
|
233 TInt32 iInputBufferSize; // characters in the input buffer |
|
234 TUint8 iOutputBuffer[EMaxOutputBufferSize]; // circular buffer; queue of compressed bytes to be output |
|
235 TInt32 iOutputBufferStart; // position of first compressed byte to be output |
|
236 TInt32 iOutputBufferSize; // characters in the output buffer |
|
237 TInt32 iDynamicWindowIndex; // index of the current dynamic window |
|
238 Serialiser* iOutputStream; // if non-null, output is to this stream |
|
239 TUint8* iOutputPointer; // if non-null, output is to memory |
|
240 MUnicodeSource* iInput; // input object |
|
241 }; |
|
242 |
|
/**
 * @internalComponent

A class to hold functions to expand text using the Standard Compression Scheme for Unicode.

A note on error handling and leaving.

Although all the public functions except the constructor can leave, it is possible to guarantee success: that is,
guarantee that a call will not leave, and that expansion will be completed. To do this, (i) supply a MUnicodeSink
object with a non-leaving WriteUnicodeValue function, such as a TMemoryUnicodeSink; (ii) read input from a Deserialiser
with a non-leaving Read function; (iii) supply a big enough buffer to write the output; you can find out how big by
calling ExpandedSize, using methods (i) and (ii) to guarantee success.

This guarantee of success is particularly useful when expanding from one memory buffer to another.
*/
|
258 class TUnicodeExpander: public TUnicodeCompressionState |
|
259 { |
|
260 public: |
|
261 TUnicodeExpander(); |
|
262 void Expand(MUnicodeSink& aOutput,Deserialiser& aInput, |
|
263 TInt32 aMaxOutputWords = KMaxTInt,TInt32 aMaxInputBytes = KMaxTInt, |
|
264 TInt32* aOutputWords = 0,TInt32* aInputBytes = 0); |
|
265 void Expand(MUnicodeSink& aOutput,const TUint8* aInput, |
|
266 TInt32 aMaxOutputWords = KMaxTInt,TInt32 aMaxInputBytes = KMaxTInt, |
|
267 TInt32* aOutputWords = 0,TInt32* aInputBytes = 0); |
|
268 TInt32 Flush(MUnicodeSink& aOutput,TInt32 aMaxOutputWords,TInt32& aOutputWords); |
|
269 static TInt32 ExpandedSize(Deserialiser& aInput,TInt32 aInputBytes); |
|
270 static TInt32 ExpandedSize(const TUint8* aInput,TInt32 aInputBytes); |
|
271 |
|
272 private: |
|
273 void DoExpand(MUnicodeSink* aOutput,Deserialiser* aInputStream,const TUint8* aInputPointer, |
|
274 TInt32 aMaxOutputWords,TInt32 aMaxInputBytes, |
|
275 TInt32* aOutputWords,TInt32* aInputBytes); |
|
276 void HandleByte(); |
|
277 void FlushOutputBuffer(); |
|
278 TBool HandleSByte(TUint8 aByte); |
|
279 TBool HandleUByte(TUint8 aByte); |
|
280 TBool ReadByte(TUint8& aByte); |
|
281 TBool QuoteUnicode(); |
|
282 TBool DefineWindow(TInt32 aIndex); |
|
283 TBool DefineExpansionWindow(); |
|
284 void WriteChar(TUint16 aChar); |
|
285 void WriteChar32(TUint32 aChar); |
|
286 |
|
287 enum |
|
288 { |
|
289 EMaxInputBufferSize = 3, // no Unicode character can be encoded as more than 3 bytes |
|
290 EMaxOutputBufferSize = 2 // no byte can be expanded into more than 2 Unicode characters |
|
291 }; |
|
292 TUint8 iInputBuffer[EMaxInputBufferSize]; // buffer containing a group of compressed bytes representing |
|
293 // a single operation; when an input source ends in the |
|
294 // middle of an operation, this buffer enables the next |
|
295 // expansion to start in the correct state |
|
296 TInt32 iInputBufferStart; // next read position in the input buffer |
|
297 TInt32 iInputBufferSize; // bytes in the input buffer |
|
298 TUint16 iOutputBuffer[EMaxOutputBufferSize]; // circular buffer; queue of Unicode characters to be output |
|
299 TInt32 iOutputBufferStart; // position of first Unicode character to be output |
|
300 TInt32 iOutputBufferSize; // characters in the output buffer |
|
301 MUnicodeSink* iOutput; // output object |
|
302 Deserialiser* iInputStream; // if non-null, input is from this stream |
|
303 const TUint8* iInputPointer; // if non-null, input is from memory |
|
304 }; |
|
305 |
|
306 // inline functions start here |
|
307 |
|
308 inline TMemoryUnicodeSource::TMemoryUnicodeSource(const TUint16* aPtr): |
|
309 iPtr(aPtr) |
|
310 { |
|
311 } |
|
312 |
|
313 inline TUint16 TMemoryUnicodeSource::ReadUnicodeValue() |
|
314 { |
|
315 return *iPtr++; |
|
316 } |
|
317 |
|
318 inline TMemoryStreamUnicodeSource::TMemoryStreamUnicodeSource(Deserialiser& aStream): |
|
319 iStream(aStream) |
|
320 { |
|
321 } |
|
322 |
|
323 inline TMemoryUnicodeSink::TMemoryUnicodeSink(TUint16* aPtr): |
|
324 iPtr(aPtr) |
|
325 { |
|
326 } |
|
327 |
|
328 inline void TMemoryUnicodeSink::WriteUnicodeValue(TUint16 aValue) |
|
329 { |
|
330 *iPtr++ = aValue; |
|
331 } |
|
332 |
|
333 inline TMemoryStreamUnicodeSink::TMemoryStreamUnicodeSink(Serialiser& aStream): |
|
334 iStream(aStream) |
|
335 { |
|
336 } |
|
337 |
|
338 inline TUnicodeCompressor::TAction::TAction(): |
|
339 iCode(0), |
|
340 iTreatment(EPlainUnicode) |
|
341 { |
|
342 } |
|
343 |
|
344 #endif // __UCMP_H__ |
|
345 |