|
1 /* |
|
2 * Copyright (c) 2006-2009 Nokia Corporation and/or its subsidiary(-ies). |
|
3 * All rights reserved. |
|
4 * This component and the accompanying materials are made available |
|
5 * under the terms of the License "Eclipse Public License v1.0" |
|
6 * which accompanies this distribution, and is available |
|
7 * at the URL "http://www.eclipse.org/legal/epl-v10.html". |
|
8 * |
|
9 * Initial Contributors: |
|
10 * Nokia Corporation - initial contribution. |
|
11 * |
|
12 * Contributors: |
|
13 * |
|
14 * Description: |
|
15 * Ported from ucmp.h |
|
16 * Header for the Standard Compression Scheme for Unicode. |
|
17 * This code is compiled only in the Unicode build. |
|
18 * |
|
19 */ |
|
20 |
|
21 |
|
22 #ifndef __UCMP_H__ |
|
23 #define __UCMP_H__ 1 |
|
24 |
|
25 #include "symbiantypes.h" |
|
26 |
|
27 class Deserialiser; |
|
28 class Serialiser; |
|
29 |
|
// Largest positive value of a 32-bit signed integer. It is used in this
// header as the default "maximum" argument of the compress/expand functions.
// Guarded so that an earlier definition of the same macro (from another
// header) does not trigger a redefinition diagnostic.
#ifndef KMaxTInt
#define KMaxTInt 0x7FFFFFFF
#endif
|
31 /** |
|
32 * @file UCMP.H |
|
33 * |
|
34 * @publishedAll |
|
35 * @released |
|
36 */ |
|
37 class TUnicodeCompressionState |
|
38 { |
|
39 public: |
|
40 TUnicodeCompressionState(); |
|
41 void Reset(); |
|
42 static TInt StaticWindowIndex(TUint16 aCode); |
|
43 static TInt DynamicWindowOffsetIndex(TUint16 aCode); |
|
44 static TUint32 DynamicWindowBase(TInt aOffsetIndex); |
|
45 static TBool EncodeAsIs(TUint16 aCode); |
|
46 |
|
47 enum TPanic |
|
48 { |
|
49 EUnhandledByte, // expander code fails to handle all possible byte codes |
|
50 ENotUnicode, // expander can't handle Unicode values outside range 0x0..0x10FFFF; |
|
51 // that is, 16-bit codes plus 32-bit codes that can be expressed using |
|
52 // 16-bit surrogates |
|
53 EOutputBufferOverflow // output buffer is not big enough |
|
54 }; |
|
55 |
|
56 static void Panic(TPanic aPanic); |
|
57 |
|
58 protected: |
|
59 |
|
60 enum |
|
61 { |
|
62 EStaticWindows = 8, |
|
63 EDynamicWindows = 8, |
|
64 ESpecialBases = 7 |
|
65 }; |
|
66 |
|
67 TBool iUnicodeMode; // TRUE if in Unicode mode as opposed to single-byte mode |
|
68 TUint32 iActiveWindowBase; // base of the active window - bases are 32-bit because they |
|
69 // can be set to the surrogate area, which represents codes |
|
70 // from 0x00010000 to 0x0010FFFF - planes 1-16 of ISO-10646. |
|
71 static const TUint32 iStaticWindow[EStaticWindows]; // bases of the static windows |
|
72 static const TUint32 iDynamicWindowDefault[EDynamicWindows]; // default bases of the dynamic windows |
|
73 static const TUint16 iSpecialBase[ESpecialBases]; // bases for window offsets F9..FF |
|
74 |
|
75 TUint32 iDynamicWindow[EDynamicWindows]; // bases of the dynamic windows |
|
76 TInt iUnicodeWords; // Unicode words processed; read by compressor, written by expander |
|
77 TInt iMaxUnicodeWords; // maximum number of Unicode words to read or write |
|
78 TInt iCompressedBytes; // compressed bytes processed: read by expander, written by compressor |
|
79 TInt iMaxCompressedBytes; // maximum number of compressed bytes to read or write |
|
80 }; |
|
81 |
|
82 /** |
|
83 * @publishedAll |
|
84 * @released |
|
85 */ |
|
86 class MUnicodeSource |
|
87 { |
|
88 public: |
|
89 virtual TUint16 ReadUnicodeValueL() = 0; |
|
90 }; |
|
91 |
|
92 /** |
|
93 * @publishedAll |
|
94 * @released |
|
95 A class to read Unicode values directly from memory. |
|
96 */ |
|
97 class TMemoryUnicodeSource: public MUnicodeSource |
|
98 { |
|
99 public: |
|
100 inline TMemoryUnicodeSource(const TUint16* aPtr); |
|
101 inline TUint16 ReadUnicodeValueL(); |
|
102 |
|
103 private: |
|
104 const TUint16* iPtr; |
|
105 }; |
|
106 |
|
107 /** |
|
108 * @publishedAll |
|
109 * @released |
|
110 A class to read Unicode values from a stream built on a memory object. |
|
111 */ |
|
112 class TMemoryStreamUnicodeSource: public MUnicodeSource |
|
113 { |
|
114 public: |
|
115 inline TMemoryStreamUnicodeSource(Deserialiser& aStream); |
|
116 inline TUint16 ReadUnicodeValueL(); |
|
117 |
|
118 private: |
|
119 Deserialiser& iStream; |
|
120 }; |
|
121 |
|
122 /** |
|
123 * @publishedAll |
|
124 * @released |
|
125 */ |
|
126 class MUnicodeSink |
|
127 { |
|
128 public: |
|
129 virtual void WriteUnicodeValueL(TUint16 aValue) = 0; |
|
130 }; |
|
131 |
|
132 /** |
|
133 * @publishedAll |
|
134 * @released |
|
135 A class to write Unicode values directly to memory. |
|
136 */ |
|
137 class TMemoryUnicodeSink: public MUnicodeSink |
|
138 { |
|
139 public: |
|
140 inline TMemoryUnicodeSink(TUint16* aPtr); |
|
141 inline void WriteUnicodeValueL(TUint16 aValue); |
|
142 |
|
143 private: |
|
144 TUint16* iPtr; |
|
145 }; |
|
146 |
|
147 /** |
|
148 * @publishedAll |
|
149 * @released |
|
150 A class to write Unicode values to a stream built on a memory object. |
|
151 */ |
|
152 class TMemoryStreamUnicodeSink: public MUnicodeSink |
|
153 { |
|
154 public: |
|
155 inline TMemoryStreamUnicodeSink(Serialiser& aStream); |
|
156 inline void WriteUnicodeValueL(TUint16 aValue); |
|
157 |
|
158 private: |
|
159 Serialiser& iStream; |
|
160 }; |
|
161 |
|
162 /** |
|
163 * @publishedAll |
|
164 * @released |
|
165 |
|
166 A class to hold functions to compress text using the Standard Compression Scheme for Unicode. |
|
167 |
|
168 A note on error handling and leaving. |
|
169 |
|
170 Although all the public functions except the constructor can leave, it is possible to guarantee success: that is, |
|
171 guarantee that a call will not leave, and that compression will be completed. To do this, (i) supply a MUnicodeSource |
|
172 object with a non-leaving ReadUnicodeValueL function, such as a TMemoryUnicodeSource; (ii) write output to a |
|
173 Serialiser with a non-leaving WriteL function, or to a buffer that you already know to be big enough, which can be |
|
174 found out using CompressedSizeL. |
|
175 |
|
176 This guarantee of success is particularly useful when compressing from one memory buffer to another. |
|
177 */ |
|
178 class TUnicodeCompressor: public TUnicodeCompressionState |
|
179 { |
|
180 public: |
|
181 TUnicodeCompressor(); |
|
182 void CompressL(Serialiser& aOutput,MUnicodeSource& aInput, |
|
183 TInt aMaxOutputBytes = KMaxTInt,TInt aMaxInputWords = KMaxTInt, |
|
184 TInt* aOutputBytes = 0,TInt* aInputWords = 0); |
|
185 void CompressL(TUint8* aOutput,MUnicodeSource& aInput, |
|
186 TInt aMaxOutputBytes = KMaxTInt,TInt aMaxInputWords = KMaxTInt, |
|
187 TInt* aOutputBytes = 0,TInt* aInputWords = 0); |
|
188 TInt FlushL(Serialiser& aOutput,TInt aMaxOutputBytes,TInt& aOutputBytes); |
|
189 TInt FlushL(TUint8* aOutput,TInt aMaxOutputBytes,TInt& aOutputBytes); |
|
190 static TInt CompressedSizeL(MUnicodeSource& aInput,TInt aInputWords); |
|
191 |
|
192 private: |
|
193 |
|
194 // A structure to store a character and its treatment code |
|
195 struct TAction |
|
196 { |
|
197 // Treatment codes: static and dynamic window numbers, plain ASCII or plain Unicode |
|
198 enum |
|
199 { |
|
200 EPlainUnicode = -2, // character cannot be expressed as ASCII or using static or dynamic windows |
|
201 EPlainASCII = -1, // character can be emitted as an ASCII code |
|
202 EFirstDynamic = 0, // values 0..255 are for dynamic windows with offsets at these places in the offset table |
|
203 ELastDynamic = 255, |
|
204 EFirstStatic = 256, // values 256..263 are for static windows 0..7 |
|
205 ELastStatic = 263 |
|
206 }; |
|
207 |
|
208 inline TAction(); |
|
209 TAction(TUint16 aCode); |
|
210 |
|
211 TUint16 iCode; // Unicode value of the character |
|
212 TInt iTreatment; // treatment code: see above |
|
213 }; |
|
214 |
|
215 void DoCompressL(Serialiser* aOutputStream,TUint8* aOutputPointer,MUnicodeSource* aInput, |
|
216 TInt aMaxCompressedBytes,TInt aMaxUnicodeWords, |
|
217 TInt* aCompressedBytes,TInt* aUnicodeWords); |
|
218 void FlushInputBufferL(); |
|
219 void FlushOutputBufferL(); |
|
220 void WriteRunL(); |
|
221 void WriteCharacter(const TAction& aAction); |
|
222 void WriteSCharacter( const TAction& aAction); |
|
223 void WriteUCharacter(TUint16 aCode); |
|
224 void WriteByte(TUint32 aByte); |
|
225 void WriteCharacterFromBuffer(); |
|
226 void SelectTreatment(TInt aTreatment); |
|
227 |
|
228 enum |
|
229 { |
|
230 EMaxInputBufferSize = 4, |
|
231 EMaxOutputBufferSize = EMaxInputBufferSize * 3 // no Unicode character can be encoded as more than three bytes |
|
232 }; |
|
233 TAction iInputBuffer[EMaxInputBufferSize]; // circular buffer; queue of Unicode characters to be processed |
|
234 TInt iInputBufferStart; // position of first Unicode character to be processed |
|
235 TInt iInputBufferSize; // characters in the input buffer |
|
236 TUint8 iOutputBuffer[EMaxOutputBufferSize]; // circular buffer; queue of compressed bytes to be output |
|
237 TInt iOutputBufferStart; // position of first compressed byte to be output |
|
238 TInt iOutputBufferSize; // characters in the output buffer |
|
239 TInt iDynamicWindowIndex; // index of the current dynamic window |
|
240 Serialiser* iOutputStream; // if non-null, output is to this stream |
|
241 TUint8* iOutputPointer; // if non-null, output is to memory |
|
242 MUnicodeSource* iInput; // input object |
|
243 }; |
|
244 |
|
245 /** |
|
246 * @publishedAll |
|
247 * @released |
|
248 |
|
249 A class to hold functions to expand text using the Standard Compression Scheme for Unicode. |
|
250 |
|
251 A note on error handling and leaving. |
|
252 |
|
253 Although all the public functions except the constructor can leave, it is possible to guarantee success: that is, |
|
254 guarantee that a call will not leave, and that expansion will be completed. To do this, (i) supply a MUnicodeSink |
|
255 object with a non-leaving WriteUnicodeValueL function, such as a TMemoryUnicodeSink; (ii) read input from a Deserialiser |
|
256 with a non-leaving ReadL function; (iii) supply a big enough buffer to write the output; you can find out how big by |
|
257 calling ExpandedSizeL, using methods (i) and (ii) to guarantee success. |
|
258 |
|
259 This guarantee of success is particularly useful when expanding from one memory buffer to another. |
|
260 */ |
|
261 class TUnicodeExpander: public TUnicodeCompressionState |
|
262 { |
|
263 public: |
|
264 TUnicodeExpander(); |
|
265 void ExpandL(MUnicodeSink& aOutput,Deserialiser& aInput, |
|
266 TInt aMaxOutputWords = KMaxTInt,TInt aMaxInputBytes = KMaxTInt, |
|
267 TInt* aOutputWords = 0,TInt* aInputBytes = 0); |
|
268 void ExpandL(MUnicodeSink& aOutput,const TUint8* aInput, |
|
269 TInt aMaxOutputWords = KMaxTInt,TInt aMaxInputBytes = KMaxTInt, |
|
270 TInt* aOutputWords = 0,TInt* aInputBytes = 0); |
|
271 TInt FlushL(MUnicodeSink& aOutput,TInt aMaxOutputWords,TInt& aOutputWords); |
|
272 static TInt ExpandedSizeL(Deserialiser& aInput,TInt aInputBytes); |
|
273 static TInt ExpandedSizeL(const TUint8* aInput,TInt aInputBytes); |
|
274 |
|
275 private: |
|
276 void DoExpandL(MUnicodeSink* aOutput,Deserialiser* aInputStream,const TUint8* aInputPointer, |
|
277 TInt aMaxOutputWords,TInt aMaxInputBytes, |
|
278 TInt* aOutputWords,TInt* aInputBytes); |
|
279 void HandleByteL(); |
|
280 void FlushOutputBufferL(); |
|
281 TBool HandleSByteL(TUint8 aByte); |
|
282 TBool HandleUByteL(TUint8 aByte); |
|
283 TBool ReadByteL(TUint8& aByte); |
|
284 TBool QuoteUnicodeL(); |
|
285 TBool DefineWindowL(TInt aIndex); |
|
286 TBool DefineExpansionWindowL(); |
|
287 void WriteChar(TUint16 aChar); |
|
288 void WriteChar32(TUint32 aChar); |
|
289 |
|
290 enum |
|
291 { |
|
292 EMaxInputBufferSize = 3, // no Unicode character can be encoded as more than 3 bytes |
|
293 EMaxOutputBufferSize = 2 // no byte can be expanded into more than 2 Unicode characters |
|
294 }; |
|
295 TUint8 iInputBuffer[EMaxInputBufferSize]; // buffer containing a group of compressed bytes representing |
|
296 // a single operation; when an input source ends in the |
|
297 // middle of an operation, this buffer enables the next |
|
298 // expansion to start in the correct state |
|
299 TInt iInputBufferStart; // next read position in the input buffer |
|
300 TInt iInputBufferSize; // bytes in the input buffer |
|
301 TUint16 iOutputBuffer[EMaxOutputBufferSize]; // circular buffer; queue of Unicode characters to be output |
|
302 TInt iOutputBufferStart; // position of first Unicode character to be output |
|
303 TInt iOutputBufferSize; // characters in the output buffer |
|
304 MUnicodeSink* iOutput; // output object |
|
305 Deserialiser* iInputStream; // if non-null, input is from this stream |
|
306 const TUint8* iInputPointer; // if non-null, input is from memory |
|
307 }; |
|
308 |
|
309 // inline functions start here |
|
310 |
|
311 inline TMemoryUnicodeSource::TMemoryUnicodeSource(const TUint16* aPtr): |
|
312 iPtr(aPtr) |
|
313 { |
|
314 } |
|
315 |
|
316 inline TUint16 TMemoryUnicodeSource::ReadUnicodeValueL() |
|
317 { |
|
318 return *iPtr++; |
|
319 } |
|
320 |
|
321 inline TMemoryStreamUnicodeSource::TMemoryStreamUnicodeSource(Deserialiser& aStream): |
|
322 iStream(aStream) |
|
323 { |
|
324 } |
|
325 |
|
326 inline TMemoryUnicodeSink::TMemoryUnicodeSink(TUint16* aPtr): |
|
327 iPtr(aPtr) |
|
328 { |
|
329 } |
|
330 |
|
331 inline void TMemoryUnicodeSink::WriteUnicodeValueL(TUint16 aValue) |
|
332 { |
|
333 *iPtr++ = aValue; |
|
334 } |
|
335 |
|
336 inline TMemoryStreamUnicodeSink::TMemoryStreamUnicodeSink(Serialiser& aStream): |
|
337 iStream(aStream) |
|
338 { |
|
339 } |
|
340 |
|
341 inline TUnicodeCompressor::TAction::TAction(): |
|
342 iCode(0), |
|
343 iTreatment(EPlainUnicode) |
|
344 { |
|
345 } |
|
346 |
|
347 #endif // __UCMP_H__ |