1 s32ucmp.h |
1 // Copyright (c) 1998-2009 Nokia Corporation and/or its subsidiary(-ies). |
|
2 // All rights reserved. |
|
3 // This component and the accompanying materials are made available |
|
4 // under the terms of the License "Symbian Foundation License v1.0" to Symbian Foundation members and "Symbian Foundation End User License Agreement v1.0" to non-members |
|
5 // which accompanies this distribution, and is available |
|
6 // at the URL "http://www.symbianfoundation.org/legal/licencesv10.html". |
|
7 // |
|
8 // Initial Contributors: |
|
9 // Nokia Corporation - initial contribution. |
|
10 // |
|
11 // Contributors: |
|
12 // |
|
13 // Description: |
|
14 // Header for the Standard Compression Scheme for Unicode. |
|
15 // This code is compiled only in the Unicode build. |
|
16 // |
|
17 // |
|
18 |
|
19 |
|
20 |
|
21 #ifndef __S32UCMP_H__ |
|
22 #define __S32UCMP_H__ 1 |
|
23 |
|
24 #ifdef _UNICODE |
|
25 |
|
26 #include <e32std.h> |
|
27 #include <s32mem.h> |
|
28 |
|
29 /** |
|
30 * @publishedAll |
|
31 * @released |
|
32 */ |
|
33 class TUnicodeCompressionState |
|
34 { |
|
35 public: |
|
36 TUnicodeCompressionState(); |
|
37 void Reset(); |
|
38 static TInt StaticWindowIndex(TUint16 aCode); |
|
39 static TInt DynamicWindowOffsetIndex(TUint16 aCode); |
|
40 static TUint32 DynamicWindowBase(TInt aOffsetIndex); |
|
41 static TBool EncodeAsIs(TUint16 aCode); |
|
42 |
|
43 enum TPanic |
|
44 { |
|
45 EUnhandledByte, // expander code fails to handle all possible byte codes |
|
46 ENotUnicode, // expander can't handle Unicode values outside range 0x0..0x10FFFF; |
|
47 // that is, 16-bit codes plus 32-bit codes that can be expressed using |
|
48 // 16-bit surrogates |
|
49 EOutputBufferOverflow // output buffer is not big enough |
|
50 }; |
|
51 |
|
52 static void Panic(TPanic aPanic); |
|
53 |
|
54 protected: |
|
55 |
|
56 enum |
|
57 { |
|
58 EStaticWindows = 8, |
|
59 EDynamicWindows = 8, |
|
60 ESpecialBases = 7 |
|
61 }; |
|
62 |
|
63 TBool iUnicodeMode; // TRUE if in Unicode mode as opposed to single-byte mode |
|
64 TUint32 iActiveWindowBase; // base of the active window - bases are 32-bit because they |
|
65 // can be set to the surrogate area, which represents codes |
|
66 // from 0x00010000 to 0x0010FFFF - planes 1-16 of ISO-10646. |
|
67 static const TUint32 iStaticWindow[EStaticWindows]; // bases of the static windows |
|
68 static const TUint32 iDynamicWindowDefault[EDynamicWindows]; // default bases of the dynamic windows |
|
69 static const TUint16 iSpecialBase[ESpecialBases]; // bases for window offsets F9..FF |
|
70 |
|
71 TUint32 iDynamicWindow[EDynamicWindows]; // bases of the dynamic windows |
|
72 TInt iUnicodeWords; // Unicode words processed; read by compressor, written by expander |
|
73 TInt iMaxUnicodeWords; // maximum number of Unicode words to read or write |
|
74 TInt iCompressedBytes; // compressed bytes processed: read by expander, written by compressor |
|
75 TInt iMaxCompressedBytes; // maximum number of compressed bytes to read or write |
|
76 }; |
|
77 |
|
78 /** |
|
79 * @publishedAll |
|
80 * @released |
|
81 */ |
|
82 class MUnicodeSource |
|
83 { |
|
84 public: |
|
85 virtual TUint16 ReadUnicodeValueL() = 0; |
|
86 }; |
|
87 |
|
88 /** |
|
89 * @publishedAll |
|
90 * @released |
|
91 A class to read Unicode values directly from memory. |
|
92 */ |
|
93 class TMemoryUnicodeSource: public MUnicodeSource |
|
94 { |
|
95 public: |
|
96 inline TMemoryUnicodeSource(const TUint16* aPtr); |
|
97 inline TUint16 ReadUnicodeValueL(); |
|
98 |
|
99 private: |
|
100 const TUint16* iPtr; |
|
101 }; |
|
102 |
|
103 /** |
|
104 * @publishedAll |
|
105 * @released |
|
106 A class to read Unicode values from a stream built on a memory object. |
|
107 */ |
|
108 class TMemoryStreamUnicodeSource: public MUnicodeSource |
|
109 { |
|
110 public: |
|
111 inline TMemoryStreamUnicodeSource(RReadStream& aStream); |
|
112 inline TUint16 ReadUnicodeValueL(); |
|
113 |
|
114 private: |
|
115 RReadStream& iStream; |
|
116 }; |
|
117 |
|
118 /** |
|
119 * @publishedAll |
|
120 * @released |
|
121 */ |
|
122 class MUnicodeSink |
|
123 { |
|
124 public: |
|
125 virtual void WriteUnicodeValueL(TUint16 aValue) = 0; |
|
126 }; |
|
127 |
|
128 /** |
|
129 * @publishedAll |
|
130 * @released |
|
131 A class to write Unicode values directly to memory. |
|
132 */ |
|
133 class TMemoryUnicodeSink: public MUnicodeSink |
|
134 { |
|
135 public: |
|
136 inline TMemoryUnicodeSink(TUint16* aPtr); |
|
137 inline void WriteUnicodeValueL(TUint16 aValue); |
|
138 |
|
139 private: |
|
140 TUint16* iPtr; |
|
141 }; |
|
142 |
|
143 /** |
|
144 * @publishedAll |
|
145 * @released |
|
146 A class to write Unicode values to a stream built on a memory object. |
|
147 */ |
|
148 class TMemoryStreamUnicodeSink: public MUnicodeSink |
|
149 { |
|
150 public: |
|
151 inline TMemoryStreamUnicodeSink(RWriteStream& aStream); |
|
152 inline void WriteUnicodeValueL(TUint16 aValue); |
|
153 |
|
154 private: |
|
155 RWriteStream& iStream; |
|
156 }; |
|
157 |
|
158 /** |
|
159 * @publishedAll |
|
160 * @released |
|
161 |
|
162 A class to hold functions to compress text using the Standard Compression Scheme for Unicode. |
|
163 |
|
164 A note on error handling and leaving. |
|
165 |
|
166 Although all the public functions except the constructor can leave, it is possible to guarantee success: that is, |
|
167 guarantee that a call will not leave, and that compression will be completed. To do this, (i) supply a MUnicodeSource |
|
168 object with a non-leaving ReadUnicodeValueL function, such as a TMemoryUnicodeSource; (ii) write output to a |
|
169 RWriteStream with a non-leaving WriteL function, or to a buffer that you already know to be big enough, which can be |
|
170 found out using CompressedSizeL. |
|
171 |
|
172 This guarantee of success is particularly useful when compressing from one memory buffer to another. |
|
173 */ |
|
174 class TUnicodeCompressor: public TUnicodeCompressionState |
|
175 { |
|
176 public: |
|
177 IMPORT_C TUnicodeCompressor(); |
|
178 IMPORT_C void CompressL(RWriteStream& aOutput,MUnicodeSource& aInput, |
|
179 TInt aMaxOutputBytes = KMaxTInt,TInt aMaxInputWords = KMaxTInt, |
|
180 TInt* aOutputBytes = NULL,TInt* aInputWords = NULL); |
|
181 IMPORT_C void CompressL(TUint8* aOutput,MUnicodeSource& aInput, |
|
182 TInt aMaxOutputBytes = KMaxTInt,TInt aMaxInputWords = KMaxTInt, |
|
183 TInt* aOutputBytes = NULL,TInt* aInputWords = NULL); |
|
184 IMPORT_C TInt FlushL(RWriteStream& aOutput,TInt aMaxOutputBytes,TInt& aOutputBytes); |
|
185 IMPORT_C TInt FlushL(TUint8* aOutput,TInt aMaxOutputBytes,TInt& aOutputBytes); |
|
186 IMPORT_C static TInt CompressedSizeL(MUnicodeSource& aInput,TInt aInputWords); |
|
187 |
|
188 private: |
|
189 |
|
190 // A structure to store a character and its treatment code |
|
191 struct TAction |
|
192 { |
|
193 // Treatment codes: static and dynamic window numbers, plain ASCII or plain Unicode |
|
194 enum |
|
195 { |
|
196 EPlainUnicode = -2, // character cannot be expressed as ASCII or using static or dynamic windows |
|
197 EPlainASCII = -1, // character can be emitted as an ASCII code |
|
198 EFirstDynamic = 0, // values 0..255 are for dynamic windows with offsets at these places in the offset table |
|
199 ELastDynamic = 255, |
|
200 EFirstStatic = 256, // values 256..263 are for static windows 0..7 |
|
201 ELastStatic = 263 |
|
202 }; |
|
203 |
|
204 inline TAction(); |
|
205 TAction(TUint16 aCode); |
|
206 |
|
207 TUint16 iCode; // Unicode value of the character |
|
208 TInt iTreatment; // treatment code: see above |
|
209 }; |
|
210 |
|
211 void DoCompressL(RWriteStream* aOutputStream,TUint8* aOutputPointer,MUnicodeSource* aInput, |
|
212 TInt aMaxCompressedBytes,TInt aMaxUnicodeWords, |
|
213 TInt* aCompressedBytes,TInt* aUnicodeWords); |
|
214 void FlushInputBufferL(); |
|
215 void FlushOutputBufferL(); |
|
216 void WriteRunL(); |
|
217 void WriteCharacter(const TAction& aAction); |
|
218 void WriteSCharacter(const TAction& aAction); |
|
219 void WriteUCharacter(TUint16 aCode); |
|
220 void WriteByte(TUint aByte); |
|
221 void WriteCharacterFromBuffer(); |
|
222 void SelectTreatment(TInt aTreatment); |
|
223 |
|
224 enum |
|
225 { |
|
226 EMaxInputBufferSize = 4, |
|
227 EMaxOutputBufferSize = EMaxInputBufferSize * 3 // no Unicode character can be encoded as more than three bytes |
|
228 }; |
|
229 TAction iInputBuffer[EMaxInputBufferSize]; // circular buffer; queue of Unicode characters to be processed |
|
230 TInt iInputBufferStart; // position of first Unicode character to be processed |
|
231 TInt iInputBufferSize; // characters in the input buffer |
|
232 TUint8 iOutputBuffer[EMaxOutputBufferSize]; // circular buffer; queue of compressed bytes to be output |
|
233 TInt iOutputBufferStart; // position of first compressed byte to be output |
|
234 TInt iOutputBufferSize; // characters in the output buffer |
|
235 TInt iDynamicWindowIndex; // index of the current dynamic window |
|
236 RWriteStream* iOutputStream; // if non-null, output is to this stream |
|
237 TUint8* iOutputPointer; // if non-null, output is to memory |
|
238 MUnicodeSource* iInput; // input object |
|
239 }; |
|
240 |
|
241 /** |
|
242 * @publishedAll |
|
243 * @released |
|
244 |
|
245 A class to hold functions to expand text using the Standard Compression Scheme for Unicode. |
|
246 |
|
247 A note on error handling and leaving. |
|
248 |
|
249 Although all the public functions except the constructor can leave, it is possible to guarantee success: that is, |
|
250 guarantee that a call will not leave, and that expansion will be completed. To do this, (i) supply a MUnicodeSink |
|
251 object with a non-leaving WriteUnicodeValueL function, such as a TMemoryUnicodeSink; (ii) read input from a RReadStream |
|
252 with a non-leaving ReadL function; (iii) supply a big enough buffer to write the ouput; you can find out how big by |
|
253 calling ExpandedSizeL, using methods (i) and (ii) to guarantee success. |
|
254 |
|
255 This guarantee of success is particularly useful when expanding from one memory buffer to another. |
|
256 */ |
|
257 class TUnicodeExpander: public TUnicodeCompressionState |
|
258 { |
|
259 public: |
|
260 IMPORT_C TUnicodeExpander(); |
|
261 IMPORT_C void ExpandL(MUnicodeSink& aOutput,RReadStream& aInput, |
|
262 TInt aMaxOutputWords = KMaxTInt,TInt aMaxInputBytes = KMaxTInt, |
|
263 TInt* aOutputWords = NULL,TInt* aInputBytes = NULL); |
|
264 IMPORT_C void ExpandL(MUnicodeSink& aOutput,const TUint8* aInput, |
|
265 TInt aMaxOutputWords = KMaxTInt,TInt aMaxInputBytes = KMaxTInt, |
|
266 TInt* aOutputWords = NULL,TInt* aInputBytes = NULL); |
|
267 IMPORT_C TInt FlushL(MUnicodeSink& aOutput,TInt aMaxOutputWords,TInt& aOutputWords); |
|
268 IMPORT_C static TInt ExpandedSizeL(RReadStream& aInput,TInt aInputBytes); |
|
269 IMPORT_C static TInt ExpandedSizeL(const TUint8* aInput,TInt aInputBytes); |
|
270 |
|
271 private: |
|
272 void DoExpandL(MUnicodeSink* aOutput,RReadStream* aInputStream,const TUint8* aInputPointer, |
|
273 TInt aMaxOutputWords,TInt aMaxInputBytes, |
|
274 TInt* aOutputWords,TInt* aInputBytes); |
|
275 void HandleByteL(); |
|
276 void FlushOutputBufferL(); |
|
277 TBool HandleSByteL(TUint8 aByte); |
|
278 TBool HandleUByteL(TUint8 aByte); |
|
279 TBool ReadByteL(TUint8& aByte); |
|
280 TBool QuoteUnicodeL(); |
|
281 TBool DefineWindowL(TInt aIndex); |
|
282 TBool DefineExpansionWindowL(); |
|
283 void WriteChar(TText aChar); |
|
284 void WriteChar32(TUint aChar); |
|
285 |
|
286 enum |
|
287 { |
|
288 EMaxInputBufferSize = 3, // no Unicode character can be encoded as more than 3 bytes |
|
289 EMaxOutputBufferSize = 2 // no byte can be expanded into more than 2 Unicode characters |
|
290 }; |
|
291 TUint8 iInputBuffer[EMaxInputBufferSize]; // buffer containing a group of compressed bytes representing |
|
292 // a single operation; when an input source ends in the |
|
293 // middle of an operation, this buffer enables the next |
|
294 // expansion to start in the correct state |
|
295 TInt iInputBufferStart; // next read position in the input buffer |
|
296 TInt iInputBufferSize; // bytes in the input buffer |
|
297 TUint16 iOutputBuffer[EMaxOutputBufferSize]; // circular buffer; queue of Unicode characters to be output |
|
298 TInt iOutputBufferStart; // position of first Unicode character to be output |
|
299 TInt iOutputBufferSize; // characters in the output buffer |
|
300 MUnicodeSink* iOutput; // output object |
|
301 RReadStream* iInputStream; // if non-null, input is from this stream |
|
302 const TUint8* iInputPointer; // if non-null, input is from memory |
|
303 }; |
|
304 |
|
305 // inline functions start here |
|
306 |
|
307 inline TMemoryUnicodeSource::TMemoryUnicodeSource(const TUint16* aPtr): |
|
308 iPtr(aPtr) |
|
309 { |
|
310 } |
|
311 |
|
312 inline TUint16 TMemoryUnicodeSource::ReadUnicodeValueL() |
|
313 { |
|
314 return *iPtr++; |
|
315 } |
|
316 |
|
317 inline TMemoryStreamUnicodeSource::TMemoryStreamUnicodeSource(RReadStream& aStream): |
|
318 iStream(aStream) |
|
319 { |
|
320 } |
|
321 |
|
322 inline TUint16 TMemoryStreamUnicodeSource::ReadUnicodeValueL() |
|
323 { |
|
324 TUint16 x; |
|
325 iStream.ReadL((TUint8*)&x,sizeof(TUint16)); |
|
326 return x; |
|
327 } |
|
328 |
|
329 inline TMemoryUnicodeSink::TMemoryUnicodeSink(TUint16* aPtr): |
|
330 iPtr(aPtr) |
|
331 { |
|
332 } |
|
333 |
|
334 inline void TMemoryUnicodeSink::WriteUnicodeValueL(TUint16 aValue) |
|
335 { |
|
336 *iPtr++ = aValue; |
|
337 } |
|
338 |
|
339 inline TMemoryStreamUnicodeSink::TMemoryStreamUnicodeSink(RWriteStream& aStream): |
|
340 iStream(aStream) |
|
341 { |
|
342 } |
|
343 |
|
344 inline void TMemoryStreamUnicodeSink::WriteUnicodeValueL(TUint16 aValue) |
|
345 { |
|
346 iStream.WriteL((TUint8*)&aValue,sizeof(TUint16)); |
|
347 } |
|
348 |
|
349 inline TUnicodeCompressor::TAction::TAction(): |
|
350 iCode(0), |
|
351 iTreatment(EPlainUnicode) |
|
352 { |
|
353 } |
|
354 |
|
355 #endif // _UNICODE |
|
356 |
|
357 #endif // __S32UCMP_H__ |