|
1 /* |
|
2 * Copyright (c) 1998-2009 Nokia Corporation and/or its subsidiary(-ies). |
|
3 * All rights reserved. |
|
4 * This component and the accompanying materials are made available |
|
5 * under the terms of the License "Eclipse Public License v1.0" |
|
6 * which accompanies this distribution, and is available |
|
7 * at the URL "http://www.eclipse.org/legal/epl-v10.html". |
|
8 * |
|
9 * Initial Contributors: |
|
10 * Nokia Corporation - initial contribution. |
|
11 * |
|
12 * Contributors: |
|
13 * |
|
14 * Description: |
|
15 * Header for the Standard Compression Scheme for Unicode. |
|
16 * This code is compiled only in the Unicode build. |
|
17 * |
|
18 */ |
|
19 |
|
20 |
|
21 |
|
22 |
|
23 #ifndef __S32UCMP_H__ |
|
24 #define __S32UCMP_H__ 1 |
|
25 |
|
26 /** |
|
27 * @file |
|
28 * @internalComponent |
|
29 */ |
|
30 |
|
31 #ifdef _UNICODE |
|
32 |
|
33 #include <e32base.h> |
|
34 #include <s32file.h> |
|
35 |
|
36 class TUnicodeCompressionState |
|
37 { |
|
38 public: |
|
39 TUnicodeCompressionState(); |
|
40 void Reset(); |
|
41 static TInt StaticWindowIndex(TUint16 aCode); |
|
42 static TInt DynamicWindowOffsetIndex(TUint16 aCode); |
|
43 static TUint32 DynamicWindowBase(TInt aOffsetIndex); |
|
44 static TBool EncodeAsIs(TUint16 aCode); |
|
45 |
|
46 enum TPanic |
|
47 { |
|
48 EUnhandledByte, // expander code fails to handle all possible byte codes |
|
49 ENotUnicode, // expander can't handle Unicode values outside range 0x0..0x10FFFF; |
|
50 // that is, 16-bit codes plus 32-bit codes that can be expressed using |
|
51 // 16-bit surrogates |
|
52 EOutputBufferOverflow // output buffer is not big enough |
|
53 }; |
|
54 |
|
55 static void Panic(TPanic aPanic); |
|
56 |
|
57 protected: |
|
58 |
|
59 enum |
|
60 { |
|
61 EStaticWindows = 8, |
|
62 EDynamicWindows = 8, |
|
63 ESpecialBases = 7 |
|
64 }; |
|
65 |
|
66 TBool iUnicodeMode; // TRUE if in Unicode mode as opposed to single-byte mode |
|
67 TUint32 iActiveWindowBase; // base of the active window - bases are 32-bit because they |
|
68 // can be set to the surrogate area, which represents codes |
|
69 // from 0x00010000 to 0x0010FFFF - planes 1-16 of ISO-10646. |
|
70 static const TUint32 iStaticWindow[EStaticWindows]; // bases of the static windows |
|
71 static const TUint32 iDynamicWindowDefault[EDynamicWindows]; // default bases of the dynamic windows |
|
72 static const TUint16 iSpecialBase[ESpecialBases]; // bases for window offsets F9..FF |
|
73 |
|
74 TUint32 iDynamicWindow[EDynamicWindows]; // bases of the dynamic windows |
|
75 TInt iUnicodeWords; // Unicode words processed; read by compressor, written by expander |
|
76 TInt iMaxUnicodeWords; // maximum number of Unicode words to read or write |
|
77 TInt iCompressedBytes; // compressed bytes processed: read by expander, written by compressor |
|
78 TInt iMaxCompressedBytes; // maximum number of compressed bytes to read or write |
|
79 }; |
|
80 |
|
81 class MUnicodeSource |
|
82 { |
|
83 public: |
|
84 virtual TUint16 ReadUnicodeValueL() = 0; |
|
85 }; |
|
86 |
|
87 /** |
|
88 A class to read Unicode values directly from memory. |
|
89 */ |
|
90 class TMemoryUnicodeSource: public MUnicodeSource |
|
91 { |
|
92 public: |
|
93 inline TMemoryUnicodeSource(const TUint16* aPtr); |
|
94 inline TUint16 ReadUnicodeValueL(); |
|
95 |
|
96 private: |
|
97 const TUint16* iPtr; |
|
98 }; |
|
99 |
|
100 /** |
|
101 A class to read Unicode values from a stream built on a memory object. |
|
102 */ |
|
103 class TMemoryStreamUnicodeSource: public MUnicodeSource |
|
104 { |
|
105 public: |
|
106 inline TMemoryStreamUnicodeSource(RReadStream& aStream); |
|
107 inline TUint16 ReadUnicodeValueL(); |
|
108 |
|
109 private: |
|
110 RReadStream& iStream; |
|
111 }; |
|
112 |
|
113 class MUnicodeSink |
|
114 { |
|
115 public: |
|
116 virtual void WriteUnicodeValueL(TUint16 aValue) = 0; |
|
117 }; |
|
118 |
|
119 /** |
|
120 A class to write Unicode values directly to memory. |
|
121 */ |
|
122 class TMemoryUnicodeSink: public MUnicodeSink |
|
123 { |
|
124 public: |
|
125 inline TMemoryUnicodeSink(TUint16* aPtr); |
|
126 inline void WriteUnicodeValueL(TUint16 aValue); |
|
127 |
|
128 private: |
|
129 TUint16* iPtr; |
|
130 }; |
|
131 |
|
132 /** |
|
133 A class to write Unicode values to a stream built on a memory object. |
|
134 */ |
|
135 class TMemoryStreamUnicodeSink: public MUnicodeSink |
|
136 { |
|
137 public: |
|
138 inline TMemoryStreamUnicodeSink(RWriteStream& aStream); |
|
139 inline void WriteUnicodeValueL(TUint16 aValue); |
|
140 |
|
141 private: |
|
142 RWriteStream& iStream; |
|
143 }; |
|
144 |
|
145 /** |
|
146 |
|
147 A class to hold functions to compress text using the Standard Compression Scheme for Unicode. |
|
148 |
|
149 A note on error handling and leaving. |
|
150 |
|
151 Although all the public functions except the constructor can leave, it is possible to guarantee success: that is, |
|
152 guarantee that a call will not leave, and that compression will be completed. To do this, (i) supply a MUnicodeSource |
|
153 object with a non-leaving ReadUnicodeValueL function, such as a TMemoryUnicodeSource; (ii) write output to a |
|
154 RWriteStream with a non-leaving WriteL function, or to a buffer that you already know to be big enough, which can be |
|
155 found out using CompressedSizeL. |
|
156 |
|
157 This guarantee of success is particularly useful when compressing from one memory buffer to another. |
|
158 */ |
|
159 class TUnicodeCompressor: public TUnicodeCompressionState |
|
160 { |
|
161 public: |
|
162 IMPORT_C TUnicodeCompressor(); |
|
163 IMPORT_C void CompressL(RWriteStream& aOutput,MUnicodeSource& aInput, |
|
164 TInt aMaxOutputBytes = KMaxTInt,TInt aMaxInputWords = KMaxTInt, |
|
165 TInt* aOutputBytes = NULL,TInt* aInputWords = NULL); |
|
166 IMPORT_C void CompressL(TUint8* aOutput,MUnicodeSource& aInput, |
|
167 TInt aMaxOutputBytes = KMaxTInt,TInt aMaxInputWords = KMaxTInt, |
|
168 TInt* aOutputBytes = NULL,TInt* aInputWords = NULL); |
|
169 IMPORT_C TInt FlushL(RWriteStream& aOutput,TInt aMaxOutputBytes,TInt& aOutputBytes); |
|
170 IMPORT_C TInt FlushL(TUint8* aOutput,TInt aMaxOutputBytes,TInt& aOutputBytes); |
|
171 IMPORT_C static TInt CompressedSizeL(MUnicodeSource& aInput,TInt aInputWords); |
|
172 |
|
173 private: |
|
174 |
|
175 // A structure to store a character and its treatment code |
|
176 struct TAction |
|
177 { |
|
178 // Treatment codes: static and dynamic window numbers, plain ASCII or plain Unicode |
|
179 enum |
|
180 { |
|
181 EPlainUnicode = -2, // character cannot be expressed as ASCII or using static or dynamic windows |
|
182 EPlainASCII = -1, // character can be emitted as an ASCII code |
|
183 EFirstDynamic = 0, // values 0..255 are for dynamic windows with offsets at these places in the offset table |
|
184 ELastDynamic = 255, |
|
185 EFirstStatic = 256, // values 256..263 are for static windows 0..7 |
|
186 ELastStatic = 263 |
|
187 }; |
|
188 |
|
189 inline TAction(); |
|
190 TAction(TUint16 aCode); |
|
191 |
|
192 TUint16 iCode; // Unicode value of the character |
|
193 TInt iTreatment; // treatment code: see above |
|
194 }; |
|
195 |
|
196 void DoCompressL(RWriteStream* aOutputStream,TUint8* aOutputPointer,MUnicodeSource* aInput, |
|
197 TInt aMaxCompressedBytes,TInt aMaxUnicodeWords, |
|
198 TInt* aCompressedBytes,TInt* aUnicodeWords); |
|
199 void FlushInputBufferL(); |
|
200 void FlushOutputBufferL(); |
|
201 void WriteRunL(); |
|
202 void WriteCharacter(const TAction& aAction); |
|
203 void WriteSCharacter(const TAction& aAction); |
|
204 void WriteUCharacter(TUint16 aCode); |
|
205 void WriteByte(TUint aByte); |
|
206 void WriteCharacterFromBuffer(); |
|
207 void SelectTreatment(TInt aTreatment); |
|
208 |
|
209 enum |
|
210 { |
|
211 EMaxInputBufferSize = 4, |
|
212 EMaxOutputBufferSize = EMaxInputBufferSize * 3 // no Unicode character can be encoded as more than three bytes |
|
213 }; |
|
214 TAction iInputBuffer[EMaxInputBufferSize]; // circular buffer; queue of Unicode characters to be processed |
|
215 TInt iInputBufferStart; // position of first Unicode character to be processed |
|
216 TInt iInputBufferSize; // characters in the input buffer |
|
217 TUint8 iOutputBuffer[EMaxOutputBufferSize]; // circular buffer; queue of compressed bytes to be output |
|
218 TInt iOutputBufferStart; // position of first compressed byte to be output |
|
219 TInt iOutputBufferSize; // characters in the output buffer |
|
220 TInt iDynamicWindowIndex; // index of the current dynamic window |
|
221 RWriteStream* iOutputStream; // if non-null, output is to this stream |
|
222 TUint8* iOutputPointer; // if non-null, output is to memory |
|
223 MUnicodeSource* iInput; // input object |
|
224 }; |
|
225 |
|
226 /** |
|
227 |
|
228 A class to hold functions to expand text using the Standard Compression Scheme for Unicode. |
|
229 |
|
230 A note on error handling and leaving. |
|
231 |
|
232 Although all the public functions except the constructor can leave, it is possible to guarantee success: that is, |
|
233 guarantee that a call will not leave, and that expansion will be completed. To do this, (i) supply a MUnicodeSink |
|
234 object with a non-leaving WriteUnicodeValueL function, such as a TMemoryUnicodeSink; (ii) read input from a RReadStream |
|
235 with a non-leaving ReadL function; (iii) supply a big enough buffer to write the ouput; you can find out how big by |
|
236 calling ExpandedSizeL, using methods (i) and (ii) to guarantee success. |
|
237 |
|
238 This guarantee of success is particularly useful when expanding from one memory buffer to another. |
|
239 */ |
|
240 class TUnicodeExpander: public TUnicodeCompressionState |
|
241 { |
|
242 public: |
|
243 IMPORT_C TUnicodeExpander(); |
|
244 IMPORT_C void ExpandL(MUnicodeSink& aOutput,RReadStream& aInput, |
|
245 TInt aMaxOutputWords = KMaxTInt,TInt aMaxInputBytes = KMaxTInt, |
|
246 TInt* aOutputWords = NULL,TInt* aInputBytes = NULL); |
|
247 IMPORT_C void ExpandL(MUnicodeSink& aOutput,const TUint8* aInput, |
|
248 TInt aMaxOutputWords = KMaxTInt,TInt aMaxInputBytes = KMaxTInt, |
|
249 TInt* aOutputWords = NULL,TInt* aInputBytes = NULL); |
|
250 IMPORT_C TInt FlushL(MUnicodeSink& aOutput,TInt aMaxOutputWords,TInt& aOutputWords); |
|
251 IMPORT_C static TInt ExpandedSizeL(RReadStream& aInput,TInt aInputBytes); |
|
252 IMPORT_C static TInt ExpandedSizeL(const TUint8* aInput,TInt aInputBytes); |
|
253 |
|
254 private: |
|
255 void DoExpandL(MUnicodeSink* aOutput,RReadStream* aInputStream,const TUint8* aInputPointer, |
|
256 TInt aMaxOutputWords,TInt aMaxInputBytes, |
|
257 TInt* aOutputWords,TInt* aInputBytes); |
|
258 void HandleByteL(); |
|
259 void FlushOutputBufferL(); |
|
260 TBool HandleSByteL(TUint8 aByte); |
|
261 TBool HandleUByteL(TUint8 aByte); |
|
262 TBool ReadByteL(TUint8& aByte); |
|
263 TBool QuoteUnicodeL(); |
|
264 TBool DefineWindowL(TInt aIndex); |
|
265 TBool DefineExpansionWindowL(); |
|
266 void WriteChar(TText aChar); |
|
267 void WriteChar32(TUint aChar); |
|
268 |
|
269 enum |
|
270 { |
|
271 EMaxInputBufferSize = 3, // no Unicode character can be encoded as more than 3 bytes |
|
272 EMaxOutputBufferSize = 2 // no byte can be expanded into more than 2 Unicode characters |
|
273 }; |
|
274 TUint8 iInputBuffer[EMaxInputBufferSize]; // buffer containing a group of compressed bytes representing |
|
275 // a single operation; when an input source ends in the |
|
276 // middle of an operation, this buffer enables the next |
|
277 // expansion to start in the correct state |
|
278 TInt iInputBufferStart; // next read position in the input buffer |
|
279 TInt iInputBufferSize; // bytes in the input buffer |
|
280 TUint16 iOutputBuffer[EMaxOutputBufferSize]; // circular buffer; queue of Unicode characters to be output |
|
281 TInt iOutputBufferStart; // position of first Unicode character to be output |
|
282 TInt iOutputBufferSize; // characters in the output buffer |
|
283 MUnicodeSink* iOutput; // output object |
|
284 RReadStream* iInputStream; // if non-null, input is from this stream |
|
285 const TUint8* iInputPointer; // if non-null, input is from memory |
|
286 }; |
|
287 |
|
288 // inline functions start here |
|
289 |
|
290 inline TMemoryUnicodeSource::TMemoryUnicodeSource(const TUint16* aPtr): |
|
291 iPtr(aPtr) |
|
292 { |
|
293 } |
|
294 |
|
295 inline TUint16 TMemoryUnicodeSource::ReadUnicodeValueL() |
|
296 { |
|
297 return *iPtr++; |
|
298 } |
|
299 |
|
300 inline TMemoryStreamUnicodeSource::TMemoryStreamUnicodeSource(RReadStream& aStream): |
|
301 iStream(aStream) |
|
302 { |
|
303 } |
|
304 |
|
305 inline TUint16 TMemoryStreamUnicodeSource::ReadUnicodeValueL() |
|
306 { |
|
307 TUint16 x; |
|
308 iStream.ReadL((TUint8*)&x,sizeof(TUint16)); |
|
309 return x; |
|
310 } |
|
311 |
|
312 inline TMemoryUnicodeSink::TMemoryUnicodeSink(TUint16* aPtr): |
|
313 iPtr(aPtr) |
|
314 { |
|
315 } |
|
316 |
|
317 inline void TMemoryUnicodeSink::WriteUnicodeValueL(TUint16 aValue) |
|
318 { |
|
319 *iPtr++ = aValue; |
|
320 } |
|
321 |
|
322 inline TMemoryStreamUnicodeSink::TMemoryStreamUnicodeSink(RWriteStream& aStream): |
|
323 iStream(aStream) |
|
324 { |
|
325 } |
|
326 |
|
327 inline void TMemoryStreamUnicodeSink::WriteUnicodeValueL(TUint16 aValue) |
|
328 { |
|
329 iStream.WriteL((TUint8*)&aValue,sizeof(TUint16)); |
|
330 } |
|
331 |
|
332 inline TUnicodeCompressor::TAction::TAction(): |
|
333 iCode(0), |
|
334 iTreatment(EPlainUnicode) |
|
335 { |
|
336 } |
|
337 |
|
338 #endif // _UNICODE |
|
339 |
|
340 #endif // __S32UCMP_H__ |