|
1 // Copyright (c) 1998-2009 Nokia Corporation and/or its subsidiary(-ies). |
|
2 // All rights reserved. |
|
3 // This component and the accompanying materials are made available |
|
4 // under the terms of "Eclipse Public License v1.0" |
|
5 // which accompanies this distribution, and is available |
|
6 // at the URL "http://www.eclipse.org/legal/epl-v10.html". |
|
7 // |
|
8 // Initial Contributors: |
|
9 // Nokia Corporation - initial contribution. |
|
10 // |
|
11 // Contributors: |
|
12 // |
|
13 // Description: |
|
14 // Header for the Standard Compression Scheme for Unicode. |
|
15 // This code is compiled only in the Unicode build. |
|
16 // |
|
17 // |
|
18 |
|
19 #ifndef __S32UCMP_H__ |
|
20 #define __S32UCMP_H__ 1 |
|
21 |
|
22 #ifdef _UNICODE |
|
23 |
|
24 #include <e32std.h> |
|
25 #include <s32mem.h> |
|
26 |
|
27 /** |
|
28 * @publishedAll |
|
29 * @released |
|
30 */ |
|
31 class TUnicodeCompressionState |
|
32 { |
|
33 public: |
|
34 TUnicodeCompressionState(); |
|
35 void Reset(); |
|
36 static TInt StaticWindowIndex(TUint16 aCode); |
|
37 static TInt DynamicWindowOffsetIndex(TUint16 aCode); |
|
38 static TUint32 DynamicWindowBase(TInt aOffsetIndex); |
|
39 static TBool EncodeAsIs(TUint16 aCode); |
|
40 |
|
41 enum TPanic |
|
42 { |
|
43 EUnhandledByte, // expander code fails to handle all possible byte codes |
|
44 ENotUnicode, // expander can't handle Unicode values outside range 0x0..0x10FFFF; |
|
45 // that is, 16-bit codes plus 32-bit codes that can be expressed using |
|
46 // 16-bit surrogates |
|
47 EOutputBufferOverflow // output buffer is not big enough |
|
48 }; |
|
49 |
|
50 static void Panic(TPanic aPanic); |
|
51 |
|
52 protected: |
|
53 |
|
54 enum |
|
55 { |
|
56 EStaticWindows = 8, |
|
57 EDynamicWindows = 8, |
|
58 ESpecialBases = 7 |
|
59 }; |
|
60 |
|
61 TBool iUnicodeMode; // TRUE if in Unicode mode as opposed to single-byte mode |
|
62 TUint32 iActiveWindowBase; // base of the active window - bases are 32-bit because they |
|
63 // can be set to the surrogate area, which represents codes |
|
64 // from 0x00010000 to 0x0010FFFF - planes 1-16 of ISO-10646. |
|
65 static const TUint32 iStaticWindow[EStaticWindows]; // bases of the static windows |
|
66 static const TUint32 iDynamicWindowDefault[EDynamicWindows]; // default bases of the dynamic windows |
|
67 static const TUint16 iSpecialBase[ESpecialBases]; // bases for window offsets F9..FF |
|
68 |
|
69 TUint32 iDynamicWindow[EDynamicWindows]; // bases of the dynamic windows |
|
70 TInt iUnicodeWords; // Unicode words processed; read by compressor, written by expander |
|
71 TInt iMaxUnicodeWords; // maximum number of Unicode words to read or write |
|
72 TInt iCompressedBytes; // compressed bytes processed: read by expander, written by compressor |
|
73 TInt iMaxCompressedBytes; // maximum number of compressed bytes to read or write |
|
74 }; |
|
75 |
|
76 /** |
|
77 * @publishedAll |
|
78 * @released |
|
79 */ |
|
80 class MUnicodeSource |
|
81 { |
|
82 public: |
|
83 virtual TUint16 ReadUnicodeValueL() = 0; |
|
84 }; |
|
85 |
|
86 /** |
|
87 * @publishedAll |
|
88 * @released |
|
89 A class to read Unicode values directly from memory. |
|
90 */ |
|
91 class TMemoryUnicodeSource: public MUnicodeSource |
|
92 { |
|
93 public: |
|
94 inline TMemoryUnicodeSource(const TUint16* aPtr); |
|
95 inline TUint16 ReadUnicodeValueL(); |
|
96 |
|
97 private: |
|
98 const TUint16* iPtr; |
|
99 }; |
|
100 |
|
101 /** |
|
102 * @publishedAll |
|
103 * @released |
|
104 A class to read Unicode values from a stream built on a memory object. |
|
105 */ |
|
106 class TMemoryStreamUnicodeSource: public MUnicodeSource |
|
107 { |
|
108 public: |
|
109 inline TMemoryStreamUnicodeSource(RReadStream& aStream); |
|
110 inline TUint16 ReadUnicodeValueL(); |
|
111 |
|
112 private: |
|
113 RReadStream& iStream; |
|
114 }; |
|
115 |
|
116 /** |
|
117 * @publishedAll |
|
118 * @released |
|
119 */ |
|
120 class MUnicodeSink |
|
121 { |
|
122 public: |
|
123 virtual void WriteUnicodeValueL(TUint16 aValue) = 0; |
|
124 }; |
|
125 |
|
126 /** |
|
127 * @publishedAll |
|
128 * @released |
|
129 A class to write Unicode values directly to memory. |
|
130 */ |
|
131 class TMemoryUnicodeSink: public MUnicodeSink |
|
132 { |
|
133 public: |
|
134 inline TMemoryUnicodeSink(TUint16* aPtr); |
|
135 inline void WriteUnicodeValueL(TUint16 aValue); |
|
136 |
|
137 private: |
|
138 TUint16* iPtr; |
|
139 }; |
|
140 |
|
141 /** |
|
142 * @publishedAll |
|
143 * @released |
|
144 A class to write Unicode values to a stream built on a memory object. |
|
145 */ |
|
146 class TMemoryStreamUnicodeSink: public MUnicodeSink |
|
147 { |
|
148 public: |
|
149 inline TMemoryStreamUnicodeSink(RWriteStream& aStream); |
|
150 inline void WriteUnicodeValueL(TUint16 aValue); |
|
151 |
|
152 private: |
|
153 RWriteStream& iStream; |
|
154 }; |
|
155 |
|
156 /** |
|
157 * @publishedAll |
|
158 * @released |
|
159 |
|
160 A class to hold functions to compress text using the Standard Compression Scheme for Unicode. |
|
161 |
|
162 A note on error handling and leaving. |
|
163 |
|
164 Although all the public functions except the constructor can leave, it is possible to guarantee success: that is, |
|
165 guarantee that a call will not leave, and that compression will be completed. To do this, (i) supply a MUnicodeSource |
|
166 object with a non-leaving ReadUnicodeValueL function, such as a TMemoryUnicodeSource; (ii) write output to a |
|
167 RWriteStream with a non-leaving WriteL function, or to a buffer that you already know to be big enough, which can be |
|
168 found out using CompressedSizeL. |
|
169 |
|
170 This guarantee of success is particularly useful when compressing from one memory buffer to another. |
|
171 */ |
|
172 class TUnicodeCompressor: public TUnicodeCompressionState |
|
173 { |
|
174 public: |
|
175 IMPORT_C TUnicodeCompressor(); |
|
176 IMPORT_C void CompressL(RWriteStream& aOutput,MUnicodeSource& aInput, |
|
177 TInt aMaxOutputBytes = KMaxTInt,TInt aMaxInputWords = KMaxTInt, |
|
178 TInt* aOutputBytes = NULL,TInt* aInputWords = NULL); |
|
179 IMPORT_C void CompressL(TUint8* aOutput,MUnicodeSource& aInput, |
|
180 TInt aMaxOutputBytes = KMaxTInt,TInt aMaxInputWords = KMaxTInt, |
|
181 TInt* aOutputBytes = NULL,TInt* aInputWords = NULL); |
|
182 IMPORT_C TInt FlushL(RWriteStream& aOutput,TInt aMaxOutputBytes,TInt& aOutputBytes); |
|
183 IMPORT_C TInt FlushL(TUint8* aOutput,TInt aMaxOutputBytes,TInt& aOutputBytes); |
|
184 IMPORT_C static TInt CompressedSizeL(MUnicodeSource& aInput,TInt aInputWords); |
|
185 |
|
186 private: |
|
187 |
|
188 // A structure to store a character and its treatment code |
|
189 struct TAction |
|
190 { |
|
191 // Treatment codes: static and dynamic window numbers, plain ASCII or plain Unicode |
|
192 enum |
|
193 { |
|
194 EPlainUnicode = -2, // character cannot be expressed as ASCII or using static or dynamic windows |
|
195 EPlainASCII = -1, // character can be emitted as an ASCII code |
|
196 EFirstDynamic = 0, // values 0..255 are for dynamic windows with offsets at these places in the offset table |
|
197 ELastDynamic = 255, |
|
198 EFirstStatic = 256, // values 256..263 are for static windows 0..7 |
|
199 ELastStatic = 263 |
|
200 }; |
|
201 |
|
202 inline TAction(); |
|
203 TAction(TUint16 aCode); |
|
204 |
|
205 TUint16 iCode; // Unicode value of the character |
|
206 TInt iTreatment; // treatment code: see above |
|
207 }; |
|
208 |
|
209 void DoCompressL(RWriteStream* aOutputStream,TUint8* aOutputPointer,MUnicodeSource* aInput, |
|
210 TInt aMaxCompressedBytes,TInt aMaxUnicodeWords, |
|
211 TInt* aCompressedBytes,TInt* aUnicodeWords); |
|
212 void FlushInputBufferL(); |
|
213 void FlushOutputBufferL(); |
|
214 void WriteRunL(); |
|
215 void WriteCharacter(const TAction& aAction); |
|
216 void WriteSCharacter(const TAction& aAction); |
|
217 void WriteUCharacter(TUint16 aCode); |
|
218 void WriteByte(TUint aByte); |
|
219 void WriteCharacterFromBuffer(); |
|
220 void SelectTreatment(TInt aTreatment); |
|
221 |
|
222 enum |
|
223 { |
|
224 EMaxInputBufferSize = 4, |
|
225 EMaxOutputBufferSize = EMaxInputBufferSize * 3 // no Unicode character can be encoded as more than three bytes |
|
226 }; |
|
227 TAction iInputBuffer[EMaxInputBufferSize]; // circular buffer; queue of Unicode characters to be processed |
|
228 TInt iInputBufferStart; // position of first Unicode character to be processed |
|
229 TInt iInputBufferSize; // characters in the input buffer |
|
230 TUint8 iOutputBuffer[EMaxOutputBufferSize]; // circular buffer; queue of compressed bytes to be output |
|
231 TInt iOutputBufferStart; // position of first compressed byte to be output |
|
232 TInt iOutputBufferSize; // characters in the output buffer |
|
233 TInt iDynamicWindowIndex; // index of the current dynamic window |
|
234 RWriteStream* iOutputStream; // if non-null, output is to this stream |
|
235 TUint8* iOutputPointer; // if non-null, output is to memory |
|
236 MUnicodeSource* iInput; // input object |
|
237 }; |
|
238 |
|
239 /** |
|
240 * @publishedAll |
|
241 * @released |
|
242 |
|
243 A class to hold functions to expand text using the Standard Compression Scheme for Unicode. |
|
244 |
|
245 A note on error handling and leaving. |
|
246 |
|
247 Although all the public functions except the constructor can leave, it is possible to guarantee success: that is, |
|
248 guarantee that a call will not leave, and that expansion will be completed. To do this, (i) supply a MUnicodeSink |
|
249 object with a non-leaving WriteUnicodeValueL function, such as a TMemoryUnicodeSink; (ii) read input from a RReadStream |
|
250 with a non-leaving ReadL function; (iii) supply a big enough buffer to write the ouput; you can find out how big by |
|
251 calling ExpandedSizeL, using methods (i) and (ii) to guarantee success. |
|
252 |
|
253 This guarantee of success is particularly useful when expanding from one memory buffer to another. |
|
254 */ |
|
255 class TUnicodeExpander: public TUnicodeCompressionState |
|
256 { |
|
257 public: |
|
258 IMPORT_C TUnicodeExpander(); |
|
259 IMPORT_C void ExpandL(MUnicodeSink& aOutput,RReadStream& aInput, |
|
260 TInt aMaxOutputWords = KMaxTInt,TInt aMaxInputBytes = KMaxTInt, |
|
261 TInt* aOutputWords = NULL,TInt* aInputBytes = NULL); |
|
262 IMPORT_C void ExpandL(MUnicodeSink& aOutput,const TUint8* aInput, |
|
263 TInt aMaxOutputWords = KMaxTInt,TInt aMaxInputBytes = KMaxTInt, |
|
264 TInt* aOutputWords = NULL,TInt* aInputBytes = NULL); |
|
265 IMPORT_C TInt FlushL(MUnicodeSink& aOutput,TInt aMaxOutputWords,TInt& aOutputWords); |
|
266 IMPORT_C static TInt ExpandedSizeL(RReadStream& aInput,TInt aInputBytes); |
|
267 IMPORT_C static TInt ExpandedSizeL(const TUint8* aInput,TInt aInputBytes); |
|
268 |
|
269 private: |
|
270 void DoExpandL(MUnicodeSink* aOutput,RReadStream* aInputStream,const TUint8* aInputPointer, |
|
271 TInt aMaxOutputWords,TInt aMaxInputBytes, |
|
272 TInt* aOutputWords,TInt* aInputBytes); |
|
273 void HandleByteL(); |
|
274 void FlushOutputBufferL(); |
|
275 TBool HandleSByteL(TUint8 aByte); |
|
276 TBool HandleUByteL(TUint8 aByte); |
|
277 TBool ReadByteL(TUint8& aByte); |
|
278 TBool QuoteUnicodeL(); |
|
279 TBool DefineWindowL(TInt aIndex); |
|
280 TBool DefineExpansionWindowL(); |
|
281 void WriteChar(TText aChar); |
|
282 void WriteChar32(TUint aChar); |
|
283 |
|
284 enum |
|
285 { |
|
286 EMaxInputBufferSize = 3, // no Unicode character can be encoded as more than 3 bytes |
|
287 EMaxOutputBufferSize = 2 // no byte can be expanded into more than 2 Unicode characters |
|
288 }; |
|
289 TUint8 iInputBuffer[EMaxInputBufferSize]; // buffer containing a group of compressed bytes representing |
|
290 // a single operation; when an input source ends in the |
|
291 // middle of an operation, this buffer enables the next |
|
292 // expansion to start in the correct state |
|
293 TInt iInputBufferStart; // next read position in the input buffer |
|
294 TInt iInputBufferSize; // bytes in the input buffer |
|
295 TUint16 iOutputBuffer[EMaxOutputBufferSize]; // circular buffer; queue of Unicode characters to be output |
|
296 TInt iOutputBufferStart; // position of first Unicode character to be output |
|
297 TInt iOutputBufferSize; // characters in the output buffer |
|
298 MUnicodeSink* iOutput; // output object |
|
299 RReadStream* iInputStream; // if non-null, input is from this stream |
|
300 const TUint8* iInputPointer; // if non-null, input is from memory |
|
301 }; |
|
302 |
|
303 // inline functions start here |
|
304 |
|
305 inline TMemoryUnicodeSource::TMemoryUnicodeSource(const TUint16* aPtr): |
|
306 iPtr(aPtr) |
|
307 { |
|
308 } |
|
309 |
|
310 inline TUint16 TMemoryUnicodeSource::ReadUnicodeValueL() |
|
311 { |
|
312 return *iPtr++; |
|
313 } |
|
314 |
|
315 inline TMemoryStreamUnicodeSource::TMemoryStreamUnicodeSource(RReadStream& aStream): |
|
316 iStream(aStream) |
|
317 { |
|
318 } |
|
319 |
|
320 inline TUint16 TMemoryStreamUnicodeSource::ReadUnicodeValueL() |
|
321 { |
|
322 TUint16 x; |
|
323 iStream.ReadL((TUint8*)&x,sizeof(TUint16)); |
|
324 return x; |
|
325 } |
|
326 |
|
327 inline TMemoryUnicodeSink::TMemoryUnicodeSink(TUint16* aPtr): |
|
328 iPtr(aPtr) |
|
329 { |
|
330 } |
|
331 |
|
332 inline void TMemoryUnicodeSink::WriteUnicodeValueL(TUint16 aValue) |
|
333 { |
|
334 *iPtr++ = aValue; |
|
335 } |
|
336 |
|
337 inline TMemoryStreamUnicodeSink::TMemoryStreamUnicodeSink(RWriteStream& aStream): |
|
338 iStream(aStream) |
|
339 { |
|
340 } |
|
341 |
|
342 inline void TMemoryStreamUnicodeSink::WriteUnicodeValueL(TUint16 aValue) |
|
343 { |
|
344 iStream.WriteL((TUint8*)&aValue,sizeof(TUint16)); |
|
345 } |
|
346 |
|
347 inline TUnicodeCompressor::TAction::TAction(): |
|
348 iCode(0), |
|
349 iTreatment(EPlainUnicode) |
|
350 { |
|
351 } |
|
352 |
|
353 #endif // _UNICODE |
|
354 |
|
355 #endif // __S32UCMP_H__ |