|
1 /* |
|
2 * Copyright (c) 2001-2009 Nokia Corporation and/or its subsidiary(-ies). |
|
3 * All rights reserved. |
|
4 * This component and the accompanying materials are made available |
|
5 * under the terms of the License "Eclipse Public License v1.0" |
|
6 * which accompanies this distribution, and is available |
|
7 * at the URL "http://www.eclipse.org/legal/epl-v10.html". |
|
8 * |
|
9 * Initial Contributors: |
|
10 * Nokia Corporation - initial contribution. |
|
11 * |
|
12 * Contributors: |
|
13 * |
|
14 * Description: |
|
15 * the API to the Unicode compressor |
|
16 * |
|
17 */ |
|
18 |
|
19 |
|
20 #include "wide.h" |
|
21 |
|
22 |
|
23 extern void CompressUnicode(unsigned char* aOutputBuffer, int& aOutputLength, int aMaximumOutputLength, const UTF16* aInputBuffer, int aInputLength); |
|
24 |
|
// The Symbian definitions that the Symbian Unicode-compression classes rely on.

// Import/export decorations expand to nothing here.
#define IMPORT_C
#define EXPORT_C

#ifdef __TOOLS2__ // If TOOLS2 defined, use the definition of NULL as supplied in stddef.h
#include <stddef.h>
#else
// Supply NULL only when no earlier header has already done so: an unconditional
// #define would redefine the macro whenever a definition is already in effect.
#ifndef NULL
#define NULL 0
#endif
#endif // !__TOOLS2__

// Symbian boolean constants mapped onto the C++ keywords.
#define TRUE true
#define FALSE false

// Symbian fixed-name integer and boolean types.
typedef unsigned char TUint8;
typedef unsigned short TUint16;
typedef unsigned long TUint32; // NOTE(review): unsigned long is wider than 32 bits on LP64 hosts - confirm this is acceptable
typedef unsigned int TUint;
typedef int TInt;
typedef bool TBool;

const TInt KMaxTInt = 0x7fffffff; // largest value used as a TInt limit (default for the "maximum" arguments of CompressL)
const TInt KErrNotFound = -1;

// Declared only; the compressor interface refers to streams by pointer.
class RWriteStream;

// Declared here, defined elsewhere.
extern void Panic(int aCode);
|
46 |
|
47 // the rest of the contents of this file is a selective copy of base\store\inc\S32UCMP.H |
|
48 |
|
49 class TUnicodeCompressionState |
|
50 { |
|
51 public: |
|
52 TUnicodeCompressionState(); |
|
53 void Reset(); |
|
54 static TInt StaticWindowIndex(TUint16 aCode); |
|
55 static TInt DynamicWindowOffsetIndex(TUint16 aCode); |
|
56 static TUint32 DynamicWindowBase(TInt aOffsetIndex); |
|
57 static TBool EncodeAsIs(TUint16 aCode); |
|
58 |
|
59 enum TPanic |
|
60 { |
|
61 EUnhandledByte, // expander code fails to handle all possible byte codes |
|
62 ENotUnicode, // expander can't handle Unicode values outside range 0x0..0x10FFFF; |
|
63 // that is, 16-bit codes plus 32-bit codes that can be expressed using |
|
64 // 16-bit surrogates |
|
65 EOutputBufferOverflow // output buffer is not big enough |
|
66 ,ECannotUseStreams // not in the file from which this file is derived (i.e. not in base\store\inc\S32UCMP.H) |
|
67 }; |
|
68 |
|
69 static void Panic(TPanic aPanic); |
|
70 |
|
71 protected: |
|
72 |
|
73 enum |
|
74 { |
|
75 EStaticWindows = 8, |
|
76 EDynamicWindows = 8, |
|
77 ESpecialBases = 7 |
|
78 }; |
|
79 |
|
80 TBool iUnicodeMode; // TRUE if in Unicode mode as opposed to single-byte mode |
|
81 TUint32 iActiveWindowBase; // base of the active window - bases are 32-bit because they |
|
82 // can be set to the surrogate area, which represents codes |
|
83 // from 0x00010000 to 0x0010FFFF - planes 1-16 of ISO-10646. |
|
84 static const TUint32 iStaticWindow[EStaticWindows]; // bases of the static windows |
|
85 static const TUint32 iDynamicWindowDefault[EDynamicWindows]; // default bases of the dynamic windows |
|
86 static const TUint16 iSpecialBase[ESpecialBases]; // bases for window offsets F9..FF |
|
87 |
|
88 TUint32 iDynamicWindow[EDynamicWindows]; // bases of the dynamic windows |
|
89 TInt iUnicodeWords; // Unicode words processed; read by compressor, written by expander |
|
90 TInt iMaxUnicodeWords; // maximum number of Unicode words to read or write |
|
91 TInt iCompressedBytes; // compressed bytes processed: read by expander, written by compressor |
|
92 TInt iMaxCompressedBytes; // maximum number of compressed bytes to read or write |
|
93 }; |
|
94 |
|
95 class MUnicodeSource |
|
96 { |
|
97 public: |
|
98 virtual TUint16 ReadUnicodeValueL() = 0; |
|
99 }; |
|
100 |
|
101 // A class to read Unicode values directly from memory. |
|
102 class TMemoryUnicodeSource: public MUnicodeSource |
|
103 { |
|
104 public: |
|
105 inline TMemoryUnicodeSource(const UTF16* aPtr); |
|
106 inline TUint16 ReadUnicodeValueL(); |
|
107 |
|
108 private: |
|
109 const UTF16* iPtr; |
|
110 }; |
|
111 |
|
112 /** |
|
113 A class to hold functions to compress text using the Standard Compression Scheme for Unicode. |
|
114 |
|
115 A note on error handling and leaving. |
|
116 |
|
117 Although all the public functions except the constructor can leave, it is possible to guarantee success: that is, |
|
118 guarantee that a call will not leave, and that compression will be completed. To do this, (i) supply a MUnicodeSource |
|
119 object with a non-leaving ReadUnicodeValueL function, such as a TMemoryUnicodeSource; (ii) write output to a |
|
120 RWriteStream with a non-leaving WriteL function, or to a buffer that you already know to be big enough, which can be |
|
121 found out using CompressedSizeL. |
|
122 |
|
123 This guarantee of success is particularly useful when compressing from one memory buffer to another. |
|
124 */ |
|
125 class TUnicodeCompressor: public TUnicodeCompressionState |
|
126 { |
|
127 public: |
|
128 IMPORT_C TUnicodeCompressor(); |
|
129 IMPORT_C void CompressL(TUint8* aOutput,MUnicodeSource& aInput, |
|
130 TInt aMaxOutputBytes = KMaxTInt,TInt aMaxInputWords = KMaxTInt, |
|
131 TInt* aOutputBytes = NULL,TInt* aInputWords = NULL); |
|
132 IMPORT_C TInt FlushL(TUint8* aOutput,TInt aMaxOutputBytes,TInt& aOutputBytes); |
|
133 IMPORT_C static TInt CompressedSizeL(MUnicodeSource& aInput,TInt aInputWords); |
|
134 |
|
135 private: |
|
136 // A structure to store a character and its treatment code |
|
137 struct TAction |
|
138 { |
|
139 // Treatment codes: static and dynamic window numbers, plain ASCII or plain Unicode |
|
140 enum |
|
141 { |
|
142 EPlainUnicode = -2, // character cannot be expressed as ASCII or using static or dynamic windows |
|
143 EPlainASCII = -1, // character can be emitted as an ASCII code |
|
144 EFirstDynamic = 0, // values 0..255 are for dynamic windows with offsets at these places in the offset table |
|
145 ELastDynamic = 255, |
|
146 EFirstStatic = 256, // values 256..263 are for static windows 0..7 |
|
147 ELastStatic = 263 |
|
148 }; |
|
149 |
|
150 inline TAction(); |
|
151 TAction(TUint16 aCode); |
|
152 |
|
153 TUint16 iCode; // Unicode value of the character |
|
154 TInt iTreatment; // treatment code: see above |
|
155 }; |
|
156 |
|
157 void DoCompressL(RWriteStream* aOutputStream,TUint8* aOutputPointer,MUnicodeSource* aInput, |
|
158 TInt aMaxCompressedBytes,TInt aMaxUnicodeWords, |
|
159 TInt* aCompressedBytes,TInt* aUnicodeWords); |
|
160 void FlushInputBufferL(); |
|
161 void FlushOutputBufferL(); |
|
162 void WriteRunL(); |
|
163 void WriteCharacter(const TAction& aAction); |
|
164 void WriteSCharacter(const TAction& aAction); |
|
165 void WriteUCharacter(TUint16 aCode); |
|
166 void WriteByte(TUint aByte); |
|
167 void WriteCharacterFromBuffer(); |
|
168 void SelectTreatment(TInt aTreatment); |
|
169 |
|
170 enum |
|
171 { |
|
172 EMaxInputBufferSize = 4, |
|
173 EMaxOutputBufferSize = EMaxInputBufferSize * 3 // no Unicode character can be encoded as more than three bytes |
|
174 }; |
|
175 TAction iInputBuffer[EMaxInputBufferSize]; // circular buffer; queue of Unicode characters to be processed |
|
176 TInt iInputBufferStart; // position of first Unicode character to be processed |
|
177 TInt iInputBufferSize; // characters in the input buffer |
|
178 TUint8 iOutputBuffer[EMaxOutputBufferSize]; // circular buffer; queue of compressed bytes to be output |
|
179 TInt iOutputBufferStart; // position of first compressed byte to be output |
|
180 TInt iOutputBufferSize; // characters in the output buffer |
|
181 TInt iDynamicWindowIndex; // index of the current dynamic window |
|
182 RWriteStream* iOutputStream; // if non-null, output is to this stream |
|
183 TUint8* iOutputPointer; // if non-null, output is to memory |
|
184 MUnicodeSource* iInput; // input object |
|
185 }; |
|
186 |
|
187 // inline functions start here |
|
188 inline TMemoryUnicodeSource::TMemoryUnicodeSource(const UTF16* aPtr): |
|
189 iPtr(aPtr) |
|
190 { |
|
191 } |
|
192 |
|
193 inline TUint16 TMemoryUnicodeSource::ReadUnicodeValueL() |
|
194 { |
|
195 return *iPtr++; |
|
196 } |
|
197 |
|
198 inline TUnicodeCompressor::TAction::TAction(): |
|
199 iCode(0), |
|
200 iTreatment(EPlainUnicode) |
|
201 { |
|
202 } |
|
203 |