secureswitools/swianalysistoolkit/source/common/ucmp.h
changeset 0 ba25891c3a9e
equal deleted inserted replaced
-1:000000000000 0:ba25891c3a9e
       
     1 /*
       
     2 * Copyright (c) 2006-2009 Nokia Corporation and/or its subsidiary(-ies).
       
     3 * All rights reserved.
       
     4 * This component and the accompanying materials are made available
       
     5 * under the terms of the License "Eclipse Public License v1.0"
       
     6 * which accompanies this distribution, and is available
       
     7 * at the URL "http://www.eclipse.org/legal/epl-v10.html".
       
     8 *
       
     9 * Initial Contributors:
       
    10 * Nokia Corporation - initial contribution.
       
    11 *
       
    12 * Contributors:
       
    13 *
       
    14 * Description: 
       
    15 * Ported from ucmp.h
       
    16 * Header for the Standard Compression Scheme for Unicode.
       
    17 * This code is compiled only in the Unicode build.
       
    18 *
       
    19 */
       
    20 
       
    21 
       
    22 /**
       
    23  @file 
       
    24  @internalComponent 
       
    25 */
       
    26 
       
    27 #ifndef __UCMP_H__
       
    28 #define __UCMP_H__ 1
       
    29 
       
    30 #include "symbiantypes.h"
       
    31 #include <iostream>
       
    32 
       
    33 
       
    // Stream aliases used by the classes below: data is written to a
    // Serialiser and read back from a Deserialiser.
    typedef std::ostream Serialiser;
    typedef std::istream Deserialiser;

    // 0x7FFFFFFF; used below as the default value of the
    // "maximum words/bytes" parameters of Compress and Expand.
    #define KMaxTInt 0x7FFFFFFF
       
    38 /**
       
    39   * @internalComponent
       
    40   */
       
    41 
       
    42 class TUnicodeCompressionState
       
    43 	{
       
    44 	public:
       
    45 	TUnicodeCompressionState();
       
    46 	void Reset();
       
    47 	static TInt32 StaticWindowIndex(TUint16 aCode);
       
    48 	static TInt32 DynamicWindowOffsetIndex(TUint16 aCode);
       
    49 	static TUint32 DynamicWindowBase(TInt32 aOffsetIndex);
       
    50 	static TBool EncodeAsIs(TUint16 aCode);
       
    51 
       
    52 	enum TPanic
       
    53 		{
       
    54 		EUnhandledByte,			// expander code fails to handle all possible byte codes
       
    55 		ENotUnicode,			// expander can't handle Unicode values outside range 0x0..0x10FFFF;
       
    56 								// that is, 16-bit codes plus 32-bit codes that can be expressed using
       
    57 								// 16-bit surrogates
       
    58 		EOutputBufferOverflow	// output buffer is not big enough
       
    59 		};
       
    60 
       
    61 	static void Panic(TPanic aPanic);
       
    62 
       
    63 	protected:
       
    64 
       
    65 	enum
       
    66 		{
       
    67 		EStaticWindows = 8,
       
    68 		EDynamicWindows = 8,
       
    69 		ESpecialBases = 7
       
    70 		};
       
    71 
       
    72 	TBool iUnicodeMode;									// TRUE if in Unicode mode as opposed to single-byte mode
       
    73 	TUint32 iActiveWindowBase;							// base of the active window - bases are 32-bit because they
       
    74 														// can be set to the surrogate area, which represents codes
       
    75 														// from 0x00010000 to 0x0010FFFF - planes 1-16 of ISO-10646.
       
    76 	static const TUint32 iStaticWindow[EStaticWindows];	// bases of the static windows
       
    77 	static const TUint32 iDynamicWindowDefault[EDynamicWindows];	// default bases of the dynamic windows
       
    78 	static const TUint16 iSpecialBase[ESpecialBases];	// bases for window offsets F9..FF
       
    79 
       
    80 	TUint32 iDynamicWindow[EDynamicWindows];			// bases of the dynamic windows
       
    81 	TInt32 iUnicodeWords;									// Unicode words processed; read by compressor, written by expander
       
    82 	TInt32 iMaxUnicodeWords;								// maximum number of Unicode words to read or write
       
    83 	TInt32 iCompressedBytes;								// compressed bytes processed: read by expander, written by compressor
       
    84 	TInt32 iMaxCompressedBytes;							// maximum number of compressed bytes to read or write
       
    85 	};
       
    86 
       
    87 /**
       
    88   * @internalComponent
       
    89   */
       
    90 
       
    91 class MUnicodeSource
       
    92 	{
       
    93 	public:
       
    94 	virtual TUint16 ReadUnicodeValue() = 0;
       
    95 	};
       
    96 
       
    97 /**
       
    98   * @internalComponent
       
    99  A class to read Unicode values directly from memory.
       
   100  */
       
   101 class TMemoryUnicodeSource: public MUnicodeSource
       
   102 	{
       
   103 	public:
       
   104 	inline TMemoryUnicodeSource(const TUint16* aPtr);
       
   105 	inline TUint16 ReadUnicodeValue();
       
   106 
       
   107 	private:
       
   108 	const TUint16* iPtr;
       
   109 	};
       
   110 
       
   111 /**Deserialiser& Deserialiser::
       
   112   * @internalComponent
       
   113  A class to read Unicode values from a stream built on a memory object.
       
   114  */
       
   115 class TMemoryStreamUnicodeSource: public MUnicodeSource
       
   116 	{
       
   117 	public:
       
   118 	inline TMemoryStreamUnicodeSource(Deserialiser& aStream);
       
   119 	inline TUint16 ReadUnicodeValue();
       
   120 
       
   121 	private:
       
   122 	Deserialiser& iStream;
       
   123 	};
       
   124 
       
   125 /**
       
   126   * @internalComponent
       
   127  */
       
   128 class MUnicodeSink
       
   129 	{
       
   130 	public:
       
   131 	virtual void WriteUnicodeValue(TUint16 aValue) = 0;
       
   132 	};
       
   133 
       
   134 /**
       
   135   * @internalComponent
       
   136  A class to write Unicode values directly to memory.
       
   137  */
       
   138 class TMemoryUnicodeSink: public MUnicodeSink
       
   139 	{
       
   140 	public:
       
   141 	inline TMemoryUnicodeSink(TUint16* aPtr);
       
   142 	inline void WriteUnicodeValue(TUint16 aValue);
       
   143 
       
   144 	private:
       
   145 	TUint16* iPtr;
       
   146 	};
       
   147 
       
   148 /**
       
   149   * @internalComponent
       
   150  A class to write Unicode values to a stream built on a memory object.
       
   151  */
       
   152 class TMemoryStreamUnicodeSink: public MUnicodeSink
       
   153 	{
       
   154 	public:
       
   155 	inline TMemoryStreamUnicodeSink(Serialiser& aStream);
       
   156 	inline void WriteUnicodeValue(TUint16 aValue);
       
   157 
       
   158 	private:
       
   159 	Serialiser& iStream;
       
   160 	};
       
   161 
       
   162 /**
       
   163   * @internalComponent
       
   164 A class to hold functions to compress text using the Standard Compression Scheme for Unicode.
       
   165 
       
   166 A note on error handling and leaving.
       
   167 
       
   168 Although all the public functions except the constructor can leave, it is possible to guarantee success: that is,
       
   169 guarantee that a call will not leave, and that compression will be completed. To do this, (i) supply a MUnicodeSource
       
   170 object with a non-leaving ReadUnicodeValue function, such as a TMemoryUnicodeSource; (ii) write output to a
       
   171 Serialiser with a non-leaving Write function, or to a buffer that you already know to be big enough, which can be
       
   172 found out using CompressedSize.
       
   173 
       
   174 This guarantee of success is particularly useful when compressing from one memory buffer to another.
       
   175 */
       
   176 class TUnicodeCompressor: public TUnicodeCompressionState
       
   177 	{
       
   178 	public:
       
   179 	TUnicodeCompressor();
       
   180 	void Compress(Serialiser& aOutput,MUnicodeSource& aInput,
       
   181 							TInt32 aMaxOutputBytes = KMaxTInt,TInt32 aMaxInputWords = KMaxTInt,
       
   182 							TInt32* aOutputBytes = 0,TInt32* aInputWords = 0);
       
   183 	void Compress(TUint8* aOutput,MUnicodeSource& aInput,
       
   184 							TInt32 aMaxOutputBytes = KMaxTInt,TInt32 aMaxInputWords = KMaxTInt,
       
   185 							TInt32* aOutputBytes = 0,TInt32* aInputWords = 0);
       
   186 	TInt32 Flush(Serialiser& aOutput,TInt32 aMaxOutputBytes,TInt32& aOutputBytes);
       
   187 	TInt32 Flush(TUint8* aOutput,TInt32 aMaxOutputBytes,TInt32& aOutputBytes);
       
   188 	static TInt32 CompressedSize(MUnicodeSource& aInput,TInt32 aInputWords);
       
   189 
       
   190 	private:
       
   191 
       
   192 	 // A structure to store a character and its treatment code
       
   193 	struct TAction
       
   194 		{
       
   195 		// Treatment codes: static and dynamic window numbers, plain ASCII or plain Unicode
       
   196 		enum
       
   197 			{
       
   198 			EPlainUnicode = -2,	// character cannot be expressed as ASCII or using static or dynamic windows
       
   199 			EPlainASCII = -1,	// character can be emitted as an ASCII code
       
   200 			EFirstDynamic = 0,	// values 0..255 are for dynamic windows with offsets at these places in the offset table
       
   201 			ELastDynamic = 255,
       
   202 			EFirstStatic = 256,	// values 256..263 are for static windows 0..7
       
   203 			ELastStatic = 263
       
   204 			};
       
   205 
       
   206 		inline TAction();
       
   207 		TAction(TUint16 aCode);
       
   208 
       
   209 		TUint16 iCode;		// Unicode value of the character
       
   210 		TInt32 iTreatment;	// treatment code: see above
       
   211 		};
       
   212 
       
   213 	void DoCompress(Serialiser* aOutputStream,TUint8* aOutputPointer,MUnicodeSource* aInput,
       
   214 					 TInt32 aMaxCompressedBytes,TInt32 aMaxUnicodeWords,
       
   215 					 TInt32* aCompressedBytes,TInt32* aUnicodeWords);
       
   216 	void FlushInputBuffer();
       
   217 	void FlushOutputBuffer();
       
   218 	void WriteRun();
       
   219 	void WriteCharacter(const TAction& aAction);
       
   220 	void WriteSCharacter( const TAction& aAction);
       
   221 	void WriteUCharacter(TUint16 aCode);
       
   222 	void WriteByte(TUint32 aByte);
       
   223 	void WriteCharacterFromBuffer();
       
   224 	void SelectTreatment(TInt32 aTreatment);
       
   225 
       
   226 	enum
       
   227 		{
       
   228 		EMaxInputBufferSize = 4,
       
   229 		EMaxOutputBufferSize = EMaxInputBufferSize * 3	// no Unicode character can be encoded as more than three bytes
       
   230 		};
       
   231 	TAction iInputBuffer[EMaxInputBufferSize];			// circular buffer; queue of Unicode characters to be processed
       
   232 	TInt32 iInputBufferStart;								// position of first Unicode character to be processed
       
   233 	TInt32 iInputBufferSize;								// characters in the input buffer
       
   234 	TUint8 iOutputBuffer[EMaxOutputBufferSize];			// circular buffer; queue of compressed bytes to be output
       
   235 	TInt32 iOutputBufferStart;							// position of first compressed byte to be output
       
   236 	TInt32 iOutputBufferSize;								// characters in the output buffer
       
   237 	TInt32 iDynamicWindowIndex;							// index of the current dynamic window
       
   238 	Serialiser* iOutputStream;						// if non-null, output is to this stream
       
   239 	TUint8* iOutputPointer;								// if non-null, output is to memory
       
   240 	MUnicodeSource* iInput;								// input object
       
   241 	};
       
   242 
       
   243 /**
       
   244   * @internalComponent
       
   245 
       
   246 A class to hold functions to expand text using the Standard Compression Scheme for Unicode.
       
   247 
       
   248 A note on error handling and leaving.
       
   249 
       
   250 Although all the puTUnicodeExpanderblic functions except the constructor can leave, it is possible to guarantee success: that is,
       
   251 guarantee that a call will not leave, and that expansion will be completed. To do this, (i) supply a MUnicodeSink
       
   252 object with a non-leaving WriteUnicodeValue function, such as a TMemoryUnicodeSink; (ii) read input from a Deserialiser
       
   253 with a non-leaving Read function; (iii) supply a big enough buffer to write the ouput; you can find out how big by
       
   254 calling ExpandedSize, using methods (i) and (ii) to guarantee success.
       
   255 
       
   256 This guarantee of success is particularly useful when expanding from one memory buffer to another.
       
   257 */
       
   258 class TUnicodeExpander: public TUnicodeCompressionState
       
   259 	{
       
   260 	public:
       
   261 	TUnicodeExpander();
       
   262 	void Expand(MUnicodeSink& aOutput,Deserialiser& aInput,
       
   263 						  TInt32 aMaxOutputWords = KMaxTInt,TInt32 aMaxInputBytes = KMaxTInt,
       
   264 						  TInt32* aOutputWords = 0,TInt32* aInputBytes = 0);
       
   265 	void Expand(MUnicodeSink& aOutput,const TUint8* aInput,
       
   266 						  TInt32 aMaxOutputWords = KMaxTInt,TInt32 aMaxInputBytes = KMaxTInt,
       
   267 						  TInt32* aOutputWords = 0,TInt32* aInputBytes = 0);
       
   268 	TInt32 Flush(MUnicodeSink& aOutput,TInt32 aMaxOutputWords,TInt32& aOutputWords);
       
   269 	static TInt32 ExpandedSize(Deserialiser& aInput,TInt32 aInputBytes);
       
   270 	static TInt32 ExpandedSize(const TUint8* aInput,TInt32 aInputBytes);
       
   271 
       
   272 	private:
       
   273 	void DoExpand(MUnicodeSink* aOutput,Deserialiser* aInputStream,const TUint8* aInputPointer,
       
   274 				   TInt32 aMaxOutputWords,TInt32 aMaxInputBytes,
       
   275 				   TInt32* aOutputWords,TInt32* aInputBytes);
       
   276 	void HandleByte();
       
   277 	void FlushOutputBuffer();
       
   278 	TBool HandleSByte(TUint8 aByte);
       
   279 	TBool HandleUByte(TUint8 aByte);
       
   280 	TBool ReadByte(TUint8& aByte);
       
   281 	TBool QuoteUnicode();
       
   282 	TBool DefineWindow(TInt32 aIndex);
       
   283 	TBool DefineExpansionWindow();
       
   284 	void WriteChar(TUint16 aChar);
       
   285 	void WriteChar32(TUint32 aChar);
       
   286 
       
   287 	enum
       
   288 		{
       
   289 		EMaxInputBufferSize = 3,		 				// no Unicode character can be encoded as more than 3 bytes
       
   290 		EMaxOutputBufferSize = 2						// no byte can be expanded into more than 2 Unicode characters
       
   291 		};
       
   292 	TUint8 iInputBuffer[EMaxInputBufferSize];			// buffer containing a group of compressed bytes representing
       
   293 														// a single operation; when an input source ends in the
       
   294 														// middle of an operation, this buffer enables the next
       
   295 														// expansion to start in the correct state
       
   296 	TInt32 iInputBufferStart;								// next read position in the input buffer
       
   297 	TInt32 iInputBufferSize;								// bytes in the input buffer
       
   298 	TUint16 iOutputBuffer[EMaxOutputBufferSize];		// circular buffer; queue of Unicode characters to be output
       
   299 	TInt32 iOutputBufferStart;							// position of first Unicode character to be output
       
   300 	TInt32 iOutputBufferSize;								// characters in the output buffer
       
   301 	MUnicodeSink* iOutput;								// output object
       
   302 	Deserialiser* iInputStream;							// if non-null, input is from this stream
       
   303 	const TUint8* iInputPointer;						// if non-null, input is from memory
       
   304 	};
       
   305 
       
   306 // inline functions start here
       
   307 
       
   308 inline TMemoryUnicodeSource::TMemoryUnicodeSource(const TUint16* aPtr):
       
   309 	iPtr(aPtr)
       
   310 	{
       
   311 	}
       
   312 
       
   313 inline TUint16 TMemoryUnicodeSource::ReadUnicodeValue()
       
   314 	{
       
   315 	return *iPtr++;
       
   316 	}
       
   317 
       
   318 inline TMemoryStreamUnicodeSource::TMemoryStreamUnicodeSource(Deserialiser& aStream):
       
   319 	iStream(aStream)
       
   320 	{
       
   321 	}
       
   322 
       
   323 inline TMemoryUnicodeSink::TMemoryUnicodeSink(TUint16* aPtr):
       
   324 	iPtr(aPtr)
       
   325 	{
       
   326 	}
       
   327 
       
   328 inline void TMemoryUnicodeSink::WriteUnicodeValue(TUint16 aValue)
       
   329 	{
       
   330 	*iPtr++ = aValue;
       
   331 	}
       
   332 
       
   333 inline TMemoryStreamUnicodeSink::TMemoryStreamUnicodeSink(Serialiser& aStream):
       
   334 	iStream(aStream)
       
   335 	{
       
   336 	}
       
   337 
       
   338 inline TUnicodeCompressor::TAction::TAction():
       
   339 	iCode(0),
       
   340 	iTreatment(EPlainUnicode)
       
   341 	{
       
   342 	}
       
   343 
       
   344 #endif // __UCMP_H__
       
   345