bintools/rcomp/inc/UNICODE_COMPRESSOR.H
changeset 0 044383f39525
equal deleted inserted replaced
-1:000000000000 0:044383f39525
       
     1 /*
       
     2 * Copyright (c) 2001-2009 Nokia Corporation and/or its subsidiary(-ies).
       
     3 * All rights reserved.
       
     4 * This component and the accompanying materials are made available
       
     5 * under the terms of the License "Eclipse Public License v1.0"
       
     6 * which accompanies this distribution, and is available
       
     7 * at the URL "http://www.eclipse.org/legal/epl-v10.html".
       
     8 *
       
     9 * Initial Contributors:
       
    10 * Nokia Corporation - initial contribution.
       
    11 *
       
    12 * Contributors:
       
    13 *
       
    14 * Description: 
       
    15 * the API to the Unicode compressor
       
    16 *
       
    17 */
       
    18 
       
    19 
       
    20 #include "wide.h"
       
    21 
       
    22 
       
    23 extern void CompressUnicode(unsigned char* aOutputBuffer, int& aOutputLength, int aMaximumOutputLength, const UTF16* aInputBuffer, int aInputLength);
       
    24 
       
    25 // the Symbian things that the Symbian Unicode-compression classes use
       
    26 
       
    27 #define IMPORT_C
       
    28 #define EXPORT_C
       
    29 #ifdef __TOOLS2__ // If TOOLS2 defined, use the definition of NULL as supplied in stddef.h
       
    30 #include <stddef.h> 
       
    31 #else
       
    32 #define NULL 0
       
    33 #endif // !__TOOLS2__
       
    34 #define TRUE true
       
    35 #define FALSE false
       
    36 typedef unsigned char TUint8;
       
    37 typedef unsigned short TUint16;
       
    38 typedef unsigned long TUint32;
       
    39 typedef unsigned int TUint;
       
    40 typedef int TInt;
       
    41 typedef bool TBool;
       
    42 const TInt KMaxTInt = 0x7fffffff;
       
    43 const TInt KErrNotFound = -1;
       
    44 class RWriteStream;
       
    45 extern void Panic(int aCode);
       
    46 
       
    47 // the rest of the contents of this file is a selective copy of base\store\inc\S32UCMP.H
       
    48 
       
    49 class TUnicodeCompressionState
       
    50 	{
       
    51 	public:
       
    52 	TUnicodeCompressionState();
       
    53 	void Reset();
       
    54 	static TInt StaticWindowIndex(TUint16 aCode);
       
    55 	static TInt DynamicWindowOffsetIndex(TUint16 aCode);
       
    56 	static TUint32 DynamicWindowBase(TInt aOffsetIndex);
       
    57 	static TBool EncodeAsIs(TUint16 aCode);
       
    58 
       
    59 	enum TPanic
       
    60 		{
       
    61 		EUnhandledByte,			// expander code fails to handle all possible byte codes
       
    62 		ENotUnicode,			// expander can't handle Unicode values outside range 0x0..0x10FFFF;
       
    63 								// that is, 16-bit codes plus 32-bit codes that can be expressed using
       
    64 								// 16-bit surrogates
       
    65 		EOutputBufferOverflow	// output buffer is not big enough
       
    66 		,ECannotUseStreams // not in the file from which this file is derived (i.e. not in base\store\inc\S32UCMP.H)
       
    67 		};
       
    68 
       
    69 	static void Panic(TPanic aPanic);
       
    70 
       
    71 	protected:
       
    72 
       
    73 	enum
       
    74 		{
       
    75 		EStaticWindows = 8,
       
    76 		EDynamicWindows = 8,
       
    77 		ESpecialBases = 7
       
    78 		};
       
    79 
       
    80 	TBool iUnicodeMode;									// TRUE if in Unicode mode as opposed to single-byte mode
       
    81 	TUint32 iActiveWindowBase;							// base of the active window - bases are 32-bit because they
       
    82 														// can be set to the surrogate area, which represents codes
       
    83 														// from 0x00010000 to 0x0010FFFF - planes 1-16 of ISO-10646.
       
    84 	static const TUint32 iStaticWindow[EStaticWindows];	// bases of the static windows
       
    85 	static const TUint32 iDynamicWindowDefault[EDynamicWindows];	// default bases of the dynamic windows
       
    86 	static const TUint16 iSpecialBase[ESpecialBases];	// bases for window offsets F9..FF
       
    87 
       
    88 	TUint32 iDynamicWindow[EDynamicWindows];			// bases of the dynamic windows
       
    89 	TInt iUnicodeWords;									// Unicode words processed; read by compressor, written by expander
       
    90 	TInt iMaxUnicodeWords;								// maximum number of Unicode words to read or write
       
    91 	TInt iCompressedBytes;								// compressed bytes processed: read by expander, written by compressor
       
    92 	TInt iMaxCompressedBytes;							// maximum number of compressed bytes to read or write
       
    93 	};
       
    94 
       
    95 class MUnicodeSource
       
    96 	{
       
    97 	public:
       
    98 	virtual TUint16 ReadUnicodeValueL() = 0;
       
    99 	};
       
   100 
       
   101 // A class to read Unicode values directly from memory.
       
   102 class TMemoryUnicodeSource: public MUnicodeSource
       
   103 	{
       
   104 	public:
       
   105 	inline TMemoryUnicodeSource(const UTF16* aPtr);
       
   106 	inline TUint16 ReadUnicodeValueL();
       
   107 
       
   108 	private:
       
   109 	const UTF16* iPtr;
       
   110 	};
       
   111 
       
   112 /**
       
   113 A class to hold functions to compress text using the Standard Compression Scheme for Unicode.
       
   114 
       
   115 A note on error handling and leaving.
       
   116 
       
   117 Although all the public functions except the constructor can leave, it is possible to guarantee success: that is,
       
   118 guarantee that a call will not leave, and that compression will be completed. To do this, (i) supply a MUnicodeSource
       
   119 object with a non-leaving ReadUnicodeValueL function, such as a TMemoryUnicodeSource; (ii) write output to a
       
   120 RWriteStream with a non-leaving WriteL function, or to a buffer that you already know to be big enough, which can be
       
   121 found out using CompressedSizeL.
       
   122 
       
   123 This guarantee of success is particularly useful when compressing from one memory buffer to another.
       
   124 */
       
   125 class TUnicodeCompressor: public TUnicodeCompressionState
       
   126 	{
       
   127 	public:
       
   128 	IMPORT_C TUnicodeCompressor();
       
   129 	IMPORT_C void CompressL(TUint8* aOutput,MUnicodeSource& aInput,
       
   130 							TInt aMaxOutputBytes = KMaxTInt,TInt aMaxInputWords = KMaxTInt,
       
   131 							TInt* aOutputBytes = NULL,TInt* aInputWords = NULL);
       
   132 	IMPORT_C TInt FlushL(TUint8* aOutput,TInt aMaxOutputBytes,TInt& aOutputBytes);
       
   133 	IMPORT_C static TInt CompressedSizeL(MUnicodeSource& aInput,TInt aInputWords);
       
   134 
       
   135 	private:
       
   136 	// A structure to store a character and its treatment code
       
   137 	struct TAction
       
   138 		{
       
   139 		// Treatment codes: static and dynamic window numbers, plain ASCII or plain Unicode
       
   140 		enum
       
   141 			{
       
   142 			EPlainUnicode = -2,	// character cannot be expressed as ASCII or using static or dynamic windows
       
   143 			EPlainASCII = -1,	// character can be emitted as an ASCII code
       
   144 			EFirstDynamic = 0,	// values 0..255 are for dynamic windows with offsets at these places in the offset table
       
   145 			ELastDynamic = 255,
       
   146 			EFirstStatic = 256,	// values 256..263 are for static windows 0..7
       
   147 			ELastStatic = 263
       
   148 			};
       
   149 
       
   150 		inline TAction();
       
   151 		TAction(TUint16 aCode);
       
   152 
       
   153 		TUint16 iCode;		// Unicode value of the character
       
   154 		TInt iTreatment;	// treatment code: see above
       
   155 		};
       
   156 
       
   157 	void DoCompressL(RWriteStream* aOutputStream,TUint8* aOutputPointer,MUnicodeSource* aInput,
       
   158 					 TInt aMaxCompressedBytes,TInt aMaxUnicodeWords,
       
   159 					 TInt* aCompressedBytes,TInt* aUnicodeWords);
       
   160 	void FlushInputBufferL();
       
   161 	void FlushOutputBufferL();
       
   162 	void WriteRunL();
       
   163 	void WriteCharacter(const TAction& aAction);
       
   164 	void WriteSCharacter(const TAction& aAction);
       
   165 	void WriteUCharacter(TUint16 aCode);
       
   166 	void WriteByte(TUint aByte);
       
   167 	void WriteCharacterFromBuffer();
       
   168 	void SelectTreatment(TInt aTreatment);
       
   169 
       
   170 	enum
       
   171 		{
       
   172 		EMaxInputBufferSize = 4,
       
   173 		EMaxOutputBufferSize = EMaxInputBufferSize * 3	// no Unicode character can be encoded as more than three bytes
       
   174 		};
       
   175 	TAction iInputBuffer[EMaxInputBufferSize];			// circular buffer; queue of Unicode characters to be processed
       
   176 	TInt iInputBufferStart;								// position of first Unicode character to be processed
       
   177 	TInt iInputBufferSize;								// characters in the input buffer
       
   178 	TUint8 iOutputBuffer[EMaxOutputBufferSize];			// circular buffer; queue of compressed bytes to be output
       
   179 	TInt iOutputBufferStart;							// position of first compressed byte to be output
       
   180 	TInt iOutputBufferSize;								// characters in the output buffer
       
   181 	TInt iDynamicWindowIndex;							// index of the current dynamic window
       
   182 	RWriteStream* iOutputStream;						// if non-null, output is to this stream
       
   183 	TUint8* iOutputPointer;								// if non-null, output is to memory
       
   184 	MUnicodeSource* iInput;								// input object
       
   185 	};
       
   186 
       
   187 // inline functions start here
       
   188 inline TMemoryUnicodeSource::TMemoryUnicodeSource(const UTF16* aPtr):
       
   189 	iPtr(aPtr)
       
   190 	{
       
   191 	}
       
   192 
       
   193 inline TUint16 TMemoryUnicodeSource::ReadUnicodeValueL()
       
   194 	{
       
   195 	return *iPtr++;
       
   196 	}
       
   197 
       
   198 inline TUnicodeCompressor::TAction::TAction():
       
   199 	iCode(0),
       
   200 	iTreatment(EPlainUnicode)
       
   201 	{
       
   202 	}
       
   203