toolsandutils/e32tools/e32image/deflate/encode.cpp
changeset 0 83f4b4db085c
child 2 99082257a271
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/toolsandutils/e32tools/e32image/deflate/encode.cpp	Tue Feb 02 01:39:43 2010 +0200
@@ -0,0 +1,492 @@
+// Copyright (c) 1998-2009 Nokia Corporation and/or its subsidiary(-ies).
+// All rights reserved.
+// This component and the accompanying materials are made available
+// under the terms of the License "Eclipse Public License v1.0"
+// which accompanies this distribution, and is available
+// at the URL "http://www.eclipse.org/legal/epl-v10.html".
+//
+// Initial Contributors:
+// Nokia Corporation - initial contribution.
+//
+// Contributors:
+//
+// Description:
+// e32tools\petran\Szip\encode.cpp
+// 
+//
+
+#include "huffman.h"
+#include "panic.h"
+#include <e32base.h>
+#include <e32base_private.h>
+#include "h_utl.h"
+#include <assert.h>
+#include "farray.h"
+#include <stdlib.h>
+
+void User::Invariant()
+	{
+	fprintf(stderr, "User::Invariant() called\n");
+	exit(1);
+	}
+
+// local definitions used for Huffman code generation
+typedef TUint16 THuff;		/** @internal */
+const THuff KLeaf=0x8000;	/** @internal */
+struct TNode
+/** @internal */
+	{
+	TUint iCount;
+	THuff iLeft;
+	THuff iRight;
+	};
+
+void HuffmanLengthsL(TUint32* aLengths,const TNode* aNodes,TInt aNode,TInt aLen)
+/** recursive function to calculate the code lengths from the node tree
+
+	@internal
+*/
+	{
+	if (++aLen>Huffman::KMaxCodeLength)
+		Panic(EHuffmanBufferOverflow);
+
+	const TNode& node=aNodes[aNode];
+	TUint x=node.iLeft;
+	if (x&KLeaf)
+		aLengths[x&~KLeaf]=aLen;
+	else
+		HuffmanLengthsL(aLengths,aNodes,x,aLen);
+	x=node.iRight;
+	if (x&KLeaf)
+		aLengths[x&~KLeaf]=aLen;
+	else
+		HuffmanLengthsL(aLengths,aNodes,x,aLen);
+	}
+
+void InsertInOrder(TNode* aNodes, TInt aSize, TUint aCount, TInt aVal)
+/**	Insert the {aCount,aValue} pair into the already sorted array of nodes
+
+	@internal
+*/
+	{
+	// Uses Insertion sort following a binary search...
+	TInt l=0, r=aSize;
+	while (l < r)
+		{
+		TInt m = (l+r) >> 1;
+		if (aNodes[m].iCount<aCount)
+			r=m;
+		else
+			l=m+1;
+		}
+	HMem::Copy(aNodes+l+1,aNodes+l,sizeof(TNode)*(aSize-l));
+	aNodes[l].iCount=aCount;
+	aNodes[l].iRight=TUint16(aVal);
+	}
+
+void Huffman::HuffmanL(const TUint32 aFrequency[],TInt aNumCodes,TUint32 aHuffman[])
+/** Generate a Huffman code
+
+	This generates a Huffman code for a given set of code frequencies. The output
+	is a table of code lengths which can be used to build canonincal encoding tables
+	or decoding trees for use with the TBitInput and TBitOutput classes.
+
+	Entries in the table with a frequency of zero will have a zero code length
+	and thus no associated huffman encoding. If each such symbol should have a
+	maximum length encoding, they must be given at least a frequency of 1.
+
+	For an alphabet of n symbols, this algorithm has a transient memory overhead
+	of 8n, and a time complexity of O(n*log(n)).
+
+	@param "const TUint32 aFrequency[]" The table of code frequencies
+	@param "TInt aNumCodes" The number of codes in the table
+	@param "TUint32 aHuffman[]" The table for the output code-length table. This must be
+		the same size as the frequency table, and can safely be the same table
+
+	@leave "KErrNoMemory" If memory used for code generation cannot be allocated
+
+  	@panic "USER ???" If the number of codes exceeds Huffman::KMaxCodes
+*/
+	{
+	if(TUint(aNumCodes)>TUint(KMaxCodes))
+		Panic(EHuffmanTooManyCodes);
+
+	// Sort the values into decreasing order of frequency
+	//
+	TNode* nodes = new TNode[aNumCodes];
+	if(nodes==NULL)
+		Panic(EHuffmanOutOfMemory);
+
+	TInt lCount=0;
+
+	for (TInt ii=0;ii<aNumCodes;++ii)
+		{
+		TInt c=aFrequency[ii];
+		if (c!=0)
+			InsertInOrder(nodes,lCount++,c,ii|KLeaf);
+		}
+
+	// default code length is zero
+	HMem::FillZ(aHuffman,aNumCodes*sizeof(TUint32));
+
+	if (lCount==0)
+		{
+		// no codes with frequency>0. No code has a length
+		}
+	else if (lCount==1)
+		{
+		// special case for a single value (always encode as "0")
+		aHuffman[nodes[0].iRight&~KLeaf]=1;
+		}
+	else
+		{
+		// Huffman algorithm: pair off least frequent nodes and reorder
+		//
+		do
+			{
+			--lCount;
+			TUint c=nodes[lCount].iCount + nodes[lCount-1].iCount;
+			nodes[lCount].iLeft=nodes[lCount-1].iRight;
+			// re-order the leaves now to reflect new combined frequency 'c'
+			InsertInOrder(nodes,lCount-1,c,lCount);
+			} while (lCount>1);
+		// generate code lengths in aHuffman[]
+		HuffmanLengthsL(aHuffman,nodes,1,0);
+		}
+
+	delete [] nodes;
+
+	if(!IsValid(aHuffman,aNumCodes))
+		Panic(EHuffmanInvalidCoding);
+	}
+
+TBool Huffman::IsValid(const TUint32 aHuffman[],TInt aNumCodes)
+/** Validate a Huffman encoding
+
+	This verifies that a Huffman coding described by the code lengths is valid.
+	In particular, it ensures that no code exceeds the maximum length and
+	that it is possible to generate a canonical coding for the specified lengths.
+	
+	@param "const TUint32 aHuffman[]" The table of code lengths as generated by Huffman::HuffmanL()
+	@param "TInt aNumCodes" The number of codes in the table
+
+	@return True if the code is valid, otherwise false
+*/
+	{
+	// The code is valid if one of the following holds:
+	// (a) the code exactly fills the 'code space'
+	// (b) there is only a single symbol with code length 1
+	// (c) there are no encoded symbols
+	//
+	TUint remain=1<<KMaxCodeLength;
+	TInt totlen=0;
+	for (const TUint32* p=aHuffman+aNumCodes; p>aHuffman;)
+		{
+		TInt len=*--p;
+		if (len>0)
+			{
+			totlen+=len;
+			if (len>KMaxCodeLength)
+				return EFalse;
+			TUint c=1<<(KMaxCodeLength-len);
+			if (c>remain)
+				return EFalse;
+			remain-=c;
+			}
+		}
+
+	return remain==0 || totlen<=1;
+	}
+
+void Huffman::Encoding(const TUint32 aHuffman[],TInt aNumCodes,TUint32 aEncodeTable[])
+/** Create a canonical Huffman encoding table
+
+	This generates the huffman codes used by TBitOutput::HuffmanL() to write huffman
+	encoded data. The input is table of code lengths, as generated by Huffman::HuffmanL()
+	and must represent a valid huffman code.
+	
+	@param "const TUint32 aHuffman[]" The table of code lengths as generated by Huffman::HuffmanL()
+	@param "TInt aNumCodes" The number of codes in the table
+	@param "TUint32 aEncodeTable[]" The table for the output huffman codes. This must be
+		the same size as the code-length table, and can safely be the same table
+
+	@panic "USER ???" If the provided code is not a valid Huffman coding
+	
+	@see IsValid()
+	@see HuffmanL()
+*/
+	{
+	__ASSERT_ALWAYS(IsValid(aHuffman,aNumCodes),Panic(EHuffmanInvalidCoding));
+
+	TFixedArray<TInt,KMaxCodeLength> lenCount;
+	lenCount.Reset();
+
+	TInt ii;
+	for (ii=0;ii<aNumCodes;++ii)
+		{
+		TInt len=aHuffman[ii]-1;
+		if (len>=0)
+			++lenCount[len];
+		}
+
+	TFixedArray<TUint,KMaxCodeLength> nextCode;
+	TUint code=0;
+	for (ii=0;ii<KMaxCodeLength;++ii)
+		{
+		code<<=1;
+		nextCode[ii]=code;
+		code+=lenCount[ii];
+		}
+
+	for (ii=0;ii<aNumCodes;++ii)
+		{
+		TInt len=aHuffman[ii];
+		if (len==0)
+			aEncodeTable[ii]=0;
+		else
+			{
+			aEncodeTable[ii] = (nextCode[len-1]<<(KMaxCodeLength-len))|(len<<KMaxCodeLength);
+			++nextCode[len-1];
+			}
+		}
+	}
+
+/** the encoding table for the externalised code
+	@internal
+*/
+const TUint32 HuffmanEncoding[]=
+	{
+	0x10000000,
+	0x1c000000,
+	0x12000000,
+	0x1d000000,
+	0x26000000,
+	0x26800000,
+	0x2f000000,
+	0x37400000,
+	0x37600000,
+	0x37800000,
+	0x3fa00000,
+	0x3fb00000,
+	0x3fc00000,
+	0x3fd00000,
+	0x47e00000,
+	0x47e80000,
+	0x47f00000,
+	0x4ff80000,
+	0x57fc0000,
+	0x5ffe0000,
+	0x67ff0000,
+	0x77ff8000,
+	0x7fffa000,
+	0x7fffb000,
+	0x7fffc000,
+	0x7fffd000,
+	0x7fffe000,
+	0x87fff000,
+	0x87fff800
+	};
+
+void EncodeRunLengthL(TBitOutput& aOutput, TInt aLength)
+/** encode 0a as '0' and 0b as '1', return number of symbols created
+
+	@internal
+*/
+	{
+	if (aLength>0)
+		{
+		EncodeRunLengthL(aOutput,(aLength-1)>>1);
+		aOutput.HuffmanL(HuffmanEncoding[1-(aLength&1)]);
+		}
+	}
+
+void Huffman::ExternalizeL(TBitOutput& aOutput,const TUint32 aHuffman[],TInt aNumCodes)
+/** Store a canonical huffman encoding in compact form
+
+	As the encoding is canonical, only the code lengths of each code needs to be saved.
+
+	Due to the nature of code length tables, these can usually be stored very compactly
+	by encoding the encoding itself, hence the use of the bit output stream.
+	
+	@param "TBitOutput& aOutput" The output stream for the encoding
+	@param "const TUint32 aHuffman[]" The table of code lengths as generated by Huffman::HuffmanL()
+	@param "TInt aNumCodes" The number of huffman codes in the table
+
+	@leave "TBitOutput::HuffmanL()"
+*/
+	{
+	// We assume that the code length table is generated by the huffman generator,
+	// in which case the maxmimum code length is 27 bits.
+	//
+	// We apply three transformations to the data:
+	// 1. the data goes through a move-to-front coder
+	// 2. apply a rle-0 coder which replace runs of '0' with streams of '0a' and '0b'
+	// 3. encode the result using a predefined (average) huffman coding
+	//
+	// This can be done in a single pass over the data, avoiding the need for additional
+	// memory.
+	//
+	// initialise the list for the MTF coder
+	TFixedArray<TUint8,Huffman::KMetaCodes> list;
+	TInt i;
+	for (i=0;i<list.Count();++i)
+		list[i]=TUint8(i);
+	TInt last=0;
+
+	TInt rl=0;
+	const TUint32* p32=aHuffman;
+	const TUint32* e32=p32+aNumCodes;
+	while (p32<e32)
+		{
+		TInt c=*p32++;
+		if (c==last)
+			++rl;	// repeat of last symbol
+		else
+			{
+			// encode run-length
+			EncodeRunLengthL(aOutput,rl);
+			rl=0;
+			// find code in MTF list
+			TInt j;
+			for (j=1;list[j]!=c;++j)
+				;
+			// store this code
+			aOutput.HuffmanL(HuffmanEncoding[j+1]);
+			// adjust list for MTF algorithm
+			while (--j>0)
+				list[j+1]=list[j];
+			list[1]=TUint8(last);
+			last=c;
+			}
+		}
+	// encod any remaining run-length
+	EncodeRunLengthL(aOutput,rl);
+	}
+
+
+TBitOutput::TBitOutput()
+/** Construct a bit stream output object
+
+	Following construction the bit stream is ready for writing bits, but will first call
+	OverflowL() as the output buffer is 'full'. A derived class can detect this state as
+	Ptr() will return null.
+*/
+	:iCode(0),iBits(-8),iPtr(0),iEnd(0)
+	{}
+
+TBitOutput::TBitOutput(TUint8* aBuf,TInt aSize)
+/** Construct a bit stream output object over a buffer
+
+	Data will be written to the buffer until it is full, at which point OverflowL() will
+	be called. This should handle the data and then can Set() again to reset the buffer
+	for further output.
+	
+	@param "TUint8* aBuf" The buffer for output
+	@param "TInt aSize" The size of the buffer in bytes
+*/
+	:iCode(0),iBits(-8),iPtr(aBuf),iEnd(aBuf+aSize)
+	{}
+
+void TBitOutput::HuffmanL(TUint aHuffCode)
+/** Write a huffman code
+
+	This expects a huffman code value as generated by Huffman::Encoding()
+
+	@param "TUint aHuffCode" The huffman code write to the stream
+
+	@leave "OverflowL()" If the output buffer is full, OverflowL() is called
+*/
+	{
+	DoWriteL(aHuffCode<<(32-Huffman::KMaxCodeLength),aHuffCode>>Huffman::KMaxCodeLength);
+	}
+
+void TBitOutput::WriteL(TUint aValue,TInt aLength)
+/** Write an arbitrary integer value
+
+	Write an unsigned integer using the number of bits specified. Only
+	the low order bits of the value are written to the output, most
+	significant bit first.
+
+	@param "TUint aValue" The value to write to the stream
+	@param "TUint aLength" The number of bits to output
+
+	@leave "OverflowL()" If the output buffer is full, OverflowL() is called
+*/
+	{
+	if (aLength)
+		DoWriteL(aValue<<=32-aLength,aLength);
+	}
+
+void TBitOutput::PadL(TUint aPadding)
+/** Pad the bitstream to the next byte boundary
+
+	Terminate the bitstream by padding the last byte with the requested value.
+	Following this operation the bitstream can continue to be used, the data will
+	start at the next byte.
+
+	@param "TUint aPadding" The bit value to pad the final byte with
+
+	@leave "OverflowL()" If the output buffer is full, OverflowL() is called
+*/
+	{
+	if (iBits>-8)
+		WriteL(aPadding?0xffffffffu:0,-iBits);
+	}
+
+void TBitOutput::DoWriteL(TUint aBits,TInt aSize)
+/** Write the higher order bits to the stream
+	
+	@internal
+*/
+	{
+	if (aSize>25)
+		{
+		// cannot process >25 bits in a single pass
+		// so do the top 8 bits first
+		ASSERT(aSize<=32);
+		DoWriteL(aBits&0xff000000u,8);
+		aBits<<=8;
+		aSize-=8;
+		}
+
+	TInt bits=iBits;
+	TUint code=iCode|(aBits>>(bits+8));
+	bits+=aSize;
+	if (bits>=0)
+		{
+		TUint8* p=iPtr;
+		do
+			{
+			if (p==iEnd)
+				{
+				// run out of buffer space so invoke the overflow handler
+				iPtr=p;
+				OverflowL();
+				p=iPtr;
+				ASSERT(p!=iEnd);
+				}
+			*p++=TUint8(code>>24);
+			code<<=8;
+			bits-=8;
+			} while (bits>=0);
+		iPtr=p;
+		}
+	iCode=code;
+	iBits=bits;
+	}
+
+void TBitOutput::OverflowL()
+/** Handle a full output buffer
+
+	This virtual function is called when the output buffer is full. It should deal
+	with the data in the buffer before reseting the buffer using Set(), allowing
+	further data to be written.
+
+	A derived class can replace this to write the data to a file (for example)
+	before marking the buffer as empty.
+
+	@leave "KErrOverflow" The default implementation leaves
+*/
+	{
+	Panic(EHuffmanBufferOverflow);
+	}