e32tools/e32lib/e32image/deflate/encode.cpp
author Richard Taylor <richard.i.taylor@nokia.com>
Fri, 30 Apr 2010 13:26:13 +0100
branchwip
changeset 504 ea75c3a64a17
parent 0 044383f39525
permissions -rw-r--r--
change to lowercase method names

// Copyright (c) 1998-2009 Nokia Corporation and/or its subsidiary(-ies).
// All rights reserved.
// This component and the accompanying materials are made available
// under the terms of "Eclipse Public License v1.0"
// which accompanies this distribution, and is available
// at the URL "http://www.eclipse.org/legal/epl-v10.html".
//
// Initial Contributors:
// Nokia Corporation - initial contribution.
//
// Contributors:
//
// Description:
// e32tools\petran\Szip\encode.cpp
// 
//

#include "huffman.h"
#include "panic.h"
#include <e32base.h>
#include "h_utl.h"
#include <assert.h>
#include "farray.h"
#include <stdlib.h>

void User::Invariant()
	{
	fprintf(stderr, "User::Invariant() called\n");
	exit(1);
	}

// local definitions used for Huffman code generation
typedef TUint16 THuff;		/** @internal */
const THuff KLeaf=0x8000;	/** @internal */
struct TNode
/** @internal */
	{
	TUint iCount;
	THuff iLeft;
	THuff iRight;
	};

void HuffmanLengthsL(TUint32* aLengths,const TNode* aNodes,TInt aNode,TInt aLen)
/** recursive function to calculate the code lengths from the node tree

	@internal
*/
	{
	if (++aLen>Huffman::KMaxCodeLength)
		Panic(EHuffmanBufferOverflow);

	const TNode& node=aNodes[aNode];
	TUint x=node.iLeft;
	if (x&KLeaf)
		aLengths[x&~KLeaf]=aLen;
	else
		HuffmanLengthsL(aLengths,aNodes,x,aLen);
	x=node.iRight;
	if (x&KLeaf)
		aLengths[x&~KLeaf]=aLen;
	else
		HuffmanLengthsL(aLengths,aNodes,x,aLen);
	}

void InsertInOrder(TNode* aNodes, TInt aSize, TUint aCount, TInt aVal)
/**	Insert the {aCount,aValue} pair into the already sorted array of nodes

	@internal
*/
	{
	// Uses Insertion sort following a binary search...
	TInt l=0, r=aSize;
	while (l < r)
		{
		TInt m = (l+r) >> 1;
		if (aNodes[m].iCount<aCount)
			r=m;
		else
			l=m+1;
		}
	HMem::Copy(aNodes+l+1,aNodes+l,sizeof(TNode)*(aSize-l));
	aNodes[l].iCount=aCount;
	aNodes[l].iRight=TUint16(aVal);
	}

void Huffman::HuffmanL(const TUint32 aFrequency[],TInt aNumCodes,TUint32 aHuffman[])
/** Generate a Huffman code

	This generates a Huffman code for a given set of code frequencies. The output
	is a table of code lengths which can be used to build canonincal encoding tables
	or decoding trees for use with the TBitInput and TBitOutput classes.

	Entries in the table with a frequency of zero will have a zero code length
	and thus no associated huffman encoding. If each such symbol should have a
	maximum length encoding, they must be given at least a frequency of 1.

	For an alphabet of n symbols, this algorithm has a transient memory overhead
	of 8n, and a time complexity of O(n*log(n)).

	@param "const TUint32 aFrequency[]" The table of code frequencies
	@param "TInt aNumCodes" The number of codes in the table
	@param "TUint32 aHuffman[]" The table for the output code-length table. This must be
		the same size as the frequency table, and can safely be the same table

	@leave "KErrNoMemory" If memory used for code generation cannot be allocated

  	@panic "USER ???" If the number of codes exceeds Huffman::KMaxCodes
*/
	{
	if(TUint(aNumCodes)>TUint(KMaxCodes))
		Panic(EHuffmanTooManyCodes);

	// Sort the values into decreasing order of frequency
	//
	TNode* nodes = new TNode[aNumCodes];
	if(nodes==NULL)
		Panic(EHuffmanOutOfMemory);

	TInt lCount=0;

	for (TInt ii=0;ii<aNumCodes;++ii)
		{
		TInt c=aFrequency[ii];
		if (c!=0)
			InsertInOrder(nodes,lCount++,c,ii|KLeaf);
		}

	// default code length is zero
	HMem::FillZ(aHuffman,aNumCodes*sizeof(TUint32));

	if (lCount==0)
		{
		// no codes with frequency>0. No code has a length
		}
	else if (lCount==1)
		{
		// special case for a single value (always encode as "0")
		aHuffman[nodes[0].iRight&~KLeaf]=1;
		}
	else
		{
		// Huffman algorithm: pair off least frequent nodes and reorder
		//
		do
			{
			--lCount;
			TUint c=nodes[lCount].iCount + nodes[lCount-1].iCount;
			nodes[lCount].iLeft=nodes[lCount-1].iRight;
			// re-order the leaves now to reflect new combined frequency 'c'
			InsertInOrder(nodes,lCount-1,c,lCount);
			} while (lCount>1);
		// generate code lengths in aHuffman[]
		HuffmanLengthsL(aHuffman,nodes,1,0);
		}

	delete [] nodes;

	if(!IsValid(aHuffman,aNumCodes))
		Panic(EHuffmanInvalidCoding);
	}

TBool Huffman::IsValid(const TUint32 aHuffman[],TInt aNumCodes)
/** Validate a Huffman encoding

	This verifies that a Huffman coding described by the code lengths is valid.
	In particular, it ensures that no code exceeds the maximum length and
	that it is possible to generate a canonical coding for the specified lengths.
	
	@param "const TUint32 aHuffman[]" The table of code lengths as generated by Huffman::HuffmanL()
	@param "TInt aNumCodes" The number of codes in the table

	@return True if the code is valid, otherwise false
*/
	{
	// The code is valid if one of the following holds:
	// (a) the code exactly fills the 'code space'
	// (b) there is only a single symbol with code length 1
	// (c) there are no encoded symbols
	//
	TUint remain=1<<KMaxCodeLength;
	TInt totlen=0;
	for (const TUint32* p=aHuffman+aNumCodes; p>aHuffman;)
		{
		TInt len=*--p;
		if (len>0)
			{
			totlen+=len;
			if (len>KMaxCodeLength)
				return EFalse;
			TUint c=1<<(KMaxCodeLength-len);
			if (c>remain)
				return EFalse;
			remain-=c;
			}
		}

	return remain==0 || totlen<=1;
	}

void Huffman::Encoding(const TUint32 aHuffman[],TInt aNumCodes,TUint32 aEncodeTable[])
/** Create a canonical Huffman encoding table

	This generates the huffman codes used by TBitOutput::HuffmanL() to write huffman
	encoded data. The input is table of code lengths, as generated by Huffman::HuffmanL()
	and must represent a valid huffman code.
	
	@param "const TUint32 aHuffman[]" The table of code lengths as generated by Huffman::HuffmanL()
	@param "TInt aNumCodes" The number of codes in the table
	@param "TUint32 aEncodeTable[]" The table for the output huffman codes. This must be
		the same size as the code-length table, and can safely be the same table

	@panic "USER ???" If the provided code is not a valid Huffman coding
	
	@see IsValid()
	@see HuffmanL()
*/
	{
	__ASSERT_ALWAYS(IsValid(aHuffman,aNumCodes),Panic(EHuffmanInvalidCoding));

	TFixedArray<TInt,KMaxCodeLength> lenCount;
	lenCount.Reset();

	TInt ii;
	for (ii=0;ii<aNumCodes;++ii)
		{
		TInt len=aHuffman[ii]-1;
		if (len>=0)
			++lenCount[len];
		}

	TFixedArray<TUint,KMaxCodeLength> nextCode;
	TUint code=0;
	for (ii=0;ii<KMaxCodeLength;++ii)
		{
		code<<=1;
		nextCode[ii]=code;
		code+=lenCount[ii];
		}

	for (ii=0;ii<aNumCodes;++ii)
		{
		TInt len=aHuffman[ii];
		if (len==0)
			aEncodeTable[ii]=0;
		else
			{
			aEncodeTable[ii] = (nextCode[len-1]<<(KMaxCodeLength-len))|(len<<KMaxCodeLength);
			++nextCode[len-1];
			}
		}
	}

/** the encoding table for the externalised code
	@internal
*/
const TUint32 HuffmanEncoding[]=
	{
	0x10000000,
	0x1c000000,
	0x12000000,
	0x1d000000,
	0x26000000,
	0x26800000,
	0x2f000000,
	0x37400000,
	0x37600000,
	0x37800000,
	0x3fa00000,
	0x3fb00000,
	0x3fc00000,
	0x3fd00000,
	0x47e00000,
	0x47e80000,
	0x47f00000,
	0x4ff80000,
	0x57fc0000,
	0x5ffe0000,
	0x67ff0000,
	0x77ff8000,
	0x7fffa000,
	0x7fffb000,
	0x7fffc000,
	0x7fffd000,
	0x7fffe000,
	0x87fff000,
	0x87fff800
	};

void EncodeRunLengthL(TBitOutput& aOutput, TInt aLength)
/** encode 0a as '0' and 0b as '1', return number of symbols created

	@internal
*/
	{
	if (aLength>0)
		{
		EncodeRunLengthL(aOutput,(aLength-1)>>1);
		aOutput.HuffmanL(HuffmanEncoding[1-(aLength&1)]);
		}
	}

void Huffman::ExternalizeL(TBitOutput& aOutput,const TUint32 aHuffman[],TInt aNumCodes)
/** Store a canonical huffman encoding in compact form

	As the encoding is canonical, only the code lengths of each code needs to be saved.

	Due to the nature of code length tables, these can usually be stored very compactly
	by encoding the encoding itself, hence the use of the bit output stream.
	
	@param "TBitOutput& aOutput" The output stream for the encoding
	@param "const TUint32 aHuffman[]" The table of code lengths as generated by Huffman::HuffmanL()
	@param "TInt aNumCodes" The number of huffman codes in the table

	@leave "TBitOutput::HuffmanL()"
*/
	{
	// We assume that the code length table is generated by the huffman generator,
	// in which case the maxmimum code length is 27 bits.
	//
	// We apply three transformations to the data:
	// 1. the data goes through a move-to-front coder
	// 2. apply a rle-0 coder which replace runs of '0' with streams of '0a' and '0b'
	// 3. encode the result using a predefined (average) huffman coding
	//
	// This can be done in a single pass over the data, avoiding the need for additional
	// memory.
	//
	// initialise the list for the MTF coder
	TFixedArray<TUint8,Huffman::KMetaCodes> list;
	TInt i;
	for (i=0;i<list.Count();++i)
		list[i]=TUint8(i);
	TInt last=0;

	TInt rl=0;
	const TUint32* p32=aHuffman;
	const TUint32* e32=p32+aNumCodes;
	while (p32<e32)
		{
		TInt c=*p32++;
		if (c==last)
			++rl;	// repeat of last symbol
		else
			{
			// encode run-length
			EncodeRunLengthL(aOutput,rl);
			rl=0;
			// find code in MTF list
			TInt j;
			for (j=1;list[j]!=c;++j)
				;
			// store this code
			aOutput.HuffmanL(HuffmanEncoding[j+1]);
			// adjust list for MTF algorithm
			while (--j>0)
				list[j+1]=list[j];
			list[1]=TUint8(last);
			last=c;
			}
		}
	// encod any remaining run-length
	EncodeRunLengthL(aOutput,rl);
	}


TBitOutput::TBitOutput()
/** Construct a bit stream output object

	Following construction the bit stream is ready for writing bits, but will first call
	OverflowL() as the output buffer is 'full'. A derived class can detect this state as
	Ptr() will return null.
*/
	:iCode(0),iBits(-8),iPtr(0),iEnd(0)
	{}

TBitOutput::TBitOutput(TUint8* aBuf,TInt aSize)
/** Construct a bit stream output object over a buffer

	Data will be written to the buffer until it is full, at which point OverflowL() will
	be called. This should handle the data and then can Set() again to reset the buffer
	for further output.
	
	@param "TUint8* aBuf" The buffer for output
	@param "TInt aSize" The size of the buffer in bytes
*/
	:iCode(0),iBits(-8),iPtr(aBuf),iEnd(aBuf+aSize)
	{}

void TBitOutput::HuffmanL(TUint aHuffCode)
/** Write a huffman code

	This expects a huffman code value as generated by Huffman::Encoding()

	@param "TUint aHuffCode" The huffman code write to the stream

	@leave "OverflowL()" If the output buffer is full, OverflowL() is called
*/
	{
	DoWriteL(aHuffCode<<(32-Huffman::KMaxCodeLength),aHuffCode>>Huffman::KMaxCodeLength);
	}

void TBitOutput::WriteL(TUint aValue,TInt aLength)
/** Write an arbitrary integer value

	Write an unsigned integer using the number of bits specified. Only
	the low order bits of the value are written to the output, most
	significant bit first.

	@param "TUint aValue" The value to write to the stream
	@param "TUint aLength" The number of bits to output

	@leave "OverflowL()" If the output buffer is full, OverflowL() is called
*/
	{
	if (aLength)
		DoWriteL(aValue<<=32-aLength,aLength);
	}

void TBitOutput::PadL(TUint aPadding)
/** Pad the bitstream to the next byte boundary

	Terminate the bitstream by padding the last byte with the requested value.
	Following this operation the bitstream can continue to be used, the data will
	start at the next byte.

	@param "TUint aPadding" The bit value to pad the final byte with

	@leave "OverflowL()" If the output buffer is full, OverflowL() is called
*/
	{
	if (iBits>-8)
		WriteL(aPadding?0xffffffffu:0,-iBits);
	}

void TBitOutput::DoWriteL(TUint aBits,TInt aSize)
/** Write the higher order bits to the stream
	
	@internal
*/
	{
	if (aSize>25)
		{
		// cannot process >25 bits in a single pass
		// so do the top 8 bits first
		ASSERT(aSize<=32);
		DoWriteL(aBits&0xff000000u,8);
		aBits<<=8;
		aSize-=8;
		}

	TInt bits=iBits;
	TUint code=iCode|(aBits>>(bits+8));
	bits+=aSize;
	if (bits>=0)
		{
		TUint8* p=iPtr;
		do
			{
			if (p==iEnd)
				{
				// run out of buffer space so invoke the overflow handler
				iPtr=p;
				OverflowL();
				p=iPtr;
				ASSERT(p!=iEnd);
				}
			*p++=TUint8(code>>24);
			code<<=8;
			bits-=8;
			} while (bits>=0);
		iPtr=p;
		}
	iCode=code;
	iBits=bits;
	}

void TBitOutput::OverflowL()
/** Handle a full output buffer

	This virtual function is called when the output buffer is full. It should deal
	with the data in the buffer before reseting the buffer using Set(), allowing
	further data to be written.

	A derived class can replace this to write the data to a file (for example)
	before marking the buffer as empty.

	@leave "KErrOverflow" The default implementation leaves
*/
	{
	Panic(EHuffmanBufferOverflow);
	}