kernel/eka/euser/us_encode.cpp
author Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
Mon, 18 Jan 2010 21:31:10 +0200
changeset 8 538db54a451d
parent 0 a41df078684a
permissions -rw-r--r--
Revision: 201003 Kit: 201003

// Copyright (c) 1998-2009 Nokia Corporation and/or its subsidiary(-ies).
// All rights reserved.
// This component and the accompanying materials are made available
// under the terms of the License "Eclipse Public License v1.0"
// which accompanies this distribution, and is available
// at the URL "http://www.eclipse.org/legal/epl-v10.html".
//
// Initial Contributors:
// Nokia Corporation - initial contribution.
//
// Contributors:
//
// Description:
// e32\euser\us_encode.cpp
// 
//

#include "e32huffman.h"
#include <e32base.h>
#include <e32base_private.h>
#include <e32panic.h>

_LIT(KCat,"Huffman");
// local definitions used for Huffman code generation
typedef TUint16 THuff;		/** @internal */
const THuff KLeaf=0x8000;	/** @internal */
struct TNode
/** @internal */
	{
	TUint iCount;
	THuff iLeft;
	THuff iRight;
	};

/** recursive function to calculate the code lengths from the node tree

	@internalComponent
*/
void HuffmanLengthsL(TUint32* aLengths,const TNode* aNodes,TInt aNode,TInt aLen)
	{
	if (++aLen>Huffman::KMaxCodeLength)
		User::Leave(KErrOverflow);

	const TNode& node=aNodes[aNode];
	TUint x=node.iLeft;
	if (x&KLeaf)
		aLengths[x&~KLeaf]=aLen;
	else
		HuffmanLengthsL(aLengths,aNodes,x,aLen);
	x=node.iRight;
	if (x&KLeaf)
		aLengths[x&~KLeaf]=aLen;
	else
		HuffmanLengthsL(aLengths,aNodes,x,aLen);
	}

/**	Insert the {aCount,aValue} pair into the already sorted array of nodes

	@internalComponent
*/
void InsertInOrder(TNode* aNodes, TInt aSize, TUint aCount, TInt aVal)
	{
	// Uses Insertion sort following a binary search...
	TInt l=0, r=aSize;
	while (l < r)
		{
		TInt m = (l+r) >> 1;
		if (aNodes[m].iCount<aCount)
			r=m;
		else
			l=m+1;
		}
	Mem::Copy(aNodes+l+1,aNodes+l,sizeof(TNode)*(aSize-l));
	aNodes[l].iCount=aCount;
	aNodes[l].iRight=TUint16(aVal);
	}

/** Generate a Huffman code

	This generates a Huffman code for a given set of code frequencies. The output
	is a table of code lengths which can be used to build canonincal encoding tables
	or decoding trees for use with the TBitInput and TBitOutput classes.

	Entries in the table with a frequency of zero will have a zero code length
	and thus no associated huffman encoding. If each such symbol should have a
	maximum length encoding, they must be given at least a frequency of 1.

	For an alphabet of n symbols, this algorithm has a transient memory overhead
	of 8n, and a time complexity of O(n*log(n)).

	@param aFrequency The table of code frequencies
	@param aNumCodes The number of codes in the table
	@param aHuffman The table for the output code-length table. This must be
		the same size as the frequency table, and can safely be the same table

	@leave KErrNoMemory If memory used for code generation cannot be allocated

  	@panic "USER ???" If the number of codes exceeds Huffman::KMaxCodes
*/
EXPORT_C void Huffman::HuffmanL(const TUint32 aFrequency[],TInt aNumCodes,TUint32 aHuffman[])
	{
	__ASSERT_ALWAYS(TUint(aNumCodes)<=TUint(KMaxCodes),User::Panic(KCat,EHuffmanTooManyCodes));

	// Sort the values into decreasing order of frequency
	//
	TNode* nodes = new(ELeave) TNode[aNumCodes];
	CleanupArrayDeletePushL(nodes);
	TInt lCount=0;

	for (TInt ii=0;ii<aNumCodes;++ii)
		{
		TInt c=aFrequency[ii];
		if (c!=0)
			InsertInOrder(nodes,lCount++,c,ii|KLeaf);
		}

	// default code length is zero
	Mem::FillZ(aHuffman,aNumCodes*sizeof(TUint32));

	if (lCount==0)
		{
		// no codes with frequency>0. No code has a length
		}
	else if (lCount==1)
		{
		// special case for a single value (always encode as "0")
		aHuffman[nodes[0].iRight&~KLeaf]=1;
		}
	else
		{
		// Huffman algorithm: pair off least frequent nodes and reorder
		//
		do
			{
			--lCount;
			TUint c=nodes[lCount].iCount + nodes[lCount-1].iCount;
			nodes[lCount].iLeft=nodes[lCount-1].iRight;
			// re-order the leaves now to reflect new combined frequency 'c'
			InsertInOrder(nodes,lCount-1,c,lCount);
			} while (lCount>1);
		// generate code lengths in aHuffman[]
		HuffmanLengthsL(aHuffman,nodes,1,0);
		}
	CleanupStack::PopAndDestroy(nodes);

	__ASSERT_DEBUG(IsValid(aHuffman,aNumCodes),User::Panic(KCat,EHuffmanInvalidCoding));
	}

/** Validate a Huffman encoding

	This verifies that a Huffman coding described by the code lengths is valid.
	In particular, it ensures that no code exceeds the maximum length and
	that it is possible to generate a canonical coding for the specified lengths.
	
	@param aHuffman The table of code lengths as generated by Huffman::HuffmanL()
	@param aNumCodes The number of codes in the table

	@return True if the code is valid, otherwise false
*/
EXPORT_C TBool Huffman::IsValid(const TUint32 aHuffman[],TInt aNumCodes)
	{
	// The code is valid if one of the following holds:
	// (a) the code exactly fills the 'code space'
	// (b) there is only a single symbol with code length 1
	// (c) there are no encoded symbols
	//
	TUint remain=1<<KMaxCodeLength;
	TInt totlen=0;
	for (const TUint32* p=aHuffman+aNumCodes; p>aHuffman;)
		{
		TInt len=*--p;
		if (len>0)
			{
			totlen+=len;
			if (len>KMaxCodeLength)
				return EFalse;
			TUint c=1<<(KMaxCodeLength-len);
			if (c>remain)
				return EFalse;
			remain-=c;
			}
		}

	return remain==0 || totlen<=1;
	}

/** Create a canonical Huffman encoding table

	This generates the huffman codes used by TBitOutput::HuffmanL() to write huffman
	encoded data. The input is table of code lengths, as generated by Huffman::HuffmanL()
	and must represent a valid huffman code.
	
	@param aHuffman The table of code lengths as generated by Huffman::HuffmanL()
	@param aNumCodes The number of codes in the table
	@param aEncodeTable The table for the output huffman codes. This must be
		the same size as the code-length table, and can safely be the same table

	@panic "USER ???" If the provided code is not a valid Huffman coding
	
	@see IsValid()
	@see HuffmanL()
*/
EXPORT_C void Huffman::Encoding(const TUint32 aHuffman[],TInt aNumCodes,TUint32 aEncodeTable[])
	{
	__ASSERT_ALWAYS(IsValid(aHuffman,aNumCodes),User::Panic(KCat,EHuffmanInvalidCoding));

	TFixedArray<TInt,KMaxCodeLength> lenCount;
	lenCount.Reset();

	TInt ii;
	for (ii=0;ii<aNumCodes;++ii)
		{
		TInt len=aHuffman[ii]-1;
		if (len>=0)
			++lenCount[len];
		}

	TFixedArray<TUint,KMaxCodeLength> nextCode;
	TUint code=0;
	for (ii=0;ii<KMaxCodeLength;++ii)
		{
		code<<=1;
		nextCode[ii]=code;
		code+=lenCount[ii];
		}

	for (ii=0;ii<aNumCodes;++ii)
		{
		TInt len=aHuffman[ii];
		if (len==0)
			aEncodeTable[ii]=0;
		else
			{
			aEncodeTable[ii] = (nextCode[len-1]<<(KMaxCodeLength-len))|(len<<KMaxCodeLength);
			++nextCode[len-1];
			}
		}
	}

/** the encoding table for the externalised code
	@internalComponent
*/
const TUint32 HuffmanEncoding[]=
	{
	0x10000000,
	0x1c000000,
	0x12000000,
	0x1d000000,
	0x26000000,
	0x26800000,
	0x2f000000,
	0x37400000,
	0x37600000,
	0x37800000,
	0x3fa00000,
	0x3fb00000,
	0x3fc00000,
	0x3fd00000,
	0x47e00000,
	0x47e80000,
	0x47f00000,
	0x4ff80000,
	0x57fc0000,
	0x5ffe0000,
	0x67ff0000,
	0x77ff8000,
	0x7fffa000,
	0x7fffb000,
	0x7fffc000,
	0x7fffd000,
	0x7fffe000,
	0x87fff000,
	0x87fff800
	};

/** encode 0a as '0' and 0b as '1', return number of symbols created

	@internalComponent
*/
void EncodeRunLengthL(TBitOutput& aOutput, TInt aLength)
	{
	if (aLength>0)
		{
		EncodeRunLengthL(aOutput,(aLength-1)>>1);
		aOutput.HuffmanL(HuffmanEncoding[1-(aLength&1)]);
		}
	}

/** Store a canonical huffman encoding in compact form

	As the encoding is canonical, only the code lengths of each code needs to be saved.

	Due to the nature of code length tables, these can usually be stored very compactly
	by encoding the encoding itself, hence the use of the bit output stream.
	
	@param aOutput The output stream for the encoding
	@param aHuffman The table of code lengths as generated by Huffman::HuffmanL()
	@param aNumCodes The number of huffman codes in the table

	@leave TBitOutput::HuffmanL()
*/
EXPORT_C void Huffman::ExternalizeL(TBitOutput& aOutput,const TUint32 aHuffman[],TInt aNumCodes)
	{
	// We assume that the code length table is generated by the huffman generator,
	// in which case the maxmimum code length is 27 bits.
	//
	// We apply three transformations to the data:
	// 1. the data goes through a move-to-front coder
	// 2. apply a rle-0 coder which replace runs of '0' with streams of '0a' and '0b'
	// 3. encode the result using a predefined (average) huffman coding
	//
	// This can be done in a single pass over the data, avoiding the need for additional
	// memory.
	//
	// initialise the list for the MTF coder
	TFixedArray<TUint8,Huffman::KMetaCodes> list;
	TInt i;
	for (i=0;i<list.Count();++i)
		list[i]=TUint8(i);
	TInt last=0;

	TInt rl=0;
	const TUint32* p32=aHuffman;
	const TUint32* e32=p32+aNumCodes;
	while (p32<e32)
		{
		TInt c=*p32++;
		if (c==last)
			++rl;	// repeat of last symbol
		else
			{
			// encode run-length
			EncodeRunLengthL(aOutput,rl);
			rl=0;
			// find code in MTF list
			TInt j;
			for (j=1;list[j]!=c;++j)
				;
			// store this code
			aOutput.HuffmanL(HuffmanEncoding[j+1]);
			// adjust list for MTF algorithm
			while (--j>0)
				list[j+1]=list[j];
			list[1]=TUint8(last);
			last=c;
			}
		}
	// encod any remaining run-length
	EncodeRunLengthL(aOutput,rl);
	}


/** Construct a bit stream output object

	Following construction the bit stream is ready for writing bits, but will first call
	OverflowL() as the output buffer is 'full'. A derived class can detect this state as
	Ptr() will return null.
*/
EXPORT_C TBitOutput::TBitOutput()
	:iCode(0),iBits(-8),iPtr(0),iEnd(0)
	{}

/** Construct a bit stream output object over a buffer

	Data will be written to the buffer until it is full, at which point OverflowL() will
	be called. This should handle the data and then can Set() again to reset the buffer
	for further output.
	
	@param aBuf The buffer for output
	@param aSize The size of the buffer in bytes
*/
EXPORT_C TBitOutput::TBitOutput(TUint8* aBuf,TInt aSize)
	:iCode(0),iBits(-8),iPtr(aBuf),iEnd(aBuf+aSize)
	{}

/** Write a huffman code

	This expects a huffman code value as generated by Huffman::Encoding()

	@param aHuffCode The huffman code write to the stream

	@leave OverflowL() If the output buffer is full, OverflowL() is called
*/
EXPORT_C void TBitOutput::HuffmanL(TUint aHuffCode)
	{
	DoWriteL(aHuffCode<<(32-Huffman::KMaxCodeLength),aHuffCode>>Huffman::KMaxCodeLength);
	}

/** Write an arbitrary integer value

	Write an unsigned integer using the number of bits specified. Only
	the low order bits of the value are written to the output, most
	significant bit first.

	@param aValue The value to write to the stream
	@param aLength The number of bits to output

	@leave OverflowL() If the output buffer is full, OverflowL() is called
*/
EXPORT_C void TBitOutput::WriteL(TUint aValue,TInt aLength)
	{
	if (aLength)
		DoWriteL(aValue<<=32-aLength,aLength);
	}

/** Pad the bitstream to the next byte boundary

	Terminate the bitstream by padding the last byte with the requested value.
	Following this operation the bitstream can continue to be used, the data will
	start at the next byte.

	@param aPadding The bit value to pad the final byte with

	@leave OverflowL() If the output buffer is full, OverflowL() is called
*/
EXPORT_C void TBitOutput::PadL(TUint aPadding)
	{
	if (iBits>-8)
		WriteL(aPadding?0xffffffffu:0,-iBits);
	}

/** Write the higher order bits to the stream
	
	@internalComponent
*/
void TBitOutput::DoWriteL(TUint aBits,TInt aSize)
	{
	if (aSize>25)
		{
		// cannot process >25 bits in a single pass
		// so do the top 8 bits first
		ASSERT(aSize<=32);
		DoWriteL(aBits&0xff000000u,8);
		aBits<<=8;
		aSize-=8;
		}

	TInt bits=iBits;
	TUint code=iCode|(aBits>>(bits+8));
	bits+=aSize;
	if (bits>=0)
		{
		TUint8* p=iPtr;
		do
			{
			if (p==iEnd)
				{
				// run out of buffer space so invoke the overflow handler
				iPtr=p;
				OverflowL();
				p=iPtr;
				ASSERT(p!=iEnd);
				}
			*p++=TUint8(code>>24);
			code<<=8;
			bits-=8;
			} while (bits>=0);
		iPtr=p;
		}
	iCode=code;
	iBits=bits;
	}

/** Handle a full output buffer

	This virtual function is called when the output buffer is full. It should deal
	with the data in the buffer before reseting the buffer using Set(), allowing
	further data to be written.

	A derived class can replace this to write the data to a file (for example)
	before marking the buffer as empty.

	@leave KErrOverflow The default implementation leaves
*/
void TBitOutput::OverflowL()
	{
	User::Leave(KErrOverflow);
	}