// Copyright (c) 1998-2009 Nokia Corporation and/or its subsidiary(-ies).
// All rights reserved.
// This component and the accompanying materials are made available
// under the terms of the License "Eclipse Public License v1.0"
// which accompanies this distribution, and is available
// at the URL "http://www.eclipse.org/legal/epl-v10.html".
//
// Initial Contributors:
// Nokia Corporation - initial contribution.
//
// Contributors:
//
// Description:
// e32\euser\us_encode.cpp
//
//
#include "e32huffman.h"
#include <e32base.h>
#include <e32base_private.h>
#include <e32panic.h>
_LIT(KCat,"Huffman");
// local definitions used for Huffman code generation
typedef TUint16 THuff; /** @internal */
const THuff KLeaf=0x8000; /** @internal */
struct TNode
/** @internal */
{
TUint iCount;
THuff iLeft;
THuff iRight;
};
/** recursive function to calculate the code lengths from the node tree
@internalComponent
*/
void HuffmanLengthsL(TUint32* aLengths,const TNode* aNodes,TInt aNode,TInt aLen)
{
if (++aLen>Huffman::KMaxCodeLength)
User::Leave(KErrOverflow);
const TNode& node=aNodes[aNode];
TUint x=node.iLeft;
if (x&KLeaf)
aLengths[x&~KLeaf]=aLen;
else
HuffmanLengthsL(aLengths,aNodes,x,aLen);
x=node.iRight;
if (x&KLeaf)
aLengths[x&~KLeaf]=aLen;
else
HuffmanLengthsL(aLengths,aNodes,x,aLen);
}
/** Insert the {aCount,aValue} pair into the already sorted array of nodes
@internalComponent
*/
void InsertInOrder(TNode* aNodes, TInt aSize, TUint aCount, TInt aVal)
{
// Uses Insertion sort following a binary search...
TInt l=0, r=aSize;
while (l < r)
{
TInt m = (l+r) >> 1;
if (aNodes[m].iCount<aCount)
r=m;
else
l=m+1;
}
Mem::Copy(aNodes+l+1,aNodes+l,sizeof(TNode)*(aSize-l));
aNodes[l].iCount=aCount;
aNodes[l].iRight=TUint16(aVal);
}
/** Generate a Huffman code
This generates a Huffman code for a given set of code frequencies. The output
is a table of code lengths which can be used to build canonincal encoding tables
or decoding trees for use with the TBitInput and TBitOutput classes.
Entries in the table with a frequency of zero will have a zero code length
and thus no associated huffman encoding. If each such symbol should have a
maximum length encoding, they must be given at least a frequency of 1.
For an alphabet of n symbols, this algorithm has a transient memory overhead
of 8n, and a time complexity of O(n*log(n)).
@param aFrequency The table of code frequencies
@param aNumCodes The number of codes in the table
@param aHuffman The table for the output code-length table. This must be
the same size as the frequency table, and can safely be the same table
@leave KErrNoMemory If memory used for code generation cannot be allocated
@panic "USER ???" If the number of codes exceeds Huffman::KMaxCodes
*/
EXPORT_C void Huffman::HuffmanL(const TUint32 aFrequency[],TInt aNumCodes,TUint32 aHuffman[])
{
__ASSERT_ALWAYS(TUint(aNumCodes)<=TUint(KMaxCodes),User::Panic(KCat,EHuffmanTooManyCodes));
// Sort the values into decreasing order of frequency
//
TNode* nodes = new(ELeave) TNode[aNumCodes];
CleanupArrayDeletePushL(nodes);
TInt lCount=0;
for (TInt ii=0;ii<aNumCodes;++ii)
{
TInt c=aFrequency[ii];
if (c!=0)
InsertInOrder(nodes,lCount++,c,ii|KLeaf);
}
// default code length is zero
Mem::FillZ(aHuffman,aNumCodes*sizeof(TUint32));
if (lCount==0)
{
// no codes with frequency>0. No code has a length
}
else if (lCount==1)
{
// special case for a single value (always encode as "0")
aHuffman[nodes[0].iRight&~KLeaf]=1;
}
else
{
// Huffman algorithm: pair off least frequent nodes and reorder
//
do
{
--lCount;
TUint c=nodes[lCount].iCount + nodes[lCount-1].iCount;
nodes[lCount].iLeft=nodes[lCount-1].iRight;
// re-order the leaves now to reflect new combined frequency 'c'
InsertInOrder(nodes,lCount-1,c,lCount);
} while (lCount>1);
// generate code lengths in aHuffman[]
HuffmanLengthsL(aHuffman,nodes,1,0);
}
CleanupStack::PopAndDestroy(nodes);
__ASSERT_DEBUG(IsValid(aHuffman,aNumCodes),User::Panic(KCat,EHuffmanInvalidCoding));
}
/** Validate a Huffman encoding
This verifies that a Huffman coding described by the code lengths is valid.
In particular, it ensures that no code exceeds the maximum length and
that it is possible to generate a canonical coding for the specified lengths.
@param aHuffman The table of code lengths as generated by Huffman::HuffmanL()
@param aNumCodes The number of codes in the table
@return True if the code is valid, otherwise false
*/
EXPORT_C TBool Huffman::IsValid(const TUint32 aHuffman[],TInt aNumCodes)
{
// The code is valid if one of the following holds:
// (a) the code exactly fills the 'code space'
// (b) there is only a single symbol with code length 1
// (c) there are no encoded symbols
//
TUint remain=1<<KMaxCodeLength;
TInt totlen=0;
for (const TUint32* p=aHuffman+aNumCodes; p>aHuffman;)
{
TInt len=*--p;
if (len>0)
{
totlen+=len;
if (len>KMaxCodeLength)
return EFalse;
TUint c=1<<(KMaxCodeLength-len);
if (c>remain)
return EFalse;
remain-=c;
}
}
return remain==0 || totlen<=1;
}
/** Create a canonical Huffman encoding table
This generates the huffman codes used by TBitOutput::HuffmanL() to write huffman
encoded data. The input is table of code lengths, as generated by Huffman::HuffmanL()
and must represent a valid huffman code.
@param aHuffman The table of code lengths as generated by Huffman::HuffmanL()
@param aNumCodes The number of codes in the table
@param aEncodeTable The table for the output huffman codes. This must be
the same size as the code-length table, and can safely be the same table
@panic "USER ???" If the provided code is not a valid Huffman coding
@see IsValid()
@see HuffmanL()
*/
EXPORT_C void Huffman::Encoding(const TUint32 aHuffman[],TInt aNumCodes,TUint32 aEncodeTable[])
{
__ASSERT_ALWAYS(IsValid(aHuffman,aNumCodes),User::Panic(KCat,EHuffmanInvalidCoding));
TFixedArray<TInt,KMaxCodeLength> lenCount;
lenCount.Reset();
TInt ii;
for (ii=0;ii<aNumCodes;++ii)
{
TInt len=aHuffman[ii]-1;
if (len>=0)
++lenCount[len];
}
TFixedArray<TUint,KMaxCodeLength> nextCode;
TUint code=0;
for (ii=0;ii<KMaxCodeLength;++ii)
{
code<<=1;
nextCode[ii]=code;
code+=lenCount[ii];
}
for (ii=0;ii<aNumCodes;++ii)
{
TInt len=aHuffman[ii];
if (len==0)
aEncodeTable[ii]=0;
else
{
aEncodeTable[ii] = (nextCode[len-1]<<(KMaxCodeLength-len))|(len<<KMaxCodeLength);
++nextCode[len-1];
}
}
}
/** the encoding table for the externalised code
@internalComponent
*/
const TUint32 HuffmanEncoding[]=
{
0x10000000,
0x1c000000,
0x12000000,
0x1d000000,
0x26000000,
0x26800000,
0x2f000000,
0x37400000,
0x37600000,
0x37800000,
0x3fa00000,
0x3fb00000,
0x3fc00000,
0x3fd00000,
0x47e00000,
0x47e80000,
0x47f00000,
0x4ff80000,
0x57fc0000,
0x5ffe0000,
0x67ff0000,
0x77ff8000,
0x7fffa000,
0x7fffb000,
0x7fffc000,
0x7fffd000,
0x7fffe000,
0x87fff000,
0x87fff800
};
/** encode 0a as '0' and 0b as '1', return number of symbols created
@internalComponent
*/
void EncodeRunLengthL(TBitOutput& aOutput, TInt aLength)
{
if (aLength>0)
{
EncodeRunLengthL(aOutput,(aLength-1)>>1);
aOutput.HuffmanL(HuffmanEncoding[1-(aLength&1)]);
}
}
/** Store a canonical huffman encoding in compact form
As the encoding is canonical, only the code lengths of each code needs to be saved.
Due to the nature of code length tables, these can usually be stored very compactly
by encoding the encoding itself, hence the use of the bit output stream.
@param aOutput The output stream for the encoding
@param aHuffman The table of code lengths as generated by Huffman::HuffmanL()
@param aNumCodes The number of huffman codes in the table
@leave TBitOutput::HuffmanL()
*/
EXPORT_C void Huffman::ExternalizeL(TBitOutput& aOutput,const TUint32 aHuffman[],TInt aNumCodes)
{
// We assume that the code length table is generated by the huffman generator,
// in which case the maxmimum code length is 27 bits.
//
// We apply three transformations to the data:
// 1. the data goes through a move-to-front coder
// 2. apply a rle-0 coder which replace runs of '0' with streams of '0a' and '0b'
// 3. encode the result using a predefined (average) huffman coding
//
// This can be done in a single pass over the data, avoiding the need for additional
// memory.
//
// initialise the list for the MTF coder
TFixedArray<TUint8,Huffman::KMetaCodes> list;
TInt i;
for (i=0;i<list.Count();++i)
list[i]=TUint8(i);
TInt last=0;
TInt rl=0;
const TUint32* p32=aHuffman;
const TUint32* e32=p32+aNumCodes;
while (p32<e32)
{
TInt c=*p32++;
if (c==last)
++rl; // repeat of last symbol
else
{
// encode run-length
EncodeRunLengthL(aOutput,rl);
rl=0;
// find code in MTF list
TInt j;
for (j=1;list[j]!=c;++j)
;
// store this code
aOutput.HuffmanL(HuffmanEncoding[j+1]);
// adjust list for MTF algorithm
while (--j>0)
list[j+1]=list[j];
list[1]=TUint8(last);
last=c;
}
}
// encod any remaining run-length
EncodeRunLengthL(aOutput,rl);
}
/** Construct a bit stream output object
Following construction the bit stream is ready for writing bits, but will first call
OverflowL() as the output buffer is 'full'. A derived class can detect this state as
Ptr() will return null.
*/
EXPORT_C TBitOutput::TBitOutput()
:iCode(0),iBits(-8),iPtr(0),iEnd(0)
{}
/** Construct a bit stream output object over a buffer
Data will be written to the buffer until it is full, at which point OverflowL() will
be called. This should handle the data and then can Set() again to reset the buffer
for further output.
@param aBuf The buffer for output
@param aSize The size of the buffer in bytes
*/
EXPORT_C TBitOutput::TBitOutput(TUint8* aBuf,TInt aSize)
:iCode(0),iBits(-8),iPtr(aBuf),iEnd(aBuf+aSize)
{}
/** Write a huffman code
This expects a huffman code value as generated by Huffman::Encoding()
@param aHuffCode The huffman code write to the stream
@leave OverflowL() If the output buffer is full, OverflowL() is called
*/
EXPORT_C void TBitOutput::HuffmanL(TUint aHuffCode)
{
DoWriteL(aHuffCode<<(32-Huffman::KMaxCodeLength),aHuffCode>>Huffman::KMaxCodeLength);
}
/** Write an arbitrary integer value
Write an unsigned integer using the number of bits specified. Only
the low order bits of the value are written to the output, most
significant bit first.
@param aValue The value to write to the stream
@param aLength The number of bits to output
@leave OverflowL() If the output buffer is full, OverflowL() is called
*/
EXPORT_C void TBitOutput::WriteL(TUint aValue,TInt aLength)
{
if (aLength)
DoWriteL(aValue<<=32-aLength,aLength);
}
/** Pad the bitstream to the next byte boundary
Terminate the bitstream by padding the last byte with the requested value.
Following this operation the bitstream can continue to be used, the data will
start at the next byte.
@param aPadding The bit value to pad the final byte with
@leave OverflowL() If the output buffer is full, OverflowL() is called
*/
EXPORT_C void TBitOutput::PadL(TUint aPadding)
{
if (iBits>-8)
WriteL(aPadding?0xffffffffu:0,-iBits);
}
/** Write the higher order bits to the stream
@internalComponent
*/
void TBitOutput::DoWriteL(TUint aBits,TInt aSize)
{
if (aSize>25)
{
// cannot process >25 bits in a single pass
// so do the top 8 bits first
ASSERT(aSize<=32);
DoWriteL(aBits&0xff000000u,8);
aBits<<=8;
aSize-=8;
}
TInt bits=iBits;
TUint code=iCode|(aBits>>(bits+8));
bits+=aSize;
if (bits>=0)
{
TUint8* p=iPtr;
do
{
if (p==iEnd)
{
// run out of buffer space so invoke the overflow handler
iPtr=p;
OverflowL();
p=iPtr;
ASSERT(p!=iEnd);
}
*p++=TUint8(code>>24);
code<<=8;
bits-=8;
} while (bits>=0);
iPtr=p;
}
iCode=code;
iBits=bits;
}
/** Handle a full output buffer
This virtual function is called when the output buffer is full. It should deal
with the data in the buffer before reseting the buffer using Set(), allowing
further data to be written.
A derived class can replace this to write the data to a file (for example)
before marking the buffer as empty.
@leave KErrOverflow The default implementation leaves
*/
void TBitOutput::OverflowL()
{
User::Leave(KErrOverflow);
}