// Copyright (c) 1998-2009 Nokia Corporation and/or its subsidiary(-ies).// All rights reserved.// This component and the accompanying materials are made available// under the terms of "Eclipse Public License v1.0"// which accompanies this distribution, and is available// at the URL "http://www.eclipse.org/legal/epl-v10.html".//// Initial Contributors:// Nokia Corporation - initial contribution.//// Contributors://// Description:// e32tools\petran\Szip\encode.cpp// //#include "huffman.h"#include "panic.h"#include <e32base.h>#include "h_utl.h"#include <assert.h>#include "farray.h"#include <stdlib.h>void User::Invariant() { fprintf(stderr, "User::Invariant() called\n"); exit(1); }// local definitions used for Huffman code generationtypedef TUint16 THuff; /** @internal */const THuff KLeaf=0x8000; /** @internal */struct TNode/** @internal */ { TUint iCount; THuff iLeft; THuff iRight; };void HuffmanLengthsL(TUint32* aLengths,const TNode* aNodes,TInt aNode,TInt aLen)/** recursive function to calculate the code lengths from the node tree @internal*/ { if (++aLen>Huffman::KMaxCodeLength) Panic(EHuffmanBufferOverflow); const TNode& node=aNodes[aNode]; TUint x=node.iLeft; if (x&KLeaf) aLengths[x&~KLeaf]=aLen; else HuffmanLengthsL(aLengths,aNodes,x,aLen); x=node.iRight; if (x&KLeaf) aLengths[x&~KLeaf]=aLen; else HuffmanLengthsL(aLengths,aNodes,x,aLen); }void InsertInOrder(TNode* aNodes, TInt aSize, TUint aCount, TInt aVal)/** Insert the {aCount,aValue} pair into the already sorted array of nodes @internal*/ { // Uses Insertion sort following a binary search... TInt l=0, r=aSize; while (l < r) { TInt m = (l+r) >> 1; if (aNodes[m].iCount<aCount) r=m; else l=m+1; } HMem::Copy(aNodes+l+1,aNodes+l,sizeof(TNode)*(aSize-l)); aNodes[l].iCount=aCount; aNodes[l].iRight=TUint16(aVal); }void Huffman::HuffmanL(const TUint32 aFrequency[],TInt aNumCodes,TUint32 aHuffman[])/** Generate a Huffman code This generates a Huffman code for a given set of code frequencies. The output is a table of code lengths which can be used to build canonincal encoding tables or decoding trees for use with the TBitInput and TBitOutput classes. Entries in the table with a frequency of zero will have a zero code length and thus no associated huffman encoding. If each such symbol should have a maximum length encoding, they must be given at least a frequency of 1. For an alphabet of n symbols, this algorithm has a transient memory overhead of 8n, and a time complexity of O(n*log(n)). @param "const TUint32 aFrequency[]" The table of code frequencies @param "TInt aNumCodes" The number of codes in the table @param "TUint32 aHuffman[]" The table for the output code-length table. This must be the same size as the frequency table, and can safely be the same table @leave "KErrNoMemory" If memory used for code generation cannot be allocated @panic "USER ???" If the number of codes exceeds Huffman::KMaxCodes*/ { if(TUint(aNumCodes)>TUint(KMaxCodes)) Panic(EHuffmanTooManyCodes); // Sort the values into decreasing order of frequency // TNode* nodes = new TNode[aNumCodes]; if(nodes==NULL) Panic(EHuffmanOutOfMemory); TInt lCount=0; for (TInt ii=0;ii<aNumCodes;++ii) { TInt c=aFrequency[ii]; if (c!=0) InsertInOrder(nodes,lCount++,c,ii|KLeaf); } // default code length is zero HMem::FillZ(aHuffman,aNumCodes*sizeof(TUint32)); if (lCount==0) { // no codes with frequency>0. No code has a length } else if (lCount==1) { // special case for a single value (always encode as "0") aHuffman[nodes[0].iRight&~KLeaf]=1; } else { // Huffman algorithm: pair off least frequent nodes and reorder // do { --lCount; TUint c=nodes[lCount].iCount + nodes[lCount-1].iCount; nodes[lCount].iLeft=nodes[lCount-1].iRight; // re-order the leaves now to reflect new combined frequency 'c' InsertInOrder(nodes,lCount-1,c,lCount); } while (lCount>1); // generate code lengths in aHuffman[] HuffmanLengthsL(aHuffman,nodes,1,0); } delete [] nodes; if(!IsValid(aHuffman,aNumCodes)) Panic(EHuffmanInvalidCoding); }TBool Huffman::IsValid(const TUint32 aHuffman[],TInt aNumCodes)/** Validate a Huffman encoding This verifies that a Huffman coding described by the code lengths is valid. In particular, it ensures that no code exceeds the maximum length and that it is possible to generate a canonical coding for the specified lengths. @param "const TUint32 aHuffman[]" The table of code lengths as generated by Huffman::HuffmanL() @param "TInt aNumCodes" The number of codes in the table @return True if the code is valid, otherwise false*/ { // The code is valid if one of the following holds: // (a) the code exactly fills the 'code space' // (b) there is only a single symbol with code length 1 // (c) there are no encoded symbols // TUint remain=1<<KMaxCodeLength; TInt totlen=0; for (const TUint32* p=aHuffman+aNumCodes; p>aHuffman;) { TInt len=*--p; if (len>0) { totlen+=len; if (len>KMaxCodeLength) return EFalse; TUint c=1<<(KMaxCodeLength-len); if (c>remain) return EFalse; remain-=c; } } return remain==0 || totlen<=1; }void Huffman::Encoding(const TUint32 aHuffman[],TInt aNumCodes,TUint32 aEncodeTable[])/** Create a canonical Huffman encoding table This generates the huffman codes used by TBitOutput::HuffmanL() to write huffman encoded data. The input is table of code lengths, as generated by Huffman::HuffmanL() and must represent a valid huffman code. @param "const TUint32 aHuffman[]" The table of code lengths as generated by Huffman::HuffmanL() @param "TInt aNumCodes" The number of codes in the table @param "TUint32 aEncodeTable[]" The table for the output huffman codes. This must be the same size as the code-length table, and can safely be the same table @panic "USER ???" If the provided code is not a valid Huffman coding @see IsValid() @see HuffmanL()*/ { __ASSERT_ALWAYS(IsValid(aHuffman,aNumCodes),Panic(EHuffmanInvalidCoding)); TFixedArray<TInt,KMaxCodeLength> lenCount; lenCount.Reset(); TInt ii; for (ii=0;ii<aNumCodes;++ii) { TInt len=aHuffman[ii]-1; if (len>=0) ++lenCount[len]; } TFixedArray<TUint,KMaxCodeLength> nextCode; TUint code=0; for (ii=0;ii<KMaxCodeLength;++ii) { code<<=1; nextCode[ii]=code; code+=lenCount[ii]; } for (ii=0;ii<aNumCodes;++ii) { TInt len=aHuffman[ii]; if (len==0) aEncodeTable[ii]=0; else { aEncodeTable[ii] = (nextCode[len-1]<<(KMaxCodeLength-len))|(len<<KMaxCodeLength); ++nextCode[len-1]; } } }/** the encoding table for the externalised code @internal*/const TUint32 HuffmanEncoding[]= { 0x10000000, 0x1c000000, 0x12000000, 0x1d000000, 0x26000000, 0x26800000, 0x2f000000, 0x37400000, 0x37600000, 0x37800000, 0x3fa00000, 0x3fb00000, 0x3fc00000, 0x3fd00000, 0x47e00000, 0x47e80000, 0x47f00000, 0x4ff80000, 0x57fc0000, 0x5ffe0000, 0x67ff0000, 0x77ff8000, 0x7fffa000, 0x7fffb000, 0x7fffc000, 0x7fffd000, 0x7fffe000, 0x87fff000, 0x87fff800 };void EncodeRunLengthL(TBitOutput& aOutput, TInt aLength)/** encode 0a as '0' and 0b as '1', return number of symbols created @internal*/ { if (aLength>0) { EncodeRunLengthL(aOutput,(aLength-1)>>1); aOutput.HuffmanL(HuffmanEncoding[1-(aLength&1)]); } }void Huffman::ExternalizeL(TBitOutput& aOutput,const TUint32 aHuffman[],TInt aNumCodes)/** Store a canonical huffman encoding in compact form As the encoding is canonical, only the code lengths of each code needs to be saved. Due to the nature of code length tables, these can usually be stored very compactly by encoding the encoding itself, hence the use of the bit output stream. @param "TBitOutput& aOutput" The output stream for the encoding @param "const TUint32 aHuffman[]" The table of code lengths as generated by Huffman::HuffmanL() @param "TInt aNumCodes" The number of huffman codes in the table @leave "TBitOutput::HuffmanL()"*/ { // We assume that the code length table is generated by the huffman generator, // in which case the maxmimum code length is 27 bits. // // We apply three transformations to the data: // 1. the data goes through a move-to-front coder // 2. apply a rle-0 coder which replace runs of '0' with streams of '0a' and '0b' // 3. encode the result using a predefined (average) huffman coding // // This can be done in a single pass over the data, avoiding the need for additional // memory. // // initialise the list for the MTF coder TFixedArray<TUint8,Huffman::KMetaCodes> list; TInt i; for (i=0;i<list.Count();++i) list[i]=TUint8(i); TInt last=0; TInt rl=0; const TUint32* p32=aHuffman; const TUint32* e32=p32+aNumCodes; while (p32<e32) { TInt c=*p32++; if (c==last) ++rl; // repeat of last symbol else { // encode run-length EncodeRunLengthL(aOutput,rl); rl=0; // find code in MTF list TInt j; for (j=1;list[j]!=c;++j) ; // store this code aOutput.HuffmanL(HuffmanEncoding[j+1]); // adjust list for MTF algorithm while (--j>0) list[j+1]=list[j]; list[1]=TUint8(last); last=c; } } // encod any remaining run-length EncodeRunLengthL(aOutput,rl); }TBitOutput::TBitOutput()/** Construct a bit stream output object Following construction the bit stream is ready for writing bits, but will first call OverflowL() as the output buffer is 'full'. A derived class can detect this state as Ptr() will return null.*/ :iCode(0),iBits(-8),iPtr(0),iEnd(0) {}TBitOutput::TBitOutput(TUint8* aBuf,TInt aSize)/** Construct a bit stream output object over a buffer Data will be written to the buffer until it is full, at which point OverflowL() will be called. This should handle the data and then can Set() again to reset the buffer for further output. @param "TUint8* aBuf" The buffer for output @param "TInt aSize" The size of the buffer in bytes*/ :iCode(0),iBits(-8),iPtr(aBuf),iEnd(aBuf+aSize) {}void TBitOutput::HuffmanL(TUint aHuffCode)/** Write a huffman code This expects a huffman code value as generated by Huffman::Encoding() @param "TUint aHuffCode" The huffman code write to the stream @leave "OverflowL()" If the output buffer is full, OverflowL() is called*/ { DoWriteL(aHuffCode<<(32-Huffman::KMaxCodeLength),aHuffCode>>Huffman::KMaxCodeLength); }void TBitOutput::WriteL(TUint aValue,TInt aLength)/** Write an arbitrary integer value Write an unsigned integer using the number of bits specified. Only the low order bits of the value are written to the output, most significant bit first. @param "TUint aValue" The value to write to the stream @param "TUint aLength" The number of bits to output @leave "OverflowL()" If the output buffer is full, OverflowL() is called*/ { if (aLength) DoWriteL(aValue<<=32-aLength,aLength); }void TBitOutput::PadL(TUint aPadding)/** Pad the bitstream to the next byte boundary Terminate the bitstream by padding the last byte with the requested value. Following this operation the bitstream can continue to be used, the data will start at the next byte. @param "TUint aPadding" The bit value to pad the final byte with @leave "OverflowL()" If the output buffer is full, OverflowL() is called*/ { if (iBits>-8) WriteL(aPadding?0xffffffffu:0,-iBits); }void TBitOutput::DoWriteL(TUint aBits,TInt aSize)/** Write the higher order bits to the stream @internal*/ { if (aSize>25) { // cannot process >25 bits in a single pass // so do the top 8 bits first ASSERT(aSize<=32); DoWriteL(aBits&0xff000000u,8); aBits<<=8; aSize-=8; } TInt bits=iBits; TUint code=iCode|(aBits>>(bits+8)); bits+=aSize; if (bits>=0) { TUint8* p=iPtr; do { if (p==iEnd) { // run out of buffer space so invoke the overflow handler iPtr=p; OverflowL(); p=iPtr; ASSERT(p!=iEnd); } *p++=TUint8(code>>24); code<<=8; bits-=8; } while (bits>=0); iPtr=p; } iCode=code; iBits=bits; }void TBitOutput::OverflowL()/** Handle a full output buffer This virtual function is called when the output buffer is full. It should deal with the data in the buffer before reseting the buffer using Set(), allowing further data to be written. A derived class can replace this to write the data to a file (for example) before marking the buffer as empty. @leave "KErrOverflow" The default implementation leaves*/ { Panic(EHuffmanBufferOverflow); }