toolsandutils/e32tools/e32image/deflate/encode.cpp
changeset 0 83f4b4db085c
child 18 99082257a271
equal deleted inserted replaced
-1:000000000000 0:83f4b4db085c
       
     1 // Copyright (c) 1998-2009 Nokia Corporation and/or its subsidiary(-ies).
       
     2 // All rights reserved.
       
     3 // This component and the accompanying materials are made available
       
     4 // under the terms of the License "Eclipse Public License v1.0"
       
     5 // which accompanies this distribution, and is available
       
     6 // at the URL "http://www.eclipse.org/legal/epl-v10.html".
       
     7 //
       
     8 // Initial Contributors:
       
     9 // Nokia Corporation - initial contribution.
       
    10 //
       
    11 // Contributors:
       
    12 //
       
    13 // Description:
       
    14 // e32tools\petran\Szip\encode.cpp
       
    15 // 
       
    16 //
       
    17 
       
    18 #include "huffman.h"
       
    19 #include "panic.h"
       
    20 #include <e32base.h>
       
    21 #include <e32base_private.h>
       
    22 #include "h_utl.h"
       
    23 #include <assert.h>
       
    24 #include "farray.h"
       
    25 #include <stdlib.h>
       
    26 
       
    27 void User::Invariant()
       
    28 	{
       
    29 	fprintf(stderr, "User::Invariant() called\n");
       
    30 	exit(1);
       
    31 	}
       
    32 
       
    33 // local definitions used for Huffman code generation
       
    34 typedef TUint16 THuff;		/** @internal */
       
    35 const THuff KLeaf=0x8000;	/** @internal */
       
    36 struct TNode
       
    37 /** @internal */
       
    38 	{
       
    39 	TUint iCount;
       
    40 	THuff iLeft;
       
    41 	THuff iRight;
       
    42 	};
       
    43 
       
    44 void HuffmanLengthsL(TUint32* aLengths,const TNode* aNodes,TInt aNode,TInt aLen)
       
    45 /** recursive function to calculate the code lengths from the node tree
       
    46 
       
    47 	@internal
       
    48 */
       
    49 	{
       
    50 	if (++aLen>Huffman::KMaxCodeLength)
       
    51 		Panic(EHuffmanBufferOverflow);
       
    52 
       
    53 	const TNode& node=aNodes[aNode];
       
    54 	TUint x=node.iLeft;
       
    55 	if (x&KLeaf)
       
    56 		aLengths[x&~KLeaf]=aLen;
       
    57 	else
       
    58 		HuffmanLengthsL(aLengths,aNodes,x,aLen);
       
    59 	x=node.iRight;
       
    60 	if (x&KLeaf)
       
    61 		aLengths[x&~KLeaf]=aLen;
       
    62 	else
       
    63 		HuffmanLengthsL(aLengths,aNodes,x,aLen);
       
    64 	}
       
    65 
       
    66 void InsertInOrder(TNode* aNodes, TInt aSize, TUint aCount, TInt aVal)
       
    67 /**	Insert the {aCount,aValue} pair into the already sorted array of nodes
       
    68 
       
    69 	@internal
       
    70 */
       
    71 	{
       
    72 	// Uses Insertion sort following a binary search...
       
    73 	TInt l=0, r=aSize;
       
    74 	while (l < r)
       
    75 		{
       
    76 		TInt m = (l+r) >> 1;
       
    77 		if (aNodes[m].iCount<aCount)
       
    78 			r=m;
       
    79 		else
       
    80 			l=m+1;
       
    81 		}
       
    82 	HMem::Copy(aNodes+l+1,aNodes+l,sizeof(TNode)*(aSize-l));
       
    83 	aNodes[l].iCount=aCount;
       
    84 	aNodes[l].iRight=TUint16(aVal);
       
    85 	}
       
    86 
       
    87 void Huffman::HuffmanL(const TUint32 aFrequency[],TInt aNumCodes,TUint32 aHuffman[])
       
    88 /** Generate a Huffman code
       
    89 
       
    90 	This generates a Huffman code for a given set of code frequencies. The output
       
    91 	is a table of code lengths which can be used to build canonincal encoding tables
       
    92 	or decoding trees for use with the TBitInput and TBitOutput classes.
       
    93 
       
    94 	Entries in the table with a frequency of zero will have a zero code length
       
    95 	and thus no associated huffman encoding. If each such symbol should have a
       
    96 	maximum length encoding, they must be given at least a frequency of 1.
       
    97 
       
    98 	For an alphabet of n symbols, this algorithm has a transient memory overhead
       
    99 	of 8n, and a time complexity of O(n*log(n)).
       
   100 
       
   101 	@param "const TUint32 aFrequency[]" The table of code frequencies
       
   102 	@param "TInt aNumCodes" The number of codes in the table
       
   103 	@param "TUint32 aHuffman[]" The table for the output code-length table. This must be
       
   104 		the same size as the frequency table, and can safely be the same table
       
   105 
       
   106 	@leave "KErrNoMemory" If memory used for code generation cannot be allocated
       
   107 
       
   108   	@panic "USER ???" If the number of codes exceeds Huffman::KMaxCodes
       
   109 */
       
   110 	{
       
   111 	if(TUint(aNumCodes)>TUint(KMaxCodes))
       
   112 		Panic(EHuffmanTooManyCodes);
       
   113 
       
   114 	// Sort the values into decreasing order of frequency
       
   115 	//
       
   116 	TNode* nodes = new TNode[aNumCodes];
       
   117 	if(nodes==NULL)
       
   118 		Panic(EHuffmanOutOfMemory);
       
   119 
       
   120 	TInt lCount=0;
       
   121 
       
   122 	for (TInt ii=0;ii<aNumCodes;++ii)
       
   123 		{
       
   124 		TInt c=aFrequency[ii];
       
   125 		if (c!=0)
       
   126 			InsertInOrder(nodes,lCount++,c,ii|KLeaf);
       
   127 		}
       
   128 
       
   129 	// default code length is zero
       
   130 	HMem::FillZ(aHuffman,aNumCodes*sizeof(TUint32));
       
   131 
       
   132 	if (lCount==0)
       
   133 		{
       
   134 		// no codes with frequency>0. No code has a length
       
   135 		}
       
   136 	else if (lCount==1)
       
   137 		{
       
   138 		// special case for a single value (always encode as "0")
       
   139 		aHuffman[nodes[0].iRight&~KLeaf]=1;
       
   140 		}
       
   141 	else
       
   142 		{
       
   143 		// Huffman algorithm: pair off least frequent nodes and reorder
       
   144 		//
       
   145 		do
       
   146 			{
       
   147 			--lCount;
       
   148 			TUint c=nodes[lCount].iCount + nodes[lCount-1].iCount;
       
   149 			nodes[lCount].iLeft=nodes[lCount-1].iRight;
       
   150 			// re-order the leaves now to reflect new combined frequency 'c'
       
   151 			InsertInOrder(nodes,lCount-1,c,lCount);
       
   152 			} while (lCount>1);
       
   153 		// generate code lengths in aHuffman[]
       
   154 		HuffmanLengthsL(aHuffman,nodes,1,0);
       
   155 		}
       
   156 
       
   157 	delete [] nodes;
       
   158 
       
   159 	if(!IsValid(aHuffman,aNumCodes))
       
   160 		Panic(EHuffmanInvalidCoding);
       
   161 	}
       
   162 
       
   163 TBool Huffman::IsValid(const TUint32 aHuffman[],TInt aNumCodes)
       
   164 /** Validate a Huffman encoding
       
   165 
       
   166 	This verifies that a Huffman coding described by the code lengths is valid.
       
   167 	In particular, it ensures that no code exceeds the maximum length and
       
   168 	that it is possible to generate a canonical coding for the specified lengths.
       
   169 	
       
   170 	@param "const TUint32 aHuffman[]" The table of code lengths as generated by Huffman::HuffmanL()
       
   171 	@param "TInt aNumCodes" The number of codes in the table
       
   172 
       
   173 	@return True if the code is valid, otherwise false
       
   174 */
       
   175 	{
       
   176 	// The code is valid if one of the following holds:
       
   177 	// (a) the code exactly fills the 'code space'
       
   178 	// (b) there is only a single symbol with code length 1
       
   179 	// (c) there are no encoded symbols
       
   180 	//
       
   181 	TUint remain=1<<KMaxCodeLength;
       
   182 	TInt totlen=0;
       
   183 	for (const TUint32* p=aHuffman+aNumCodes; p>aHuffman;)
       
   184 		{
       
   185 		TInt len=*--p;
       
   186 		if (len>0)
       
   187 			{
       
   188 			totlen+=len;
       
   189 			if (len>KMaxCodeLength)
       
   190 				return EFalse;
       
   191 			TUint c=1<<(KMaxCodeLength-len);
       
   192 			if (c>remain)
       
   193 				return EFalse;
       
   194 			remain-=c;
       
   195 			}
       
   196 		}
       
   197 
       
   198 	return remain==0 || totlen<=1;
       
   199 	}
       
   200 
       
   201 void Huffman::Encoding(const TUint32 aHuffman[],TInt aNumCodes,TUint32 aEncodeTable[])
       
   202 /** Create a canonical Huffman encoding table
       
   203 
       
   204 	This generates the huffman codes used by TBitOutput::HuffmanL() to write huffman
       
   205 	encoded data. The input is table of code lengths, as generated by Huffman::HuffmanL()
       
   206 	and must represent a valid huffman code.
       
   207 	
       
   208 	@param "const TUint32 aHuffman[]" The table of code lengths as generated by Huffman::HuffmanL()
       
   209 	@param "TInt aNumCodes" The number of codes in the table
       
   210 	@param "TUint32 aEncodeTable[]" The table for the output huffman codes. This must be
       
   211 		the same size as the code-length table, and can safely be the same table
       
   212 
       
   213 	@panic "USER ???" If the provided code is not a valid Huffman coding
       
   214 	
       
   215 	@see IsValid()
       
   216 	@see HuffmanL()
       
   217 */
       
   218 	{
       
   219 	__ASSERT_ALWAYS(IsValid(aHuffman,aNumCodes),Panic(EHuffmanInvalidCoding));
       
   220 
       
   221 	TFixedArray<TInt,KMaxCodeLength> lenCount;
       
   222 	lenCount.Reset();
       
   223 
       
   224 	TInt ii;
       
   225 	for (ii=0;ii<aNumCodes;++ii)
       
   226 		{
       
   227 		TInt len=aHuffman[ii]-1;
       
   228 		if (len>=0)
       
   229 			++lenCount[len];
       
   230 		}
       
   231 
       
   232 	TFixedArray<TUint,KMaxCodeLength> nextCode;
       
   233 	TUint code=0;
       
   234 	for (ii=0;ii<KMaxCodeLength;++ii)
       
   235 		{
       
   236 		code<<=1;
       
   237 		nextCode[ii]=code;
       
   238 		code+=lenCount[ii];
       
   239 		}
       
   240 
       
   241 	for (ii=0;ii<aNumCodes;++ii)
       
   242 		{
       
   243 		TInt len=aHuffman[ii];
       
   244 		if (len==0)
       
   245 			aEncodeTable[ii]=0;
       
   246 		else
       
   247 			{
       
   248 			aEncodeTable[ii] = (nextCode[len-1]<<(KMaxCodeLength-len))|(len<<KMaxCodeLength);
       
   249 			++nextCode[len-1];
       
   250 			}
       
   251 		}
       
   252 	}
       
   253 
       
   254 /** the encoding table for the externalised code
       
   255 	@internal
       
   256 */
       
   257 const TUint32 HuffmanEncoding[]=
       
   258 	{
       
   259 	0x10000000,
       
   260 	0x1c000000,
       
   261 	0x12000000,
       
   262 	0x1d000000,
       
   263 	0x26000000,
       
   264 	0x26800000,
       
   265 	0x2f000000,
       
   266 	0x37400000,
       
   267 	0x37600000,
       
   268 	0x37800000,
       
   269 	0x3fa00000,
       
   270 	0x3fb00000,
       
   271 	0x3fc00000,
       
   272 	0x3fd00000,
       
   273 	0x47e00000,
       
   274 	0x47e80000,
       
   275 	0x47f00000,
       
   276 	0x4ff80000,
       
   277 	0x57fc0000,
       
   278 	0x5ffe0000,
       
   279 	0x67ff0000,
       
   280 	0x77ff8000,
       
   281 	0x7fffa000,
       
   282 	0x7fffb000,
       
   283 	0x7fffc000,
       
   284 	0x7fffd000,
       
   285 	0x7fffe000,
       
   286 	0x87fff000,
       
   287 	0x87fff800
       
   288 	};
       
   289 
       
   290 void EncodeRunLengthL(TBitOutput& aOutput, TInt aLength)
       
   291 /** encode 0a as '0' and 0b as '1', return number of symbols created
       
   292 
       
   293 	@internal
       
   294 */
       
   295 	{
       
   296 	if (aLength>0)
       
   297 		{
       
   298 		EncodeRunLengthL(aOutput,(aLength-1)>>1);
       
   299 		aOutput.HuffmanL(HuffmanEncoding[1-(aLength&1)]);
       
   300 		}
       
   301 	}
       
   302 
       
   303 void Huffman::ExternalizeL(TBitOutput& aOutput,const TUint32 aHuffman[],TInt aNumCodes)
       
   304 /** Store a canonical huffman encoding in compact form
       
   305 
       
   306 	As the encoding is canonical, only the code lengths of each code needs to be saved.
       
   307 
       
   308 	Due to the nature of code length tables, these can usually be stored very compactly
       
   309 	by encoding the encoding itself, hence the use of the bit output stream.
       
   310 	
       
   311 	@param "TBitOutput& aOutput" The output stream for the encoding
       
   312 	@param "const TUint32 aHuffman[]" The table of code lengths as generated by Huffman::HuffmanL()
       
   313 	@param "TInt aNumCodes" The number of huffman codes in the table
       
   314 
       
   315 	@leave "TBitOutput::HuffmanL()"
       
   316 */
       
   317 	{
       
   318 	// We assume that the code length table is generated by the huffman generator,
       
   319 	// in which case the maxmimum code length is 27 bits.
       
   320 	//
       
   321 	// We apply three transformations to the data:
       
   322 	// 1. the data goes through a move-to-front coder
       
   323 	// 2. apply a rle-0 coder which replace runs of '0' with streams of '0a' and '0b'
       
   324 	// 3. encode the result using a predefined (average) huffman coding
       
   325 	//
       
   326 	// This can be done in a single pass over the data, avoiding the need for additional
       
   327 	// memory.
       
   328 	//
       
   329 	// initialise the list for the MTF coder
       
   330 	TFixedArray<TUint8,Huffman::KMetaCodes> list;
       
   331 	TInt i;
       
   332 	for (i=0;i<list.Count();++i)
       
   333 		list[i]=TUint8(i);
       
   334 	TInt last=0;
       
   335 
       
   336 	TInt rl=0;
       
   337 	const TUint32* p32=aHuffman;
       
   338 	const TUint32* e32=p32+aNumCodes;
       
   339 	while (p32<e32)
       
   340 		{
       
   341 		TInt c=*p32++;
       
   342 		if (c==last)
       
   343 			++rl;	// repeat of last symbol
       
   344 		else
       
   345 			{
       
   346 			// encode run-length
       
   347 			EncodeRunLengthL(aOutput,rl);
       
   348 			rl=0;
       
   349 			// find code in MTF list
       
   350 			TInt j;
       
   351 			for (j=1;list[j]!=c;++j)
       
   352 				;
       
   353 			// store this code
       
   354 			aOutput.HuffmanL(HuffmanEncoding[j+1]);
       
   355 			// adjust list for MTF algorithm
       
   356 			while (--j>0)
       
   357 				list[j+1]=list[j];
       
   358 			list[1]=TUint8(last);
       
   359 			last=c;
       
   360 			}
       
   361 		}
       
   362 	// encod any remaining run-length
       
   363 	EncodeRunLengthL(aOutput,rl);
       
   364 	}
       
   365 
       
   366 
       
   367 TBitOutput::TBitOutput()
       
   368 /** Construct a bit stream output object
       
   369 
       
   370 	Following construction the bit stream is ready for writing bits, but will first call
       
   371 	OverflowL() as the output buffer is 'full'. A derived class can detect this state as
       
   372 	Ptr() will return null.
       
   373 */
       
   374 	:iCode(0),iBits(-8),iPtr(0),iEnd(0)
       
   375 	{}
       
   376 
       
   377 TBitOutput::TBitOutput(TUint8* aBuf,TInt aSize)
       
   378 /** Construct a bit stream output object over a buffer
       
   379 
       
   380 	Data will be written to the buffer until it is full, at which point OverflowL() will
       
   381 	be called. This should handle the data and then can Set() again to reset the buffer
       
   382 	for further output.
       
   383 	
       
   384 	@param "TUint8* aBuf" The buffer for output
       
   385 	@param "TInt aSize" The size of the buffer in bytes
       
   386 */
       
   387 	:iCode(0),iBits(-8),iPtr(aBuf),iEnd(aBuf+aSize)
       
   388 	{}
       
   389 
       
   390 void TBitOutput::HuffmanL(TUint aHuffCode)
       
   391 /** Write a huffman code
       
   392 
       
   393 	This expects a huffman code value as generated by Huffman::Encoding()
       
   394 
       
   395 	@param "TUint aHuffCode" The huffman code write to the stream
       
   396 
       
   397 	@leave "OverflowL()" If the output buffer is full, OverflowL() is called
       
   398 */
       
   399 	{
       
   400 	DoWriteL(aHuffCode<<(32-Huffman::KMaxCodeLength),aHuffCode>>Huffman::KMaxCodeLength);
       
   401 	}
       
   402 
       
   403 void TBitOutput::WriteL(TUint aValue,TInt aLength)
       
   404 /** Write an arbitrary integer value
       
   405 
       
   406 	Write an unsigned integer using the number of bits specified. Only
       
   407 	the low order bits of the value are written to the output, most
       
   408 	significant bit first.
       
   409 
       
   410 	@param "TUint aValue" The value to write to the stream
       
   411 	@param "TUint aLength" The number of bits to output
       
   412 
       
   413 	@leave "OverflowL()" If the output buffer is full, OverflowL() is called
       
   414 */
       
   415 	{
       
   416 	if (aLength)
       
   417 		DoWriteL(aValue<<=32-aLength,aLength);
       
   418 	}
       
   419 
       
   420 void TBitOutput::PadL(TUint aPadding)
       
   421 /** Pad the bitstream to the next byte boundary
       
   422 
       
   423 	Terminate the bitstream by padding the last byte with the requested value.
       
   424 	Following this operation the bitstream can continue to be used, the data will
       
   425 	start at the next byte.
       
   426 
       
   427 	@param "TUint aPadding" The bit value to pad the final byte with
       
   428 
       
   429 	@leave "OverflowL()" If the output buffer is full, OverflowL() is called
       
   430 */
       
   431 	{
       
   432 	if (iBits>-8)
       
   433 		WriteL(aPadding?0xffffffffu:0,-iBits);
       
   434 	}
       
   435 
       
   436 void TBitOutput::DoWriteL(TUint aBits,TInt aSize)
       
   437 /** Write the higher order bits to the stream
       
   438 	
       
   439 	@internal
       
   440 */
       
   441 	{
       
   442 	if (aSize>25)
       
   443 		{
       
   444 		// cannot process >25 bits in a single pass
       
   445 		// so do the top 8 bits first
       
   446 		ASSERT(aSize<=32);
       
   447 		DoWriteL(aBits&0xff000000u,8);
       
   448 		aBits<<=8;
       
   449 		aSize-=8;
       
   450 		}
       
   451 
       
   452 	TInt bits=iBits;
       
   453 	TUint code=iCode|(aBits>>(bits+8));
       
   454 	bits+=aSize;
       
   455 	if (bits>=0)
       
   456 		{
       
   457 		TUint8* p=iPtr;
       
   458 		do
       
   459 			{
       
   460 			if (p==iEnd)
       
   461 				{
       
   462 				// run out of buffer space so invoke the overflow handler
       
   463 				iPtr=p;
       
   464 				OverflowL();
       
   465 				p=iPtr;
       
   466 				ASSERT(p!=iEnd);
       
   467 				}
       
   468 			*p++=TUint8(code>>24);
       
   469 			code<<=8;
       
   470 			bits-=8;
       
   471 			} while (bits>=0);
       
   472 		iPtr=p;
       
   473 		}
       
   474 	iCode=code;
       
   475 	iBits=bits;
       
   476 	}
       
   477 
       
   478 void TBitOutput::OverflowL()
       
   479 /** Handle a full output buffer
       
   480 
       
   481 	This virtual function is called when the output buffer is full. It should deal
       
   482 	with the data in the buffer before reseting the buffer using Set(), allowing
       
   483 	further data to be written.
       
   484 
       
   485 	A derived class can replace this to write the data to a file (for example)
       
   486 	before marking the buffer as empty.
       
   487 
       
   488 	@leave "KErrOverflow" The default implementation leaves
       
   489 */
       
   490 	{
       
   491 	Panic(EHuffmanBufferOverflow);
       
   492 	}