e32tools/e32lib/e32image/deflate/encode.cpp
changeset 0 044383f39525
equal deleted inserted replaced
-1:000000000000 0:044383f39525
       
     1 // Copyright (c) 1998-2009 Nokia Corporation and/or its subsidiary(-ies).
       
     2 // All rights reserved.
       
     3 // This component and the accompanying materials are made available
       
     4 // under the terms of "Eclipse Public License v1.0"
       
     5 // which accompanies this distribution, and is available
       
     6 // at the URL "http://www.eclipse.org/legal/epl-v10.html".
       
     7 //
       
     8 // Initial Contributors:
       
     9 // Nokia Corporation - initial contribution.
       
    10 //
       
    11 // Contributors:
       
    12 //
       
    13 // Description:
       
    14 // e32tools\petran\Szip\encode.cpp
       
    15 // 
       
    16 //
       
    17 
       
    18 #include "huffman.h"
       
    19 #include "panic.h"
       
    20 #include <e32base.h>
       
    21 #include "h_utl.h"
       
    22 #include <assert.h>
       
    23 #include "farray.h"
       
    24 #include <stdlib.h>
       
    25 
       
    26 void User::Invariant()
       
    27 	{
       
    28 	fprintf(stderr, "User::Invariant() called\n");
       
    29 	exit(1);
       
    30 	}
       
    31 
       
    32 // local definitions used for Huffman code generation
       
    33 typedef TUint16 THuff;		/** @internal */
       
    34 const THuff KLeaf=0x8000;	/** @internal */
       
    35 struct TNode
       
    36 /** @internal */
       
    37 	{
       
    38 	TUint iCount;
       
    39 	THuff iLeft;
       
    40 	THuff iRight;
       
    41 	};
       
    42 
       
    43 void HuffmanLengthsL(TUint32* aLengths,const TNode* aNodes,TInt aNode,TInt aLen)
       
    44 /** recursive function to calculate the code lengths from the node tree
       
    45 
       
    46 	@internal
       
    47 */
       
    48 	{
       
    49 	if (++aLen>Huffman::KMaxCodeLength)
       
    50 		Panic(EHuffmanBufferOverflow);
       
    51 
       
    52 	const TNode& node=aNodes[aNode];
       
    53 	TUint x=node.iLeft;
       
    54 	if (x&KLeaf)
       
    55 		aLengths[x&~KLeaf]=aLen;
       
    56 	else
       
    57 		HuffmanLengthsL(aLengths,aNodes,x,aLen);
       
    58 	x=node.iRight;
       
    59 	if (x&KLeaf)
       
    60 		aLengths[x&~KLeaf]=aLen;
       
    61 	else
       
    62 		HuffmanLengthsL(aLengths,aNodes,x,aLen);
       
    63 	}
       
    64 
       
    65 void InsertInOrder(TNode* aNodes, TInt aSize, TUint aCount, TInt aVal)
       
    66 /**	Insert the {aCount,aValue} pair into the already sorted array of nodes
       
    67 
       
    68 	@internal
       
    69 */
       
    70 	{
       
    71 	// Uses Insertion sort following a binary search...
       
    72 	TInt l=0, r=aSize;
       
    73 	while (l < r)
       
    74 		{
       
    75 		TInt m = (l+r) >> 1;
       
    76 		if (aNodes[m].iCount<aCount)
       
    77 			r=m;
       
    78 		else
       
    79 			l=m+1;
       
    80 		}
       
    81 	HMem::Copy(aNodes+l+1,aNodes+l,sizeof(TNode)*(aSize-l));
       
    82 	aNodes[l].iCount=aCount;
       
    83 	aNodes[l].iRight=TUint16(aVal);
       
    84 	}
       
    85 
       
    86 void Huffman::HuffmanL(const TUint32 aFrequency[],TInt aNumCodes,TUint32 aHuffman[])
       
    87 /** Generate a Huffman code
       
    88 
       
    89 	This generates a Huffman code for a given set of code frequencies. The output
       
    90 	is a table of code lengths which can be used to build canonincal encoding tables
       
    91 	or decoding trees for use with the TBitInput and TBitOutput classes.
       
    92 
       
    93 	Entries in the table with a frequency of zero will have a zero code length
       
    94 	and thus no associated huffman encoding. If each such symbol should have a
       
    95 	maximum length encoding, they must be given at least a frequency of 1.
       
    96 
       
    97 	For an alphabet of n symbols, this algorithm has a transient memory overhead
       
    98 	of 8n, and a time complexity of O(n*log(n)).
       
    99 
       
   100 	@param "const TUint32 aFrequency[]" The table of code frequencies
       
   101 	@param "TInt aNumCodes" The number of codes in the table
       
   102 	@param "TUint32 aHuffman[]" The table for the output code-length table. This must be
       
   103 		the same size as the frequency table, and can safely be the same table
       
   104 
       
   105 	@leave "KErrNoMemory" If memory used for code generation cannot be allocated
       
   106 
       
   107   	@panic "USER ???" If the number of codes exceeds Huffman::KMaxCodes
       
   108 */
       
   109 	{
       
   110 	if(TUint(aNumCodes)>TUint(KMaxCodes))
       
   111 		Panic(EHuffmanTooManyCodes);
       
   112 
       
   113 	// Sort the values into decreasing order of frequency
       
   114 	//
       
   115 	TNode* nodes = new TNode[aNumCodes];
       
   116 	if(nodes==NULL)
       
   117 		Panic(EHuffmanOutOfMemory);
       
   118 
       
   119 	TInt lCount=0;
       
   120 
       
   121 	for (TInt ii=0;ii<aNumCodes;++ii)
       
   122 		{
       
   123 		TInt c=aFrequency[ii];
       
   124 		if (c!=0)
       
   125 			InsertInOrder(nodes,lCount++,c,ii|KLeaf);
       
   126 		}
       
   127 
       
   128 	// default code length is zero
       
   129 	HMem::FillZ(aHuffman,aNumCodes*sizeof(TUint32));
       
   130 
       
   131 	if (lCount==0)
       
   132 		{
       
   133 		// no codes with frequency>0. No code has a length
       
   134 		}
       
   135 	else if (lCount==1)
       
   136 		{
       
   137 		// special case for a single value (always encode as "0")
       
   138 		aHuffman[nodes[0].iRight&~KLeaf]=1;
       
   139 		}
       
   140 	else
       
   141 		{
       
   142 		// Huffman algorithm: pair off least frequent nodes and reorder
       
   143 		//
       
   144 		do
       
   145 			{
       
   146 			--lCount;
       
   147 			TUint c=nodes[lCount].iCount + nodes[lCount-1].iCount;
       
   148 			nodes[lCount].iLeft=nodes[lCount-1].iRight;
       
   149 			// re-order the leaves now to reflect new combined frequency 'c'
       
   150 			InsertInOrder(nodes,lCount-1,c,lCount);
       
   151 			} while (lCount>1);
       
   152 		// generate code lengths in aHuffman[]
       
   153 		HuffmanLengthsL(aHuffman,nodes,1,0);
       
   154 		}
       
   155 
       
   156 	delete [] nodes;
       
   157 
       
   158 	if(!IsValid(aHuffman,aNumCodes))
       
   159 		Panic(EHuffmanInvalidCoding);
       
   160 	}
       
   161 
       
   162 TBool Huffman::IsValid(const TUint32 aHuffman[],TInt aNumCodes)
       
   163 /** Validate a Huffman encoding
       
   164 
       
   165 	This verifies that a Huffman coding described by the code lengths is valid.
       
   166 	In particular, it ensures that no code exceeds the maximum length and
       
   167 	that it is possible to generate a canonical coding for the specified lengths.
       
   168 	
       
   169 	@param "const TUint32 aHuffman[]" The table of code lengths as generated by Huffman::HuffmanL()
       
   170 	@param "TInt aNumCodes" The number of codes in the table
       
   171 
       
   172 	@return True if the code is valid, otherwise false
       
   173 */
       
   174 	{
       
   175 	// The code is valid if one of the following holds:
       
   176 	// (a) the code exactly fills the 'code space'
       
   177 	// (b) there is only a single symbol with code length 1
       
   178 	// (c) there are no encoded symbols
       
   179 	//
       
   180 	TUint remain=1<<KMaxCodeLength;
       
   181 	TInt totlen=0;
       
   182 	for (const TUint32* p=aHuffman+aNumCodes; p>aHuffman;)
       
   183 		{
       
   184 		TInt len=*--p;
       
   185 		if (len>0)
       
   186 			{
       
   187 			totlen+=len;
       
   188 			if (len>KMaxCodeLength)
       
   189 				return EFalse;
       
   190 			TUint c=1<<(KMaxCodeLength-len);
       
   191 			if (c>remain)
       
   192 				return EFalse;
       
   193 			remain-=c;
       
   194 			}
       
   195 		}
       
   196 
       
   197 	return remain==0 || totlen<=1;
       
   198 	}
       
   199 
       
   200 void Huffman::Encoding(const TUint32 aHuffman[],TInt aNumCodes,TUint32 aEncodeTable[])
       
   201 /** Create a canonical Huffman encoding table
       
   202 
       
   203 	This generates the huffman codes used by TBitOutput::HuffmanL() to write huffman
       
   204 	encoded data. The input is table of code lengths, as generated by Huffman::HuffmanL()
       
   205 	and must represent a valid huffman code.
       
   206 	
       
   207 	@param "const TUint32 aHuffman[]" The table of code lengths as generated by Huffman::HuffmanL()
       
   208 	@param "TInt aNumCodes" The number of codes in the table
       
   209 	@param "TUint32 aEncodeTable[]" The table for the output huffman codes. This must be
       
   210 		the same size as the code-length table, and can safely be the same table
       
   211 
       
   212 	@panic "USER ???" If the provided code is not a valid Huffman coding
       
   213 	
       
   214 	@see IsValid()
       
   215 	@see HuffmanL()
       
   216 */
       
   217 	{
       
   218 	__ASSERT_ALWAYS(IsValid(aHuffman,aNumCodes),Panic(EHuffmanInvalidCoding));
       
   219 
       
   220 	TFixedArray<TInt,KMaxCodeLength> lenCount;
       
   221 	lenCount.Reset();
       
   222 
       
   223 	TInt ii;
       
   224 	for (ii=0;ii<aNumCodes;++ii)
       
   225 		{
       
   226 		TInt len=aHuffman[ii]-1;
       
   227 		if (len>=0)
       
   228 			++lenCount[len];
       
   229 		}
       
   230 
       
   231 	TFixedArray<TUint,KMaxCodeLength> nextCode;
       
   232 	TUint code=0;
       
   233 	for (ii=0;ii<KMaxCodeLength;++ii)
       
   234 		{
       
   235 		code<<=1;
       
   236 		nextCode[ii]=code;
       
   237 		code+=lenCount[ii];
       
   238 		}
       
   239 
       
   240 	for (ii=0;ii<aNumCodes;++ii)
       
   241 		{
       
   242 		TInt len=aHuffman[ii];
       
   243 		if (len==0)
       
   244 			aEncodeTable[ii]=0;
       
   245 		else
       
   246 			{
       
   247 			aEncodeTable[ii] = (nextCode[len-1]<<(KMaxCodeLength-len))|(len<<KMaxCodeLength);
       
   248 			++nextCode[len-1];
       
   249 			}
       
   250 		}
       
   251 	}
       
   252 
       
   253 /** the encoding table for the externalised code
       
   254 	@internal
       
   255 */
       
   256 const TUint32 HuffmanEncoding[]=
       
   257 	{
       
   258 	0x10000000,
       
   259 	0x1c000000,
       
   260 	0x12000000,
       
   261 	0x1d000000,
       
   262 	0x26000000,
       
   263 	0x26800000,
       
   264 	0x2f000000,
       
   265 	0x37400000,
       
   266 	0x37600000,
       
   267 	0x37800000,
       
   268 	0x3fa00000,
       
   269 	0x3fb00000,
       
   270 	0x3fc00000,
       
   271 	0x3fd00000,
       
   272 	0x47e00000,
       
   273 	0x47e80000,
       
   274 	0x47f00000,
       
   275 	0x4ff80000,
       
   276 	0x57fc0000,
       
   277 	0x5ffe0000,
       
   278 	0x67ff0000,
       
   279 	0x77ff8000,
       
   280 	0x7fffa000,
       
   281 	0x7fffb000,
       
   282 	0x7fffc000,
       
   283 	0x7fffd000,
       
   284 	0x7fffe000,
       
   285 	0x87fff000,
       
   286 	0x87fff800
       
   287 	};
       
   288 
       
   289 void EncodeRunLengthL(TBitOutput& aOutput, TInt aLength)
       
   290 /** encode 0a as '0' and 0b as '1', return number of symbols created
       
   291 
       
   292 	@internal
       
   293 */
       
   294 	{
       
   295 	if (aLength>0)
       
   296 		{
       
   297 		EncodeRunLengthL(aOutput,(aLength-1)>>1);
       
   298 		aOutput.HuffmanL(HuffmanEncoding[1-(aLength&1)]);
       
   299 		}
       
   300 	}
       
   301 
       
   302 void Huffman::ExternalizeL(TBitOutput& aOutput,const TUint32 aHuffman[],TInt aNumCodes)
       
   303 /** Store a canonical huffman encoding in compact form
       
   304 
       
   305 	As the encoding is canonical, only the code lengths of each code needs to be saved.
       
   306 
       
   307 	Due to the nature of code length tables, these can usually be stored very compactly
       
   308 	by encoding the encoding itself, hence the use of the bit output stream.
       
   309 	
       
   310 	@param "TBitOutput& aOutput" The output stream for the encoding
       
   311 	@param "const TUint32 aHuffman[]" The table of code lengths as generated by Huffman::HuffmanL()
       
   312 	@param "TInt aNumCodes" The number of huffman codes in the table
       
   313 
       
   314 	@leave "TBitOutput::HuffmanL()"
       
   315 */
       
   316 	{
       
   317 	// We assume that the code length table is generated by the huffman generator,
       
   318 	// in which case the maxmimum code length is 27 bits.
       
   319 	//
       
   320 	// We apply three transformations to the data:
       
   321 	// 1. the data goes through a move-to-front coder
       
   322 	// 2. apply a rle-0 coder which replace runs of '0' with streams of '0a' and '0b'
       
   323 	// 3. encode the result using a predefined (average) huffman coding
       
   324 	//
       
   325 	// This can be done in a single pass over the data, avoiding the need for additional
       
   326 	// memory.
       
   327 	//
       
   328 	// initialise the list for the MTF coder
       
   329 	TFixedArray<TUint8,Huffman::KMetaCodes> list;
       
   330 	TInt i;
       
   331 	for (i=0;i<list.Count();++i)
       
   332 		list[i]=TUint8(i);
       
   333 	TInt last=0;
       
   334 
       
   335 	TInt rl=0;
       
   336 	const TUint32* p32=aHuffman;
       
   337 	const TUint32* e32=p32+aNumCodes;
       
   338 	while (p32<e32)
       
   339 		{
       
   340 		TInt c=*p32++;
       
   341 		if (c==last)
       
   342 			++rl;	// repeat of last symbol
       
   343 		else
       
   344 			{
       
   345 			// encode run-length
       
   346 			EncodeRunLengthL(aOutput,rl);
       
   347 			rl=0;
       
   348 			// find code in MTF list
       
   349 			TInt j;
       
   350 			for (j=1;list[j]!=c;++j)
       
   351 				;
       
   352 			// store this code
       
   353 			aOutput.HuffmanL(HuffmanEncoding[j+1]);
       
   354 			// adjust list for MTF algorithm
       
   355 			while (--j>0)
       
   356 				list[j+1]=list[j];
       
   357 			list[1]=TUint8(last);
       
   358 			last=c;
       
   359 			}
       
   360 		}
       
   361 	// encod any remaining run-length
       
   362 	EncodeRunLengthL(aOutput,rl);
       
   363 	}
       
   364 
       
   365 
       
   366 TBitOutput::TBitOutput()
       
   367 /** Construct a bit stream output object
       
   368 
       
   369 	Following construction the bit stream is ready for writing bits, but will first call
       
   370 	OverflowL() as the output buffer is 'full'. A derived class can detect this state as
       
   371 	Ptr() will return null.
       
   372 */
       
   373 	:iCode(0),iBits(-8),iPtr(0),iEnd(0)
       
   374 	{}
       
   375 
       
   376 TBitOutput::TBitOutput(TUint8* aBuf,TInt aSize)
       
   377 /** Construct a bit stream output object over a buffer
       
   378 
       
   379 	Data will be written to the buffer until it is full, at which point OverflowL() will
       
   380 	be called. This should handle the data and then can Set() again to reset the buffer
       
   381 	for further output.
       
   382 	
       
   383 	@param "TUint8* aBuf" The buffer for output
       
   384 	@param "TInt aSize" The size of the buffer in bytes
       
   385 */
       
   386 	:iCode(0),iBits(-8),iPtr(aBuf),iEnd(aBuf+aSize)
       
   387 	{}
       
   388 
       
   389 void TBitOutput::HuffmanL(TUint aHuffCode)
       
   390 /** Write a huffman code
       
   391 
       
   392 	This expects a huffman code value as generated by Huffman::Encoding()
       
   393 
       
   394 	@param "TUint aHuffCode" The huffman code write to the stream
       
   395 
       
   396 	@leave "OverflowL()" If the output buffer is full, OverflowL() is called
       
   397 */
       
   398 	{
       
   399 	DoWriteL(aHuffCode<<(32-Huffman::KMaxCodeLength),aHuffCode>>Huffman::KMaxCodeLength);
       
   400 	}
       
   401 
       
   402 void TBitOutput::WriteL(TUint aValue,TInt aLength)
       
   403 /** Write an arbitrary integer value
       
   404 
       
   405 	Write an unsigned integer using the number of bits specified. Only
       
   406 	the low order bits of the value are written to the output, most
       
   407 	significant bit first.
       
   408 
       
   409 	@param "TUint aValue" The value to write to the stream
       
   410 	@param "TUint aLength" The number of bits to output
       
   411 
       
   412 	@leave "OverflowL()" If the output buffer is full, OverflowL() is called
       
   413 */
       
   414 	{
       
   415 	if (aLength)
       
   416 		DoWriteL(aValue<<=32-aLength,aLength);
       
   417 	}
       
   418 
       
   419 void TBitOutput::PadL(TUint aPadding)
       
   420 /** Pad the bitstream to the next byte boundary
       
   421 
       
   422 	Terminate the bitstream by padding the last byte with the requested value.
       
   423 	Following this operation the bitstream can continue to be used, the data will
       
   424 	start at the next byte.
       
   425 
       
   426 	@param "TUint aPadding" The bit value to pad the final byte with
       
   427 
       
   428 	@leave "OverflowL()" If the output buffer is full, OverflowL() is called
       
   429 */
       
   430 	{
       
   431 	if (iBits>-8)
       
   432 		WriteL(aPadding?0xffffffffu:0,-iBits);
       
   433 	}
       
   434 
       
   435 void TBitOutput::DoWriteL(TUint aBits,TInt aSize)
       
   436 /** Write the higher order bits to the stream
       
   437 	
       
   438 	@internal
       
   439 */
       
   440 	{
       
   441 	if (aSize>25)
       
   442 		{
       
   443 		// cannot process >25 bits in a single pass
       
   444 		// so do the top 8 bits first
       
   445 		ASSERT(aSize<=32);
       
   446 		DoWriteL(aBits&0xff000000u,8);
       
   447 		aBits<<=8;
       
   448 		aSize-=8;
       
   449 		}
       
   450 
       
   451 	TInt bits=iBits;
       
   452 	TUint code=iCode|(aBits>>(bits+8));
       
   453 	bits+=aSize;
       
   454 	if (bits>=0)
       
   455 		{
       
   456 		TUint8* p=iPtr;
       
   457 		do
       
   458 			{
       
   459 			if (p==iEnd)
       
   460 				{
       
   461 				// run out of buffer space so invoke the overflow handler
       
   462 				iPtr=p;
       
   463 				OverflowL();
       
   464 				p=iPtr;
       
   465 				ASSERT(p!=iEnd);
       
   466 				}
       
   467 			*p++=TUint8(code>>24);
       
   468 			code<<=8;
       
   469 			bits-=8;
       
   470 			} while (bits>=0);
       
   471 		iPtr=p;
       
   472 		}
       
   473 	iCode=code;
       
   474 	iBits=bits;
       
   475 	}
       
   476 
       
   477 void TBitOutput::OverflowL()
       
   478 /** Handle a full output buffer
       
   479 
       
   480 	This virtual function is called when the output buffer is full. It should deal
       
   481 	with the data in the buffer before reseting the buffer using Set(), allowing
       
   482 	further data to be written.
       
   483 
       
   484 	A derived class can replace this to write the data to a file (for example)
       
   485 	before marking the buffer as empty.
       
   486 
       
   487 	@leave "KErrOverflow" The default implementation leaves
       
   488 */
       
   489 	{
       
   490 	Panic(EHuffmanBufferOverflow);
       
   491 	}