kernel/eka/euser/us_encode.cpp
changeset 0 a41df078684a
equal deleted inserted replaced
-1:000000000000 0:a41df078684a
       
     1 // Copyright (c) 1998-2009 Nokia Corporation and/or its subsidiary(-ies).
       
     2 // All rights reserved.
       
     3 // This component and the accompanying materials are made available
       
     4 // under the terms of the License "Eclipse Public License v1.0"
       
     5 // which accompanies this distribution, and is available
       
     6 // at the URL "http://www.eclipse.org/legal/epl-v10.html".
       
     7 //
       
     8 // Initial Contributors:
       
     9 // Nokia Corporation - initial contribution.
       
    10 //
       
    11 // Contributors:
       
    12 //
       
    13 // Description:
       
    14 // e32\euser\us_encode.cpp
       
    15 // 
       
    16 //
       
    17 
       
    18 #include "e32huffman.h"
       
    19 #include <e32base.h>
       
    20 #include <e32base_private.h>
       
    21 #include <e32panic.h>
       
    22 
       
    23 _LIT(KCat,"Huffman");
       
    24 // local definitions used for Huffman code generation
       
    25 typedef TUint16 THuff;		/** @internal */
       
    26 const THuff KLeaf=0x8000;	/** @internal */
       
    27 struct TNode
       
    28 /** @internal */
       
    29 	{
       
    30 	TUint iCount;
       
    31 	THuff iLeft;
       
    32 	THuff iRight;
       
    33 	};
       
    34 
       
    35 /** recursive function to calculate the code lengths from the node tree
       
    36 
       
    37 	@internalComponent
       
    38 */
       
    39 void HuffmanLengthsL(TUint32* aLengths,const TNode* aNodes,TInt aNode,TInt aLen)
       
    40 	{
       
    41 	if (++aLen>Huffman::KMaxCodeLength)
       
    42 		User::Leave(KErrOverflow);
       
    43 
       
    44 	const TNode& node=aNodes[aNode];
       
    45 	TUint x=node.iLeft;
       
    46 	if (x&KLeaf)
       
    47 		aLengths[x&~KLeaf]=aLen;
       
    48 	else
       
    49 		HuffmanLengthsL(aLengths,aNodes,x,aLen);
       
    50 	x=node.iRight;
       
    51 	if (x&KLeaf)
       
    52 		aLengths[x&~KLeaf]=aLen;
       
    53 	else
       
    54 		HuffmanLengthsL(aLengths,aNodes,x,aLen);
       
    55 	}
       
    56 
       
    57 /**	Insert the {aCount,aValue} pair into the already sorted array of nodes
       
    58 
       
    59 	@internalComponent
       
    60 */
       
    61 void InsertInOrder(TNode* aNodes, TInt aSize, TUint aCount, TInt aVal)
       
    62 	{
       
    63 	// Uses Insertion sort following a binary search...
       
    64 	TInt l=0, r=aSize;
       
    65 	while (l < r)
       
    66 		{
       
    67 		TInt m = (l+r) >> 1;
       
    68 		if (aNodes[m].iCount<aCount)
       
    69 			r=m;
       
    70 		else
       
    71 			l=m+1;
       
    72 		}
       
    73 	Mem::Copy(aNodes+l+1,aNodes+l,sizeof(TNode)*(aSize-l));
       
    74 	aNodes[l].iCount=aCount;
       
    75 	aNodes[l].iRight=TUint16(aVal);
       
    76 	}
       
    77 
       
    78 /** Generate a Huffman code
       
    79 
       
    80 	This generates a Huffman code for a given set of code frequencies. The output
       
    81 	is a table of code lengths which can be used to build canonincal encoding tables
       
    82 	or decoding trees for use with the TBitInput and TBitOutput classes.
       
    83 
       
    84 	Entries in the table with a frequency of zero will have a zero code length
       
    85 	and thus no associated huffman encoding. If each such symbol should have a
       
    86 	maximum length encoding, they must be given at least a frequency of 1.
       
    87 
       
    88 	For an alphabet of n symbols, this algorithm has a transient memory overhead
       
    89 	of 8n, and a time complexity of O(n*log(n)).
       
    90 
       
    91 	@param aFrequency The table of code frequencies
       
    92 	@param aNumCodes The number of codes in the table
       
    93 	@param aHuffman The table for the output code-length table. This must be
       
    94 		the same size as the frequency table, and can safely be the same table
       
    95 
       
    96 	@leave KErrNoMemory If memory used for code generation cannot be allocated
       
    97 
       
    98   	@panic "USER ???" If the number of codes exceeds Huffman::KMaxCodes
       
    99 */
       
   100 EXPORT_C void Huffman::HuffmanL(const TUint32 aFrequency[],TInt aNumCodes,TUint32 aHuffman[])
       
   101 	{
       
   102 	__ASSERT_ALWAYS(TUint(aNumCodes)<=TUint(KMaxCodes),User::Panic(KCat,EHuffmanTooManyCodes));
       
   103 
       
   104 	// Sort the values into decreasing order of frequency
       
   105 	//
       
   106 	TNode* nodes = new(ELeave) TNode[aNumCodes];
       
   107 	CleanupArrayDeletePushL(nodes);
       
   108 	TInt lCount=0;
       
   109 
       
   110 	for (TInt ii=0;ii<aNumCodes;++ii)
       
   111 		{
       
   112 		TInt c=aFrequency[ii];
       
   113 		if (c!=0)
       
   114 			InsertInOrder(nodes,lCount++,c,ii|KLeaf);
       
   115 		}
       
   116 
       
   117 	// default code length is zero
       
   118 	Mem::FillZ(aHuffman,aNumCodes*sizeof(TUint32));
       
   119 
       
   120 	if (lCount==0)
       
   121 		{
       
   122 		// no codes with frequency>0. No code has a length
       
   123 		}
       
   124 	else if (lCount==1)
       
   125 		{
       
   126 		// special case for a single value (always encode as "0")
       
   127 		aHuffman[nodes[0].iRight&~KLeaf]=1;
       
   128 		}
       
   129 	else
       
   130 		{
       
   131 		// Huffman algorithm: pair off least frequent nodes and reorder
       
   132 		//
       
   133 		do
       
   134 			{
       
   135 			--lCount;
       
   136 			TUint c=nodes[lCount].iCount + nodes[lCount-1].iCount;
       
   137 			nodes[lCount].iLeft=nodes[lCount-1].iRight;
       
   138 			// re-order the leaves now to reflect new combined frequency 'c'
       
   139 			InsertInOrder(nodes,lCount-1,c,lCount);
       
   140 			} while (lCount>1);
       
   141 		// generate code lengths in aHuffman[]
       
   142 		HuffmanLengthsL(aHuffman,nodes,1,0);
       
   143 		}
       
   144 	CleanupStack::PopAndDestroy(nodes);
       
   145 
       
   146 	__ASSERT_DEBUG(IsValid(aHuffman,aNumCodes),User::Panic(KCat,EHuffmanInvalidCoding));
       
   147 	}
       
   148 
       
   149 /** Validate a Huffman encoding
       
   150 
       
   151 	This verifies that a Huffman coding described by the code lengths is valid.
       
   152 	In particular, it ensures that no code exceeds the maximum length and
       
   153 	that it is possible to generate a canonical coding for the specified lengths.
       
   154 	
       
   155 	@param aHuffman The table of code lengths as generated by Huffman::HuffmanL()
       
   156 	@param aNumCodes The number of codes in the table
       
   157 
       
   158 	@return True if the code is valid, otherwise false
       
   159 */
       
   160 EXPORT_C TBool Huffman::IsValid(const TUint32 aHuffman[],TInt aNumCodes)
       
   161 	{
       
   162 	// The code is valid if one of the following holds:
       
   163 	// (a) the code exactly fills the 'code space'
       
   164 	// (b) there is only a single symbol with code length 1
       
   165 	// (c) there are no encoded symbols
       
   166 	//
       
   167 	TUint remain=1<<KMaxCodeLength;
       
   168 	TInt totlen=0;
       
   169 	for (const TUint32* p=aHuffman+aNumCodes; p>aHuffman;)
       
   170 		{
       
   171 		TInt len=*--p;
       
   172 		if (len>0)
       
   173 			{
       
   174 			totlen+=len;
       
   175 			if (len>KMaxCodeLength)
       
   176 				return EFalse;
       
   177 			TUint c=1<<(KMaxCodeLength-len);
       
   178 			if (c>remain)
       
   179 				return EFalse;
       
   180 			remain-=c;
       
   181 			}
       
   182 		}
       
   183 
       
   184 	return remain==0 || totlen<=1;
       
   185 	}
       
   186 
       
   187 /** Create a canonical Huffman encoding table
       
   188 
       
   189 	This generates the huffman codes used by TBitOutput::HuffmanL() to write huffman
       
   190 	encoded data. The input is table of code lengths, as generated by Huffman::HuffmanL()
       
   191 	and must represent a valid huffman code.
       
   192 	
       
   193 	@param aHuffman The table of code lengths as generated by Huffman::HuffmanL()
       
   194 	@param aNumCodes The number of codes in the table
       
   195 	@param aEncodeTable The table for the output huffman codes. This must be
       
   196 		the same size as the code-length table, and can safely be the same table
       
   197 
       
   198 	@panic "USER ???" If the provided code is not a valid Huffman coding
       
   199 	
       
   200 	@see IsValid()
       
   201 	@see HuffmanL()
       
   202 */
       
   203 EXPORT_C void Huffman::Encoding(const TUint32 aHuffman[],TInt aNumCodes,TUint32 aEncodeTable[])
       
   204 	{
       
   205 	__ASSERT_ALWAYS(IsValid(aHuffman,aNumCodes),User::Panic(KCat,EHuffmanInvalidCoding));
       
   206 
       
   207 	TFixedArray<TInt,KMaxCodeLength> lenCount;
       
   208 	lenCount.Reset();
       
   209 
       
   210 	TInt ii;
       
   211 	for (ii=0;ii<aNumCodes;++ii)
       
   212 		{
       
   213 		TInt len=aHuffman[ii]-1;
       
   214 		if (len>=0)
       
   215 			++lenCount[len];
       
   216 		}
       
   217 
       
   218 	TFixedArray<TUint,KMaxCodeLength> nextCode;
       
   219 	TUint code=0;
       
   220 	for (ii=0;ii<KMaxCodeLength;++ii)
       
   221 		{
       
   222 		code<<=1;
       
   223 		nextCode[ii]=code;
       
   224 		code+=lenCount[ii];
       
   225 		}
       
   226 
       
   227 	for (ii=0;ii<aNumCodes;++ii)
       
   228 		{
       
   229 		TInt len=aHuffman[ii];
       
   230 		if (len==0)
       
   231 			aEncodeTable[ii]=0;
       
   232 		else
       
   233 			{
       
   234 			aEncodeTable[ii] = (nextCode[len-1]<<(KMaxCodeLength-len))|(len<<KMaxCodeLength);
       
   235 			++nextCode[len-1];
       
   236 			}
       
   237 		}
       
   238 	}
       
   239 
       
   240 /** the encoding table for the externalised code
       
   241 	@internalComponent
       
   242 */
       
   243 const TUint32 HuffmanEncoding[]=
       
   244 	{
       
   245 	0x10000000,
       
   246 	0x1c000000,
       
   247 	0x12000000,
       
   248 	0x1d000000,
       
   249 	0x26000000,
       
   250 	0x26800000,
       
   251 	0x2f000000,
       
   252 	0x37400000,
       
   253 	0x37600000,
       
   254 	0x37800000,
       
   255 	0x3fa00000,
       
   256 	0x3fb00000,
       
   257 	0x3fc00000,
       
   258 	0x3fd00000,
       
   259 	0x47e00000,
       
   260 	0x47e80000,
       
   261 	0x47f00000,
       
   262 	0x4ff80000,
       
   263 	0x57fc0000,
       
   264 	0x5ffe0000,
       
   265 	0x67ff0000,
       
   266 	0x77ff8000,
       
   267 	0x7fffa000,
       
   268 	0x7fffb000,
       
   269 	0x7fffc000,
       
   270 	0x7fffd000,
       
   271 	0x7fffe000,
       
   272 	0x87fff000,
       
   273 	0x87fff800
       
   274 	};
       
   275 
       
   276 /** encode 0a as '0' and 0b as '1', return number of symbols created
       
   277 
       
   278 	@internalComponent
       
   279 */
       
   280 void EncodeRunLengthL(TBitOutput& aOutput, TInt aLength)
       
   281 	{
       
   282 	if (aLength>0)
       
   283 		{
       
   284 		EncodeRunLengthL(aOutput,(aLength-1)>>1);
       
   285 		aOutput.HuffmanL(HuffmanEncoding[1-(aLength&1)]);
       
   286 		}
       
   287 	}
       
   288 
       
   289 /** Store a canonical huffman encoding in compact form
       
   290 
       
   291 	As the encoding is canonical, only the code lengths of each code needs to be saved.
       
   292 
       
   293 	Due to the nature of code length tables, these can usually be stored very compactly
       
   294 	by encoding the encoding itself, hence the use of the bit output stream.
       
   295 	
       
   296 	@param aOutput The output stream for the encoding
       
   297 	@param aHuffman The table of code lengths as generated by Huffman::HuffmanL()
       
   298 	@param aNumCodes The number of huffman codes in the table
       
   299 
       
   300 	@leave TBitOutput::HuffmanL()
       
   301 */
       
   302 EXPORT_C void Huffman::ExternalizeL(TBitOutput& aOutput,const TUint32 aHuffman[],TInt aNumCodes)
       
   303 	{
       
   304 	// We assume that the code length table is generated by the huffman generator,
       
   305 	// in which case the maxmimum code length is 27 bits.
       
   306 	//
       
   307 	// We apply three transformations to the data:
       
   308 	// 1. the data goes through a move-to-front coder
       
   309 	// 2. apply a rle-0 coder which replace runs of '0' with streams of '0a' and '0b'
       
   310 	// 3. encode the result using a predefined (average) huffman coding
       
   311 	//
       
   312 	// This can be done in a single pass over the data, avoiding the need for additional
       
   313 	// memory.
       
   314 	//
       
   315 	// initialise the list for the MTF coder
       
   316 	TFixedArray<TUint8,Huffman::KMetaCodes> list;
       
   317 	TInt i;
       
   318 	for (i=0;i<list.Count();++i)
       
   319 		list[i]=TUint8(i);
       
   320 	TInt last=0;
       
   321 
       
   322 	TInt rl=0;
       
   323 	const TUint32* p32=aHuffman;
       
   324 	const TUint32* e32=p32+aNumCodes;
       
   325 	while (p32<e32)
       
   326 		{
       
   327 		TInt c=*p32++;
       
   328 		if (c==last)
       
   329 			++rl;	// repeat of last symbol
       
   330 		else
       
   331 			{
       
   332 			// encode run-length
       
   333 			EncodeRunLengthL(aOutput,rl);
       
   334 			rl=0;
       
   335 			// find code in MTF list
       
   336 			TInt j;
       
   337 			for (j=1;list[j]!=c;++j)
       
   338 				;
       
   339 			// store this code
       
   340 			aOutput.HuffmanL(HuffmanEncoding[j+1]);
       
   341 			// adjust list for MTF algorithm
       
   342 			while (--j>0)
       
   343 				list[j+1]=list[j];
       
   344 			list[1]=TUint8(last);
       
   345 			last=c;
       
   346 			}
       
   347 		}
       
   348 	// encod any remaining run-length
       
   349 	EncodeRunLengthL(aOutput,rl);
       
   350 	}
       
   351 
       
   352 
       
   353 /** Construct a bit stream output object
       
   354 
       
   355 	Following construction the bit stream is ready for writing bits, but will first call
       
   356 	OverflowL() as the output buffer is 'full'. A derived class can detect this state as
       
   357 	Ptr() will return null.
       
   358 */
       
   359 EXPORT_C TBitOutput::TBitOutput()
       
   360 	:iCode(0),iBits(-8),iPtr(0),iEnd(0)
       
   361 	{}
       
   362 
       
   363 /** Construct a bit stream output object over a buffer
       
   364 
       
   365 	Data will be written to the buffer until it is full, at which point OverflowL() will
       
   366 	be called. This should handle the data and then can Set() again to reset the buffer
       
   367 	for further output.
       
   368 	
       
   369 	@param aBuf The buffer for output
       
   370 	@param aSize The size of the buffer in bytes
       
   371 */
       
   372 EXPORT_C TBitOutput::TBitOutput(TUint8* aBuf,TInt aSize)
       
   373 	:iCode(0),iBits(-8),iPtr(aBuf),iEnd(aBuf+aSize)
       
   374 	{}
       
   375 
       
   376 /** Write a huffman code
       
   377 
       
   378 	This expects a huffman code value as generated by Huffman::Encoding()
       
   379 
       
   380 	@param aHuffCode The huffman code write to the stream
       
   381 
       
   382 	@leave OverflowL() If the output buffer is full, OverflowL() is called
       
   383 */
       
   384 EXPORT_C void TBitOutput::HuffmanL(TUint aHuffCode)
       
   385 	{
       
   386 	DoWriteL(aHuffCode<<(32-Huffman::KMaxCodeLength),aHuffCode>>Huffman::KMaxCodeLength);
       
   387 	}
       
   388 
       
   389 /** Write an arbitrary integer value
       
   390 
       
   391 	Write an unsigned integer using the number of bits specified. Only
       
   392 	the low order bits of the value are written to the output, most
       
   393 	significant bit first.
       
   394 
       
   395 	@param aValue The value to write to the stream
       
   396 	@param aLength The number of bits to output
       
   397 
       
   398 	@leave OverflowL() If the output buffer is full, OverflowL() is called
       
   399 */
       
   400 EXPORT_C void TBitOutput::WriteL(TUint aValue,TInt aLength)
       
   401 	{
       
   402 	if (aLength)
       
   403 		DoWriteL(aValue<<=32-aLength,aLength);
       
   404 	}
       
   405 
       
   406 /** Pad the bitstream to the next byte boundary
       
   407 
       
   408 	Terminate the bitstream by padding the last byte with the requested value.
       
   409 	Following this operation the bitstream can continue to be used, the data will
       
   410 	start at the next byte.
       
   411 
       
   412 	@param aPadding The bit value to pad the final byte with
       
   413 
       
   414 	@leave OverflowL() If the output buffer is full, OverflowL() is called
       
   415 */
       
   416 EXPORT_C void TBitOutput::PadL(TUint aPadding)
       
   417 	{
       
   418 	if (iBits>-8)
       
   419 		WriteL(aPadding?0xffffffffu:0,-iBits);
       
   420 	}
       
   421 
       
   422 /** Write the higher order bits to the stream
       
   423 	
       
   424 	@internalComponent
       
   425 */
       
   426 void TBitOutput::DoWriteL(TUint aBits,TInt aSize)
       
   427 	{
       
   428 	if (aSize>25)
       
   429 		{
       
   430 		// cannot process >25 bits in a single pass
       
   431 		// so do the top 8 bits first
       
   432 		ASSERT(aSize<=32);
       
   433 		DoWriteL(aBits&0xff000000u,8);
       
   434 		aBits<<=8;
       
   435 		aSize-=8;
       
   436 		}
       
   437 
       
   438 	TInt bits=iBits;
       
   439 	TUint code=iCode|(aBits>>(bits+8));
       
   440 	bits+=aSize;
       
   441 	if (bits>=0)
       
   442 		{
       
   443 		TUint8* p=iPtr;
       
   444 		do
       
   445 			{
       
   446 			if (p==iEnd)
       
   447 				{
       
   448 				// run out of buffer space so invoke the overflow handler
       
   449 				iPtr=p;
       
   450 				OverflowL();
       
   451 				p=iPtr;
       
   452 				ASSERT(p!=iEnd);
       
   453 				}
       
   454 			*p++=TUint8(code>>24);
       
   455 			code<<=8;
       
   456 			bits-=8;
       
   457 			} while (bits>=0);
       
   458 		iPtr=p;
       
   459 		}
       
   460 	iCode=code;
       
   461 	iBits=bits;
       
   462 	}
       
   463 
       
   464 /** Handle a full output buffer
       
   465 
       
   466 	This virtual function is called when the output buffer is full. It should deal
       
   467 	with the data in the buffer before reseting the buffer using Set(), allowing
       
   468 	further data to be written.
       
   469 
       
   470 	A derived class can replace this to write the data to a file (for example)
       
   471 	before marking the buffer as empty.
       
   472 
       
   473 	@leave KErrOverflow The default implementation leaves
       
   474 */
       
   475 void TBitOutput::OverflowL()
       
   476 	{
       
   477 	User::Leave(KErrOverflow);
       
   478 	}