e32tools/elf2e32/source/byte_pair.cpp
changeset 0 044383f39525
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/e32tools/elf2e32/source/byte_pair.cpp	Tue Oct 27 16:36:35 2009 +0000
@@ -0,0 +1,488 @@
+// Copyright (c) 2005-2009 Nokia Corporation and/or its subsidiary(-ies).
+// All rights reserved.
+// This component and the accompanying materials are made available
+// under the terms of "Eclipse Public License v1.0"
+// which accompanies this distribution, and is available
+// at the URL "http://www.eclipse.org/legal/epl-v10.html".
+//
+// Initial Contributors:
+// Nokia Corporation - initial contribution.
+//
+// Contributors:
+//
+// Description:
+//
+
+#include "byte_pair.h"
+
+
+#undef ASSERT
+#define ASSERT(c)	if(!(c))	\
+						{		\
+						__BREAKPOINT()	\
+						}
+
+const TInt MaxBlockSize = 0x1000;
+
+TUint16 PairCount[0x10000];
+TUint16 PairBuffer[MaxBlockSize*2];
+
+TUint16 GlobalPairs[0x10000] = {0};
+TUint16 GlobalTokenCounts[0x100] = {0};
+
+TUint16 ByteCount[0x100+4];
+
+void CountBytes(TUint8* data, TInt size)
+	{
+	memset(ByteCount,0,sizeof(ByteCount));
+	TUint8* dataEnd = data+size;
+	while(data<dataEnd)
+		++ByteCount[*data++];
+	}
+
+
+inline void ByteUsed(TInt b)
+	{
+	ByteCount[b] = 0xffff;
+	}
+
+
+// 11915620
+// 11913551  	return -ByteCount[b1]-ByteCount[b2];
+// 11913185
+
+#if 0
+int TieBreak(int b1,int b2)
+	{
+	int i;
+	int x = 0;
+	for(i=0; i<0x100; i++)
+		x += PairCount[(b1<<8)+i];
+	int y = 0;
+	for(i=b2; i<0x10000; i+=0x100)
+		y += PairCount[i];
+	return -x-y;
+	}
+#endif
+
+int TieBreak(int b1,int b2)
+	{
+	return -ByteCount[b1]-ByteCount[b2];
+	}
+
+TInt MostCommonPair(TInt& pair, TUint8* data, TInt size, TInt minFrequency, TInt marker)
+	{
+	memset(PairCount,0,sizeof(PairCount));
+	TUint8* dataEnd = data+size-1;
+	TInt pairsFound = 0;
+	TInt lastPair = -1;
+	while(data<dataEnd)
+		{
+		TInt b1 = *data++;
+		if(b1==marker)
+			{
+			// skip marker and following byte
+			lastPair = -1;
+			++data;
+			continue;
+			}
+		TInt b2 = *data;
+		if(b2==marker)
+			{
+			// skip marker and following byte
+			lastPair = -1;
+			data+=2;
+			continue;
+			}
+		TInt p = (b2<<8)|b1;
+		if(p==lastPair)
+			{
+			// ensure a pair of identical bytes don't get double counted
+			lastPair = -1;
+			continue;
+			}
+		lastPair = p;
+		++PairCount[p];
+		if(PairCount[p]==minFrequency)
+			PairBuffer[pairsFound++] = (TUint16)p;
+		}
+
+	TInt bestCount = -1;
+	TInt bestPair = -1;
+	TInt bestTieBreak = 0;
+	TInt p;
+	while(pairsFound--)
+		{
+		p = PairBuffer[pairsFound];
+		TInt f=PairCount[p];
+		if(f>bestCount)
+			{
+			bestCount = f;
+			bestPair = p;
+			bestTieBreak = TieBreak(p&0xff,p>>8);
+			}
+		else if(f==bestCount)
+			{
+			TInt tieBreak = TieBreak(p&0xff,p>>8);
+			if(tieBreak>bestTieBreak)
+				{
+				bestCount = f;
+				bestPair = p;
+				bestTieBreak = tieBreak;
+				}
+			}
+		}
+	pair = bestPair;
+	return bestCount;
+	}
+
+
+TInt LeastCommonByte(TInt& byte)
+	{
+	TInt bestCount = 0xffff;
+	TInt bestByte = -1;
+	for(TInt b=0; b<0x100; b++)
+		{
+		TInt f = ByteCount[b];
+		if(f<bestCount)
+			{
+			bestCount = f;
+			bestByte = b;
+			}
+		}
+	byte = bestByte;
+	return bestCount;
+	}
+
+
+TInt Pak(TUint8* dst, TUint8* src, TInt size)
+	{
+	TInt originalSize = size;
+	TUint8* dst2 = dst+size*2;
+	TUint8* in = src;
+	TUint8* out = dst;
+
+	TUint8 tokens[0x100*3];
+	TInt tokenCount = 0;
+
+	CountBytes(in,size);
+
+	TInt marker = -1;
+	TInt overhead = 1+3+LeastCommonByte(marker);
+	ByteUsed(marker);
+
+	TUint8* inEnd = in+size;
+	TUint8* outStart = out;
+	while(in<inEnd)
+		{
+		TInt b=*in++;
+		if(b==marker)
+			*out++ = (TUint8)b;
+		*out++ = (TUint8)b;
+		}
+	size = out-outStart;
+
+	TInt outToggle = 1;
+	in = dst;
+	out = dst2;
+
+	for(TInt r=256; r>0; --r)
+		{
+		TInt byte;
+		TInt byteCount = LeastCommonByte(byte);
+		TInt pair;
+		TInt pairCount = MostCommonPair(pair,in,size,overhead+1,marker);
+		TInt saving = pairCount-byteCount;
+		if(saving<=overhead)
+			break;
+
+		overhead = 3;
+		if(tokenCount>=32)
+			overhead = 2;
+
+		TUint8* d=tokens+3*tokenCount;
+		++tokenCount;
+		*d++ = (TUint8)byte;
+		ByteUsed(byte);
+		*d++ = (TUint8)pair;
+		ByteUsed(pair&0xff);
+		*d++ = (TUint8)(pair>>8);
+		ByteUsed(pair>>8);
+		++GlobalPairs[pair];
+
+		inEnd = in+size;
+		outStart = out;
+		while(in<inEnd)
+			{
+			TInt b=*in++;
+			if(b==marker)
+				{
+				*out++ = (TUint8)marker;
+				b = *in++;
+				}
+			else if(b==byte)
+				{
+				*out++ = (TUint8)marker;
+				--byteCount;
+				}
+			else if(b==(pair&0xff) && in<inEnd && *in==(pair>>8))
+				{
+				++in;
+				b = byte;
+				--pairCount;
+				}
+			*out++ = (TUint8)b;
+			}
+		ASSERT(!byteCount);
+		ASSERT(!pairCount);
+		size = out-outStart;
+
+		outToggle ^= 1;
+		if(outToggle)
+			{
+			in = dst;
+			out = dst2;
+			}
+		else
+			{
+			in = dst2;
+			out = dst;
+			}
+		}
+
+	// sort tokens with a bubble sort...
+	for(TInt x=0; x<tokenCount-1; x++)
+		for(TInt y=x+1; y<tokenCount; y++)
+			if(tokens[x*3]>tokens[y*3])
+				{
+				TInt z = tokens[x*3];
+				tokens[x*3] = tokens[y*3];
+				tokens[y*3] = (TUint8)z;
+				z = tokens[x*3+1];
+				tokens[x*3+1] = tokens[y*3+1];
+				tokens[y*3+1] = (TUint8)z;
+				z = tokens[x*3+2];
+				tokens[x*3+2] = tokens[y*3+2];
+				tokens[y*3+2] = (TUint8)z;
+				}
+
+	// check for not being able to compress...
+	if(size>originalSize)
+		{
+		*dst++ = 0; // store zero token count
+		memcpy(dst,src,originalSize); // store original data
+		return originalSize+1;
+		}
+
+	// make sure data is in second buffer (dst2)
+	if(in!=dst2)
+		memcpy(dst2,dst,size);
+
+	// store tokens...
+	TUint8* originalDst = dst;
+	*dst++ = (TUint8)tokenCount;
+	if(tokenCount)
+		{
+		*dst++ = (TUint8)marker;
+		if(tokenCount<32)
+			{
+			memcpy(dst,tokens,tokenCount*3);
+			dst += tokenCount*3;
+			}
+		else
+			{
+			TUint8* bitMask = dst;
+			memset(bitMask,0,32);
+			dst += 32;
+			TUint8* d=tokens;
+			do
+				{
+				TInt t=*d++;
+				bitMask[t>>3] |= (1<<(t&7));
+				*dst++ = *d++;
+				*dst++ = *d++;
+				}
+			while(--tokenCount);
+			}
+		}
+	// store data...
+	memcpy(dst,dst2,size);
+	dst += size;
+
+	// get stats...
+	++GlobalTokenCounts[tokenCount];
+
+	// return total size of compressed data...
+	return dst-originalDst;
+	}
+
+
+TInt Unpak(TUint8* dst, TInt dstSize, TUint8* src, TInt srcSize, TUint8*& srcNext)
+	{
+	TUint8* dstStart = dst;
+	TUint8* dstEnd = dst+dstSize;
+	TUint8* srcEnd = src+srcSize;
+
+	TUint32 LUT[0x100/2];
+	TUint8* LUT0 = (TUint8*)LUT;
+	TUint8* LUT1 = LUT0+0x100;
+
+	TUint8 stack[0x100];
+	TUint8* stackStart = stack+sizeof(stack);
+	TUint8* sp = stackStart;
+
+	TUint32 marker = ~0u;
+	TInt numTokens;
+	TUint32 p1;
+	TUint32 p2;
+
+	TUint32* l = (TUint32*)LUT;
+	TUint32 b = 0x03020100;
+	TUint32 step = 0x04040404;
+	do
+		{
+		*l++ = b;
+		b += step;
+		}
+	while(b>step);
+
+	if(src>=srcEnd)
+		goto error;
+	numTokens = *src++;
+	if(numTokens)
+		{
+		if(src>=srcEnd)
+			goto error;
+		marker = *src++;
+		LUT0[marker] = (TUint8)~marker;
+
+		if(numTokens<32)
+			{
+			TUint8* tokenEnd = src+3*numTokens;
+			if(tokenEnd>srcEnd)
+				goto error;
+			do
+				{
+				TInt b = *src++;
+				TInt p1 = *src++;
+				TInt p2 = *src++;
+				LUT0[b] = (TUint8)p1;
+				LUT1[b] = (TUint8)p2;
+				}
+			while(src<tokenEnd);
+			}
+		else
+			{
+			TUint8* bitMask = src;
+			src += 32;
+			if(src>srcEnd)
+				goto error;
+			TInt b=0;
+			do
+				{
+				TUint8 mask = bitMask[b>>3];
+				if(mask&(1<<(b&7)))
+					{
+					if(src>srcEnd)
+						goto error;
+					TInt p1 = *src++;
+					if(src>srcEnd)
+						goto error;
+					TInt p2 = *src++;
+					LUT0[b] = (TUint8)p1;
+					LUT1[b] = (TUint8)p2;		
+					--numTokens;
+					}
+				++b;
+				}
+			while(b<0x100);
+			if(numTokens)
+				goto error;
+			}
+		}
+
+	if(src>=srcEnd)
+		goto error;
+	b = *src++;
+	if(dst>=dstEnd)
+		goto error;
+	p1 = LUT0[b];
+	if(p1!=b)
+		goto not_single;
+next:
+	if(src>=srcEnd)
+		goto done_s;
+	b = *src++;
+	*dst++ = (TUint8)p1;
+	if(dst>=dstEnd)
+		goto done_d;
+	p1 = LUT0[b];
+	if(p1==b)
+		goto next;
+
+not_single:
+	if(b==marker)
+		goto do_marker;
+
+do_pair:
+	p2 = LUT1[b];
+	b = p1;
+	p1 = LUT0[b];
+	if(sp<=stack)
+		goto error;
+	*--sp = (TUint8)p2;
+
+recurse:
+	if(b!=p1)
+		goto do_pair;
+
+	if(sp==stackStart)
+		goto next;
+	b = *sp++;
+	if(dst>=dstEnd)
+		goto error;
+	*dst++ = (TUint8)p1;
+	p1 = LUT0[b];
+	goto recurse;
+
+do_marker:
+	if(src>=srcEnd)
+		goto error;
+	p1 = *src++;
+	goto next;
+
+error:
+	srcNext = 0;
+	return KErrCorrupt;
+
+done_s:
+	*dst++ = (TUint8)p1;
+	srcNext = src;
+	return dst-dstStart;
+
+done_d:
+	if(dst>=dstEnd)
+		--src;
+	srcNext = src;
+	return dst-dstStart;
+	}
+
+
+TUint8 PakBuffer[MaxBlockSize*4];
+TUint8 UnpakBuffer[MaxBlockSize];
+
+
+TInt BytePairCompress(TUint8* dst, TUint8* src, TInt size)
+	{
+	ASSERT(size<=MaxBlockSize);
+	TInt compressedSize = Pak(PakBuffer,src,size);
+	TUint8* pakEnd;
+	TInt us = Unpak(UnpakBuffer,MaxBlockSize,PakBuffer,compressedSize,pakEnd);
+	ASSERT(us==size)
+	ASSERT(pakEnd==PakBuffer+compressedSize)
+	ASSERT(!memcmp(src,UnpakBuffer,size))
+	if(compressedSize>=size)
+		return KErrTooBig;
+	memcpy(dst,PakBuffer,compressedSize);
+	return compressedSize;
+	}