e32tools/elf2e32/source/byte_pair.cpp
changeset 0 044383f39525
equal deleted inserted replaced
-1:000000000000 0:044383f39525
       
     1 // Copyright (c) 2005-2009 Nokia Corporation and/or its subsidiary(-ies).
       
     2 // All rights reserved.
       
     3 // This component and the accompanying materials are made available
       
     4 // under the terms of "Eclipse Public License v1.0"
       
     5 // which accompanies this distribution, and is available
       
     6 // at the URL "http://www.eclipse.org/legal/epl-v10.html".
       
     7 //
       
     8 // Initial Contributors:
       
     9 // Nokia Corporation - initial contribution.
       
    10 //
       
    11 // Contributors:
       
    12 //
       
    13 // Description:
       
    14 //
       
    15 
       
    16 #include "byte_pair.h"
       
    17 
       
    18 
       
    19 #undef ASSERT
       
    20 #define ASSERT(c)	if(!(c))	\
       
    21 						{		\
       
    22 						__BREAKPOINT()	\
       
    23 						}
       
    24 
       
    25 const TInt MaxBlockSize = 0x1000;
       
    26 
       
    27 TUint16 PairCount[0x10000];
       
    28 TUint16 PairBuffer[MaxBlockSize*2];
       
    29 
       
    30 TUint16 GlobalPairs[0x10000] = {0};
       
    31 TUint16 GlobalTokenCounts[0x100] = {0};
       
    32 
       
    33 TUint16 ByteCount[0x100+4];
       
    34 
       
    35 void CountBytes(TUint8* data, TInt size)
       
    36 	{
       
    37 	memset(ByteCount,0,sizeof(ByteCount));
       
    38 	TUint8* dataEnd = data+size;
       
    39 	while(data<dataEnd)
       
    40 		++ByteCount[*data++];
       
    41 	}
       
    42 
       
    43 
       
    44 inline void ByteUsed(TInt b)
       
    45 	{
       
    46 	ByteCount[b] = 0xffff;
       
    47 	}
       
    48 
       
    49 
       
    50 // 11915620
       
    51 // 11913551  	return -ByteCount[b1]-ByteCount[b2];
       
    52 // 11913185
       
    53 
       
    54 #if 0
       
    55 int TieBreak(int b1,int b2)
       
    56 	{
       
    57 	int i;
       
    58 	int x = 0;
       
    59 	for(i=0; i<0x100; i++)
       
    60 		x += PairCount[(b1<<8)+i];
       
    61 	int y = 0;
       
    62 	for(i=b2; i<0x10000; i+=0x100)
       
    63 		y += PairCount[i];
       
    64 	return -x-y;
       
    65 	}
       
    66 #endif
       
    67 
       
    68 int TieBreak(int b1,int b2)
       
    69 	{
       
    70 	return -ByteCount[b1]-ByteCount[b2];
       
    71 	}
       
    72 
       
    73 TInt MostCommonPair(TInt& pair, TUint8* data, TInt size, TInt minFrequency, TInt marker)
       
    74 	{
       
    75 	memset(PairCount,0,sizeof(PairCount));
       
    76 	TUint8* dataEnd = data+size-1;
       
    77 	TInt pairsFound = 0;
       
    78 	TInt lastPair = -1;
       
    79 	while(data<dataEnd)
       
    80 		{
       
    81 		TInt b1 = *data++;
       
    82 		if(b1==marker)
       
    83 			{
       
    84 			// skip marker and following byte
       
    85 			lastPair = -1;
       
    86 			++data;
       
    87 			continue;
       
    88 			}
       
    89 		TInt b2 = *data;
       
    90 		if(b2==marker)
       
    91 			{
       
    92 			// skip marker and following byte
       
    93 			lastPair = -1;
       
    94 			data+=2;
       
    95 			continue;
       
    96 			}
       
    97 		TInt p = (b2<<8)|b1;
       
    98 		if(p==lastPair)
       
    99 			{
       
   100 			// ensure a pair of identical bytes don't get double counted
       
   101 			lastPair = -1;
       
   102 			continue;
       
   103 			}
       
   104 		lastPair = p;
       
   105 		++PairCount[p];
       
   106 		if(PairCount[p]==minFrequency)
       
   107 			PairBuffer[pairsFound++] = (TUint16)p;
       
   108 		}
       
   109 
       
   110 	TInt bestCount = -1;
       
   111 	TInt bestPair = -1;
       
   112 	TInt bestTieBreak = 0;
       
   113 	TInt p;
       
   114 	while(pairsFound--)
       
   115 		{
       
   116 		p = PairBuffer[pairsFound];
       
   117 		TInt f=PairCount[p];
       
   118 		if(f>bestCount)
       
   119 			{
       
   120 			bestCount = f;
       
   121 			bestPair = p;
       
   122 			bestTieBreak = TieBreak(p&0xff,p>>8);
       
   123 			}
       
   124 		else if(f==bestCount)
       
   125 			{
       
   126 			TInt tieBreak = TieBreak(p&0xff,p>>8);
       
   127 			if(tieBreak>bestTieBreak)
       
   128 				{
       
   129 				bestCount = f;
       
   130 				bestPair = p;
       
   131 				bestTieBreak = tieBreak;
       
   132 				}
       
   133 			}
       
   134 		}
       
   135 	pair = bestPair;
       
   136 	return bestCount;
       
   137 	}
       
   138 
       
   139 
       
   140 TInt LeastCommonByte(TInt& byte)
       
   141 	{
       
   142 	TInt bestCount = 0xffff;
       
   143 	TInt bestByte = -1;
       
   144 	for(TInt b=0; b<0x100; b++)
       
   145 		{
       
   146 		TInt f = ByteCount[b];
       
   147 		if(f<bestCount)
       
   148 			{
       
   149 			bestCount = f;
       
   150 			bestByte = b;
       
   151 			}
       
   152 		}
       
   153 	byte = bestByte;
       
   154 	return bestCount;
       
   155 	}
       
   156 
       
   157 
       
   158 TInt Pak(TUint8* dst, TUint8* src, TInt size)
       
   159 	{
       
   160 	TInt originalSize = size;
       
   161 	TUint8* dst2 = dst+size*2;
       
   162 	TUint8* in = src;
       
   163 	TUint8* out = dst;
       
   164 
       
   165 	TUint8 tokens[0x100*3];
       
   166 	TInt tokenCount = 0;
       
   167 
       
   168 	CountBytes(in,size);
       
   169 
       
   170 	TInt marker = -1;
       
   171 	TInt overhead = 1+3+LeastCommonByte(marker);
       
   172 	ByteUsed(marker);
       
   173 
       
   174 	TUint8* inEnd = in+size;
       
   175 	TUint8* outStart = out;
       
   176 	while(in<inEnd)
       
   177 		{
       
   178 		TInt b=*in++;
       
   179 		if(b==marker)
       
   180 			*out++ = (TUint8)b;
       
   181 		*out++ = (TUint8)b;
       
   182 		}
       
   183 	size = out-outStart;
       
   184 
       
   185 	TInt outToggle = 1;
       
   186 	in = dst;
       
   187 	out = dst2;
       
   188 
       
   189 	for(TInt r=256; r>0; --r)
       
   190 		{
       
   191 		TInt byte;
       
   192 		TInt byteCount = LeastCommonByte(byte);
       
   193 		TInt pair;
       
   194 		TInt pairCount = MostCommonPair(pair,in,size,overhead+1,marker);
       
   195 		TInt saving = pairCount-byteCount;
       
   196 		if(saving<=overhead)
       
   197 			break;
       
   198 
       
   199 		overhead = 3;
       
   200 		if(tokenCount>=32)
       
   201 			overhead = 2;
       
   202 
       
   203 		TUint8* d=tokens+3*tokenCount;
       
   204 		++tokenCount;
       
   205 		*d++ = (TUint8)byte;
       
   206 		ByteUsed(byte);
       
   207 		*d++ = (TUint8)pair;
       
   208 		ByteUsed(pair&0xff);
       
   209 		*d++ = (TUint8)(pair>>8);
       
   210 		ByteUsed(pair>>8);
       
   211 		++GlobalPairs[pair];
       
   212 
       
   213 		inEnd = in+size;
       
   214 		outStart = out;
       
   215 		while(in<inEnd)
       
   216 			{
       
   217 			TInt b=*in++;
       
   218 			if(b==marker)
       
   219 				{
       
   220 				*out++ = (TUint8)marker;
       
   221 				b = *in++;
       
   222 				}
       
   223 			else if(b==byte)
       
   224 				{
       
   225 				*out++ = (TUint8)marker;
       
   226 				--byteCount;
       
   227 				}
       
   228 			else if(b==(pair&0xff) && in<inEnd && *in==(pair>>8))
       
   229 				{
       
   230 				++in;
       
   231 				b = byte;
       
   232 				--pairCount;
       
   233 				}
       
   234 			*out++ = (TUint8)b;
       
   235 			}
       
   236 		ASSERT(!byteCount);
       
   237 		ASSERT(!pairCount);
       
   238 		size = out-outStart;
       
   239 
       
   240 		outToggle ^= 1;
       
   241 		if(outToggle)
       
   242 			{
       
   243 			in = dst;
       
   244 			out = dst2;
       
   245 			}
       
   246 		else
       
   247 			{
       
   248 			in = dst2;
       
   249 			out = dst;
       
   250 			}
       
   251 		}
       
   252 
       
   253 	// sort tokens with a bubble sort...
       
   254 	for(TInt x=0; x<tokenCount-1; x++)
       
   255 		for(TInt y=x+1; y<tokenCount; y++)
       
   256 			if(tokens[x*3]>tokens[y*3])
       
   257 				{
       
   258 				TInt z = tokens[x*3];
       
   259 				tokens[x*3] = tokens[y*3];
       
   260 				tokens[y*3] = (TUint8)z;
       
   261 				z = tokens[x*3+1];
       
   262 				tokens[x*3+1] = tokens[y*3+1];
       
   263 				tokens[y*3+1] = (TUint8)z;
       
   264 				z = tokens[x*3+2];
       
   265 				tokens[x*3+2] = tokens[y*3+2];
       
   266 				tokens[y*3+2] = (TUint8)z;
       
   267 				}
       
   268 
       
   269 	// check for not being able to compress...
       
   270 	if(size>originalSize)
       
   271 		{
       
   272 		*dst++ = 0; // store zero token count
       
   273 		memcpy(dst,src,originalSize); // store original data
       
   274 		return originalSize+1;
       
   275 		}
       
   276 
       
   277 	// make sure data is in second buffer (dst2)
       
   278 	if(in!=dst2)
       
   279 		memcpy(dst2,dst,size);
       
   280 
       
   281 	// store tokens...
       
   282 	TUint8* originalDst = dst;
       
   283 	*dst++ = (TUint8)tokenCount;
       
   284 	if(tokenCount)
       
   285 		{
       
   286 		*dst++ = (TUint8)marker;
       
   287 		if(tokenCount<32)
       
   288 			{
       
   289 			memcpy(dst,tokens,tokenCount*3);
       
   290 			dst += tokenCount*3;
       
   291 			}
       
   292 		else
       
   293 			{
       
   294 			TUint8* bitMask = dst;
       
   295 			memset(bitMask,0,32);
       
   296 			dst += 32;
       
   297 			TUint8* d=tokens;
       
   298 			do
       
   299 				{
       
   300 				TInt t=*d++;
       
   301 				bitMask[t>>3] |= (1<<(t&7));
       
   302 				*dst++ = *d++;
       
   303 				*dst++ = *d++;
       
   304 				}
       
   305 			while(--tokenCount);
       
   306 			}
       
   307 		}
       
   308 	// store data...
       
   309 	memcpy(dst,dst2,size);
       
   310 	dst += size;
       
   311 
       
   312 	// get stats...
       
   313 	++GlobalTokenCounts[tokenCount];
       
   314 
       
   315 	// return total size of compressed data...
       
   316 	return dst-originalDst;
       
   317 	}
       
   318 
       
   319 
       
   320 TInt Unpak(TUint8* dst, TInt dstSize, TUint8* src, TInt srcSize, TUint8*& srcNext)
       
   321 	{
       
   322 	TUint8* dstStart = dst;
       
   323 	TUint8* dstEnd = dst+dstSize;
       
   324 	TUint8* srcEnd = src+srcSize;
       
   325 
       
   326 	TUint32 LUT[0x100/2];
       
   327 	TUint8* LUT0 = (TUint8*)LUT;
       
   328 	TUint8* LUT1 = LUT0+0x100;
       
   329 
       
   330 	TUint8 stack[0x100];
       
   331 	TUint8* stackStart = stack+sizeof(stack);
       
   332 	TUint8* sp = stackStart;
       
   333 
       
   334 	TUint32 marker = ~0u;
       
   335 	TInt numTokens;
       
   336 	TUint32 p1;
       
   337 	TUint32 p2;
       
   338 
       
   339 	TUint32* l = (TUint32*)LUT;
       
   340 	TUint32 b = 0x03020100;
       
   341 	TUint32 step = 0x04040404;
       
   342 	do
       
   343 		{
       
   344 		*l++ = b;
       
   345 		b += step;
       
   346 		}
       
   347 	while(b>step);
       
   348 
       
   349 	if(src>=srcEnd)
       
   350 		goto error;
       
   351 	numTokens = *src++;
       
   352 	if(numTokens)
       
   353 		{
       
   354 		if(src>=srcEnd)
       
   355 			goto error;
       
   356 		marker = *src++;
       
   357 		LUT0[marker] = (TUint8)~marker;
       
   358 
       
   359 		if(numTokens<32)
       
   360 			{
       
   361 			TUint8* tokenEnd = src+3*numTokens;
       
   362 			if(tokenEnd>srcEnd)
       
   363 				goto error;
       
   364 			do
       
   365 				{
       
   366 				TInt b = *src++;
       
   367 				TInt p1 = *src++;
       
   368 				TInt p2 = *src++;
       
   369 				LUT0[b] = (TUint8)p1;
       
   370 				LUT1[b] = (TUint8)p2;
       
   371 				}
       
   372 			while(src<tokenEnd);
       
   373 			}
       
   374 		else
       
   375 			{
       
   376 			TUint8* bitMask = src;
       
   377 			src += 32;
       
   378 			if(src>srcEnd)
       
   379 				goto error;
       
   380 			TInt b=0;
       
   381 			do
       
   382 				{
       
   383 				TUint8 mask = bitMask[b>>3];
       
   384 				if(mask&(1<<(b&7)))
       
   385 					{
       
   386 					if(src>srcEnd)
       
   387 						goto error;
       
   388 					TInt p1 = *src++;
       
   389 					if(src>srcEnd)
       
   390 						goto error;
       
   391 					TInt p2 = *src++;
       
   392 					LUT0[b] = (TUint8)p1;
       
   393 					LUT1[b] = (TUint8)p2;		
       
   394 					--numTokens;
       
   395 					}
       
   396 				++b;
       
   397 				}
       
   398 			while(b<0x100);
       
   399 			if(numTokens)
       
   400 				goto error;
       
   401 			}
       
   402 		}
       
   403 
       
   404 	if(src>=srcEnd)
       
   405 		goto error;
       
   406 	b = *src++;
       
   407 	if(dst>=dstEnd)
       
   408 		goto error;
       
   409 	p1 = LUT0[b];
       
   410 	if(p1!=b)
       
   411 		goto not_single;
       
   412 next:
       
   413 	if(src>=srcEnd)
       
   414 		goto done_s;
       
   415 	b = *src++;
       
   416 	*dst++ = (TUint8)p1;
       
   417 	if(dst>=dstEnd)
       
   418 		goto done_d;
       
   419 	p1 = LUT0[b];
       
   420 	if(p1==b)
       
   421 		goto next;
       
   422 
       
   423 not_single:
       
   424 	if(b==marker)
       
   425 		goto do_marker;
       
   426 
       
   427 do_pair:
       
   428 	p2 = LUT1[b];
       
   429 	b = p1;
       
   430 	p1 = LUT0[b];
       
   431 	if(sp<=stack)
       
   432 		goto error;
       
   433 	*--sp = (TUint8)p2;
       
   434 
       
   435 recurse:
       
   436 	if(b!=p1)
       
   437 		goto do_pair;
       
   438 
       
   439 	if(sp==stackStart)
       
   440 		goto next;
       
   441 	b = *sp++;
       
   442 	if(dst>=dstEnd)
       
   443 		goto error;
       
   444 	*dst++ = (TUint8)p1;
       
   445 	p1 = LUT0[b];
       
   446 	goto recurse;
       
   447 
       
   448 do_marker:
       
   449 	if(src>=srcEnd)
       
   450 		goto error;
       
   451 	p1 = *src++;
       
   452 	goto next;
       
   453 
       
   454 error:
       
   455 	srcNext = 0;
       
   456 	return KErrCorrupt;
       
   457 
       
   458 done_s:
       
   459 	*dst++ = (TUint8)p1;
       
   460 	srcNext = src;
       
   461 	return dst-dstStart;
       
   462 
       
   463 done_d:
       
   464 	if(dst>=dstEnd)
       
   465 		--src;
       
   466 	srcNext = src;
       
   467 	return dst-dstStart;
       
   468 	}
       
   469 
       
   470 
       
   471 TUint8 PakBuffer[MaxBlockSize*4];
       
   472 TUint8 UnpakBuffer[MaxBlockSize];
       
   473 
       
   474 
       
   475 TInt BytePairCompress(TUint8* dst, TUint8* src, TInt size)
       
   476 	{
       
   477 	ASSERT(size<=MaxBlockSize);
       
   478 	TInt compressedSize = Pak(PakBuffer,src,size);
       
   479 	TUint8* pakEnd;
       
   480 	TInt us = Unpak(UnpakBuffer,MaxBlockSize,PakBuffer,compressedSize,pakEnd);
       
   481 	ASSERT(us==size)
       
   482 	ASSERT(pakEnd==PakBuffer+compressedSize)
       
   483 	ASSERT(!memcmp(src,UnpakBuffer,size))
       
   484 	if(compressedSize>=size)
       
   485 		return KErrTooBig;
       
   486 	memcpy(dst,PakBuffer,compressedSize);
       
   487 	return compressedSize;
       
   488 	}