kernel/eka/include/byte_pair_compress.h
changeset 0 a41df078684a
equal deleted inserted replaced
-1:000000000000 0:a41df078684a
       
     1 // Copyright (c) 2007-2009 Nokia Corporation and/or its subsidiary(-ies).
       
     2 // All rights reserved.
       
     3 // This component and the accompanying materials are made available
       
     4 // under the terms of the License "Eclipse Public License v1.0"
       
     5 // which accompanies this distribution, and is available
       
     6 // at the URL "http://www.eclipse.org/legal/epl-v10.html".
       
     7 //
       
     8 // Initial Contributors:
       
     9 // Nokia Corporation - initial contribution.
       
    10 //
       
    11 // Contributors:
       
    12 //
       
    13 // Description:
       
    14 // e32\include\byte_pair_compress.h
       
    15 // This header file contains both the prototype and implementation for the byte pair compressor.
       
    16 // This is done to facilitate sharing the code between the e32 test code and the tools.  The
       
    17 // implementation is only included if the macro BYTE_PAIR_COMPRESS_INCLUDE_IMPLEMENTATION is
       
    18 // defined.
       
    19 // 
       
    20 // WARNING: This file contains some APIs which are internal and are subject
       
    21 //          to change without notice. Such APIs should therefore not be used
       
    22 //          outside the Kernel and Hardware Services package.
       
    23 //
       
    24 
       
    25 #ifndef __BYTE_PAIR_COMPRESS_H__
       
    26 #define __BYTE_PAIR_COMPRESS_H__
       
    27 
       
    28 #include <e32std.h>
       
    29 
       
    30 /**
       
    31  * @internalTechnology
       
    32  *
       
    33  * Compress up to 4096 bytes of data usign the byte-pair compression algorithm.
       
    34  *
       
    35  * @param aDest Destination buffer, must be at least four times the size of the source data.
       
    36  * @param aSrc  Source data to compress.
       
    37  * @param aSize The size of the source data.
       
    38  */
       
    39 extern TInt BytePairCompress(TUint8* aDest, TUint8* aSrc, TInt aSize);
       
    40 
       
    41 #ifdef BYTE_PAIR_COMPRESS_INCLUDE_IMPLEMENTATION
       
    42 
       
    43 const TInt BlockSize = 0x1000;
       
    44 TInt ExtraSave = 0;
       
    45 
       
    46 TUint16 PairCount[0x10000];
       
    47 TUint16 PairBuffer[BlockSize*2];
       
    48 
       
    49 TUint16 GlobalPairs[0x10000] = {0};
       
    50 TUint16 GlobalTokenCounts[0x100] = {0};
       
    51 
       
    52 TUint16 ByteCount[0x100+4];
       
    53 
       
    54 void CountBytes(TUint8* data, TInt size)
       
    55 	{
       
    56 	memset(ByteCount,0,sizeof(ByteCount));
       
    57 	TUint8* dataEnd = data+size;
       
    58 	while(data<dataEnd)
       
    59 		++ByteCount[*data++];
       
    60 	}
       
    61 
       
    62 inline void ByteUsed(TInt b)
       
    63 	{
       
    64 	ByteCount[b] = 0xffff;
       
    65 	}
       
    66 
       
    67 int TieBreak(int b1,int b2)
       
    68 	{
       
    69 	return -ByteCount[b1]-ByteCount[b2];
       
    70 	}
       
    71 
       
    72 TInt MostCommonPair(TInt& pair, TUint8* data, TInt size, TInt minFrequency, TInt marker)
       
    73 	{
       
    74 	memset(PairCount,0,sizeof(PairCount));
       
    75 	TUint8* dataEnd = data+size-1;
       
    76 	TInt pairsFound = 0;
       
    77 	TInt lastPair = -1;
       
    78 	while(data<dataEnd)
       
    79 		{
       
    80 		TInt b1 = *data++;
       
    81 		if(b1==marker)
       
    82 			{
       
    83 			// skip marker and following byte
       
    84 			lastPair = -1;
       
    85 			++data;
       
    86 			continue;
       
    87 			}
       
    88 		TInt b2 = *data;
       
    89 		if(b2==marker)
       
    90 			{
       
    91 			// skip marker and following byte
       
    92 			lastPair = -1;
       
    93 			data+=2;
       
    94 			continue;
       
    95 			}
       
    96 		TInt p = (b2<<8)|b1;
       
    97 		if(p==lastPair)
       
    98 			{
       
    99 			// ensure a pair of identical bytes don't get double counted
       
   100 			lastPair = -1;
       
   101 			continue;
       
   102 			}
       
   103 		lastPair = p;
       
   104 		++PairCount[p];
       
   105 		if(PairCount[p]==minFrequency)
       
   106 			PairBuffer[pairsFound++] = (TUint16)p;
       
   107 		}
       
   108 
       
   109 	TInt bestCount = -1;
       
   110 	TInt bestPair = -1;
       
   111 	TInt bestTieBreak = 0;
       
   112 	TInt p;
       
   113 	while(pairsFound--)
       
   114 		{
       
   115 		p = PairBuffer[pairsFound];
       
   116 		TInt f=PairCount[p];
       
   117 		if(f>bestCount)
       
   118 			{
       
   119 			bestCount = f;
       
   120 			bestPair = p;
       
   121 			bestTieBreak = TieBreak(p&0xff,p>>8);
       
   122 			}
       
   123 		else if(f==bestCount)
       
   124 			{
       
   125 			TInt tieBreak = TieBreak(p&0xff,p>>8);
       
   126 			if(tieBreak>bestTieBreak)
       
   127 				{
       
   128 				bestCount = f;
       
   129 				bestPair = p;
       
   130 				bestTieBreak = tieBreak;
       
   131 				}
       
   132 			}
       
   133 		}
       
   134 	pair = bestPair;
       
   135 	return bestCount;
       
   136 	}
       
   137 
       
   138 TInt LeastCommonByte(TInt& byte)
       
   139 	{
       
   140 	TInt bestCount = 0xffff;
       
   141 	TInt bestByte = -1;
       
   142 	for(TInt b = 0; b<0x100; b++)
       
   143 		{
       
   144 		TInt f = ByteCount[b];
       
   145 		if(f<bestCount)
       
   146 			{
       
   147 			bestCount = f;
       
   148 			bestByte = b;
       
   149 			}
       
   150 		}
       
   151 	byte = bestByte;
       
   152 	return bestCount;
       
   153 	}
       
   154 
       
   155 TInt BytePairCompress(TUint8* dst, TUint8* src, TInt size)
       
   156 	{
       
   157 	TInt originalSize = size;
       
   158 	TUint8* dst2 = dst+size*2;
       
   159 	TUint8* in = src;
       
   160 	TUint8* out = dst;
       
   161 
       
   162 	TUint8 tokens[0x100*3];
       
   163 	TInt tokenCount = 0;
       
   164 
       
   165 	CountBytes(in,size);
       
   166 
       
   167 	TInt marker = -1;
       
   168 	TInt overhead = 1+3+LeastCommonByte(marker);
       
   169 	ByteUsed(marker);
       
   170 
       
   171 	TUint8* inEnd = in+size;
       
   172 	TUint8* outStart = out;
       
   173 	while(in<inEnd)
       
   174 		{
       
   175 		TInt b=*in++;
       
   176 		if(b==marker)
       
   177 			*out++ = (TUint8)b;
       
   178 		*out++ = (TUint8)b;
       
   179 		}
       
   180 	size = out-outStart;
       
   181 
       
   182 	TInt outToggle = 1;
       
   183 	in = dst;
       
   184 	out = dst2;
       
   185 
       
   186 	for(TInt r=256; r>0; --r)
       
   187 		{
       
   188 		TInt byte;
       
   189 		TInt byteCount = LeastCommonByte(byte);
       
   190 		TInt pair;
       
   191 		TInt pairCount = MostCommonPair(pair,in,size,overhead+1,marker);
       
   192 		TInt saving = pairCount-byteCount;
       
   193 		if(saving<=overhead)
       
   194 			break;
       
   195 
       
   196 		overhead = 3;
       
   197 		if(tokenCount>=32)
       
   198 			overhead = 2;
       
   199 
       
   200 		TUint8* d=tokens+3*tokenCount;
       
   201 		++tokenCount;
       
   202 		*d++ = (TUint8)byte;
       
   203 		ByteUsed(byte);
       
   204 		*d++ = (TUint8)pair;
       
   205 		ByteUsed(pair&0xff);
       
   206 		*d++ = (TUint8)(pair>>8);
       
   207 		ByteUsed(pair>>8);
       
   208 		++GlobalPairs[pair];
       
   209 
       
   210 		inEnd = in+size;
       
   211 		outStart = out;
       
   212 		while(in<inEnd)
       
   213 			{
       
   214 			TInt b=*in++;
       
   215 			if(b==marker)
       
   216 				{
       
   217 				*out++ = (TUint8)marker;
       
   218 				b = *in++;
       
   219 				}
       
   220 			else if(b==byte)
       
   221 				{
       
   222 				*out++ = (TUint8)marker;
       
   223 				--byteCount;
       
   224 				}
       
   225 			else if(b==(pair&0xff) && in<inEnd && *in==(pair>>8))
       
   226 				{
       
   227 				++in;
       
   228 				b = byte;
       
   229 				--pairCount;
       
   230 				}
       
   231 			*out++ = (TUint8)b;
       
   232 			}
       
   233 		ASSERT(!byteCount);
       
   234 		ASSERT(!pairCount);
       
   235 		size = out-outStart;
       
   236 
       
   237 		outToggle ^= 1;
       
   238 		if(outToggle)
       
   239 			{
       
   240 			in = dst;
       
   241 			out = dst2;
       
   242 			}
       
   243 		else
       
   244 			{
       
   245 			in = dst2;
       
   246 			out = dst;
       
   247 			}
       
   248 		}
       
   249 
       
   250 	// sort tokens with a bubble sort...
       
   251 	for(TInt x=0; x<tokenCount-1; x++)
       
   252 		for(TInt y=x+1; y<tokenCount; y++)
       
   253 			if(tokens[x*3]>tokens[y*3])
       
   254 				{
       
   255 				TInt z = tokens[x*3];
       
   256 				tokens[x*3] = tokens[y*3];
       
   257 				tokens[y*3] = (TUint8)z;
       
   258 				z = tokens[x*3+1];
       
   259 				tokens[x*3+1] = tokens[y*3+1];
       
   260 				tokens[y*3+1] = (TUint8)z;
       
   261 				z = tokens[x*3+2];
       
   262 				tokens[x*3+2] = tokens[y*3+2];
       
   263 				tokens[y*3+2] = (TUint8)z;
       
   264 				}
       
   265 
       
   266 	// check for not being able to compress...
       
   267 	if(size>originalSize)
       
   268 		{
       
   269 		*dst++ = 0; // store zero token count
       
   270 		memcpy(dst,src,originalSize); // store original data
       
   271 		return originalSize+1;
       
   272 		}
       
   273 
       
   274 	// make sure data is in second buffer (dst2)
       
   275 	if(in!=dst2)
       
   276 		memcpy(dst2,dst,size);
       
   277 
       
   278 	// store tokens...
       
   279 	TUint8* originalDst = dst;
       
   280 	*dst++ = (TUint8)tokenCount;
       
   281 	if(tokenCount)
       
   282 		{
       
   283 		*dst++ = (TUint8)marker;
       
   284 		if(tokenCount<32)
       
   285 			{
       
   286 			memcpy(dst,tokens,tokenCount*3);
       
   287 			dst += tokenCount*3;
       
   288 			}
       
   289 		else
       
   290 			{
       
   291 			TUint8* bitMask = dst;
       
   292 			memset(bitMask,0,32);
       
   293 			dst += 32;
       
   294 			TUint8* d=tokens;
       
   295 			do
       
   296 				{
       
   297 				TInt t=*d++;
       
   298 				bitMask[t>>3] |= (1<<(t&7));
       
   299 				*dst++ = *d++;
       
   300 				*dst++ = *d++;
       
   301 				}
       
   302 			while(--tokenCount);
       
   303 			}
       
   304 		}
       
   305 	// store data...
       
   306 	memcpy(dst,dst2,size);
       
   307 	dst += size;
       
   308 
       
   309 	// get stats...
       
   310 	++GlobalTokenCounts[tokenCount];
       
   311 
       
   312 	// return total size of compressed data...
       
   313 	return dst-originalDst;
       
   314 	}
       
   315 
       
   316 #endif
       
   317 #endif