toolsandutils/e32tools/compress/byte_pair.cpp
changeset 0 83f4b4db085c
child 1 d4b442d23379
equal deleted inserted replaced
-1:000000000000 0:83f4b4db085c
       
     1 // Copyright (c) 2005-2009 Nokia Corporation and/or its subsidiary(-ies).
       
     2 // All rights reserved.
       
     3 // This component and the accompanying materials are made available
       
     4 // under the terms of "Eclipse Public License v1.0"
       
     5 // which accompanies this distribution, and is available
       
     6 // at the URL "http://www.eclipse.org/legal/epl-v10.html".
       
     7 //
       
     8 // Initial Contributors:
       
     9 // Nokia Corporation - initial contribution.
       
    10 //
       
    11 // Contributors:
       
    12 //
       
    13 // Description:
       
    14 //
       
    15 
       
    16 #include "byte_pair.h"
       
    17 
       
    18 #undef ASSERT
       
    19 #define ASSERT(c)	if(!(c))	\
       
    20 {		\
       
    21     __BREAKPOINT()	\
       
    22 }
       
    23 void CBytePair::InitPairCount(TUint8* data, TInt size, TInt minFrequency, TInt marker)
       
    24 {
       
    25     PairsFound = 0;
       
    26     memset(PairCount,0,sizeof(PairCount));
       
    27     TUint8* dataEnd = data+size-1;
       
    28     TInt lastPair = -1;
       
    29     while(data<dataEnd)
       
    30     {
       
    31         TInt b1 = *data++;
       
    32         if(b1==marker)
       
    33         {
       
    34             // skip marker and following byte
       
    35             lastPair = -1;
       
    36             ++data;
       
    37             continue;
       
    38         }
       
    39         TInt b2 = *data;
       
    40         if(b2==marker)
       
    41         {
       
    42             // skip marker and following byte
       
    43             lastPair = -1;
       
    44             data+=2;
       
    45             continue;
       
    46         }
       
    47         TInt p = (b2<<8)|b1;
       
    48         if(p==lastPair)
       
    49         {
       
    50             // ensure a pair of identical bytes don't get double counted
       
    51             lastPair = -1;
       
    52             continue;
       
    53         }
       
    54         lastPair = p;
       
    55         ++PairCount[p];
       
    56         if(PairCount[p]==minFrequency) {
       
    57             PairBuffer[PairsFound++] = (TUint16)p;
       
    58         }
       
    59     }
       
    60 }
       
    61 TInt CBytePair::MostCommonPair(TInt& pair)
       
    62 {
       
    63     TInt bestCount = -1;
       
    64     TInt bestPair = -1;
       
    65     TInt bestTieBreak = 0;
       
    66     TInt p;
       
    67     TInt pairsFound = PairsFound;
       
    68     while(pairsFound--)
       
    69     {
       
    70         p = PairBuffer[pairsFound];
       
    71         TInt f=PairCount[p];
       
    72         if(f>bestCount)
       
    73         {
       
    74             bestCount = f;
       
    75             bestPair = p;
       
    76             //bestTieBreak = TieBreak(p&0xff,p>>8);
       
    77             bestTieBreak = -ByteCount[p&0xff]-ByteCount[p>>8];
       
    78         }
       
    79         else if(f==bestCount)
       
    80         {
       
    81             //TInt tieBreak = TieBreak(p&0xff,p>>8);
       
    82             TInt tieBreak = -ByteCount[p&0xff]-ByteCount[p>>8];
       
    83             if(tieBreak>bestTieBreak)
       
    84             {
       
    85                 bestCount = f;
       
    86                 bestPair = p;
       
    87                 bestTieBreak = tieBreak;
       
    88             }
       
    89         }
       
    90     }
       
    91     pair = bestPair;
       
    92     return bestCount;
       
    93 }
       
    94 
       
    95 
       
    96 TInt CBytePair::LeastCommonByte(TInt& byte)
       
    97 {
       
    98     TInt bestCount = 0xffff;
       
    99     TInt bestByte = -1;
       
   100     TInt b;
       
   101     for(b=0; b<0x100; b++)
       
   102     {
       
   103         TInt f = ByteCount[b];
       
   104         if(f<bestCount)
       
   105         {
       
   106             bestCount = f;
       
   107             bestByte = b;
       
   108         }
       
   109     }
       
   110     byte = bestByte;
       
   111     return bestCount;
       
   112 }
       
   113 
       
   114 
       
   115 TInt CBytePair::Compress(TUint8* dst, TUint8* src, TInt size)
       
   116 {
       
   117     TInt originalSize = size;
       
   118     TUint8* dst2 = dst+size*2;
       
   119     TUint8* in = src;
       
   120     TUint8* out = dst;
       
   121 
       
   122     TUint8 tokens[0x100*3];
       
   123     TInt tokenCount = 0;
       
   124 
       
   125     CountBytes(in,size);
       
   126 
       
   127     TInt marker = -1;
       
   128     TInt overhead = 1+3+LeastCommonByte(marker);
       
   129     ByteUsed(marker);
       
   130 
       
   131     TUint8* inEnd = in+size;
       
   132     TUint8* outStart = out;
       
   133     while(in<inEnd)
       
   134     {
       
   135         TInt b=*in++;
       
   136         if(b==marker)
       
   137             *out++ = (TUint8)b;
       
   138         *out++ = (TUint8)b;
       
   139     }
       
   140     size = out-outStart;
       
   141 
       
   142     TInt outToggle = 1;
       
   143     in = dst;
       
   144     out = dst2;
       
   145     InitPairCount(in,size,overhead+1,marker);
       
   146 
       
   147     for(TInt r=256; r>0; --r)
       
   148     {
       
   149         TInt byte;
       
   150         TInt byteCount = LeastCommonByte(byte);
       
   151         if(iFastCompress && byteCount) break;
       
   152         TInt pair;
       
   153         TInt pairCount = MostCommonPair(pair);
       
   154         TInt saving = pairCount-byteCount;
       
   155         if(saving<=overhead)
       
   156             break;
       
   157 
       
   158         overhead = 3;
       
   159         if(tokenCount>=32)
       
   160             overhead = 2;
       
   161         TInt minFrequency = overhead+1;
       
   162         PairsFound = 0;
       
   163         memset(PairCount,0,sizeof(PairCount));
       
   164 
       
   165         TUint8* d=tokens+3*tokenCount;
       
   166         ++tokenCount;
       
   167         *d++ = (TUint8)byte;
       
   168         ByteUsed(byte);
       
   169         *d++ = (TUint8)pair;
       
   170         ByteUsed(pair&0xff);
       
   171         *d++ = (TUint8)(pair>>8);
       
   172         ByteUsed(pair>>8);
       
   173         ++GlobalPairs[pair];
       
   174 
       
   175         inEnd = in+size;
       
   176         outStart = out;
       
   177         //
       
   178         TInt lastPair = -1;
       
   179         TInt b, bb[2], p;
       
   180         TInt jj = 0;
       
   181         if(byteCount == 0) {
       
   182             //in this case, marker is not used in the replacement
       
   183             b=*in++;
       
   184             if(b==(pair&0xff) && in<inEnd && *in==(pair>>8))
       
   185             {
       
   186                 ++in;
       
   187                 --pairCount;
       
   188                 b = byte;
       
   189             }
       
   190             *out++ = (TUint8)b;
       
   191             bb[0] = b;
       
   192 
       
   193             while(in<inEnd) {
       
   194                 b=*in++;
       
   195                 if(b==(pair&0xff) && in<inEnd && *in==(pair>>8))
       
   196                 {
       
   197                     ++in;
       
   198                     --pairCount;
       
   199                     b = byte;
       
   200                 }
       
   201                 *out++ = (TUint8)b;
       
   202 
       
   203                 p = (b<<8)|bb[0];
       
   204                 bb[0] = b;
       
   205                 if(p==lastPair) {
       
   206                     // ensure a pair of identical bytes don't get double counted
       
   207                     lastPair = -1;
       
   208                 }
       
   209                 else {
       
   210                     lastPair = p;
       
   211                     ++PairCount[p];
       
   212                     if(PairCount[p]==minFrequency)
       
   213                         PairBuffer[PairsFound++] = (TUint16)p;
       
   214                 }
       
   215             }
       
   216         }
       
   217         else {
       
   218             while(in<inEnd) {
       
   219                 b=*in++;
       
   220                 if(b==marker)
       
   221                 {
       
   222                     *out++ = (TUint8)marker;
       
   223                     *out++ = *in++;
       
   224                     lastPair = -1;
       
   225                     jj = 0;
       
   226                 }
       
   227                 else if(b==byte)
       
   228                 {
       
   229                     *out++ = (TUint8)marker;
       
   230                     --byteCount;
       
   231                     *out++ = (TUint8)byte;
       
   232                     lastPair = -1;
       
   233                     jj = 0;
       
   234                 }
       
   235                 else if(b==(pair&0xff) && in<inEnd && *in==(pair>>8))
       
   236                 {
       
   237                     ++in;
       
   238                     --pairCount;
       
   239                     *out++ = (TUint8)byte;
       
   240                     bb[jj++] = byte;
       
   241                 }
       
   242                 else{
       
   243                     *out++ = (TUint8)b;
       
   244                     bb[jj++] = b;
       
   245                 }
       
   246 
       
   247                 if(jj == 2) {
       
   248                     p = (bb[1]<<8)|bb[0];
       
   249                     bb[0] = bb[1];
       
   250                     jj = 1;
       
   251                     if(p==lastPair)
       
   252                     {
       
   253                         // ensure a pair of identical bytes don't get double counted
       
   254                         lastPair = -1;
       
   255                     }
       
   256                     else {
       
   257                         lastPair = p;
       
   258                         ++PairCount[p];
       
   259                         if(PairCount[p]==minFrequency)
       
   260                             PairBuffer[PairsFound++] = (TUint16)p;
       
   261                     }
       
   262                 }
       
   263             }
       
   264         }
       
   265         //
       
   266         ASSERT(!byteCount);
       
   267         ASSERT(!pairCount);
       
   268         size = out-outStart;
       
   269 
       
   270         outToggle ^= 1;
       
   271         if(outToggle)
       
   272         {
       
   273             in = dst;
       
   274             out = dst2;
       
   275         }
       
   276         else
       
   277         {
       
   278             in = dst2;
       
   279             out = dst;
       
   280         }
       
   281     }
       
   282 
       
   283     // sort tokens with a bubble sort...
       
   284     for(TInt x=0; x<tokenCount-1; x++)
       
   285         for(TInt y=x+1; y<tokenCount; y++)
       
   286             if(tokens[x*3]>tokens[y*3])
       
   287             {
       
   288                 TInt z = tokens[x*3];
       
   289                 tokens[x*3] = tokens[y*3];
       
   290                 tokens[y*3] = (TUint8)z;
       
   291                 z = tokens[x*3+1];
       
   292                 tokens[x*3+1] = tokens[y*3+1];
       
   293                 tokens[y*3+1] = (TUint8)z;
       
   294                 z = tokens[x*3+2];
       
   295                 tokens[x*3+2] = tokens[y*3+2];
       
   296                 tokens[y*3+2] = (TUint8)z;
       
   297             }
       
   298 
       
   299     // check for not being able to compress...
       
   300     if(size>originalSize)
       
   301     {
       
   302         *dst++ = 0; // store zero token count
       
   303         memcpy(dst,src,originalSize); // store original data
       
   304         return originalSize+1;
       
   305     }
       
   306 
       
   307     // make sure data is in second buffer (dst2)
       
   308     if(in!=dst2)
       
   309         memcpy(dst2,dst,size);
       
   310 
       
   311     // store tokens...
       
   312     TUint8* originalDst = dst;
       
   313     *dst++ = (TUint8)tokenCount;
       
   314     if(tokenCount)
       
   315     {
       
   316         *dst++ = (TUint8)marker;
       
   317         if(tokenCount<32)
       
   318         {
       
   319             memcpy(dst,tokens,tokenCount*3);
       
   320             dst += tokenCount*3;
       
   321         }
       
   322         else
       
   323         {
       
   324             TUint8* bitMask = dst;
       
   325             memset(bitMask,0,32);
       
   326             dst += 32;
       
   327             TUint8* d=tokens;
       
   328             do
       
   329             {
       
   330                 TInt t=*d++;
       
   331                 bitMask[t>>3] |= (1<<(t&7));
       
   332                 *dst++ = *d++;
       
   333                 *dst++ = *d++;
       
   334             }
       
   335             while(--tokenCount);
       
   336         }
       
   337     }
       
   338     // store data...
       
   339     memcpy(dst,dst2,size);
       
   340     dst += size;
       
   341 
       
   342     // get stats...
       
   343     ++GlobalTokenCounts[tokenCount];
       
   344 
       
   345     // return total size of compressed data...
       
   346     return dst-originalDst;
       
   347 }
       
   348 
       
   349 
       
   350 TInt CBytePair::Decompress(TUint8* dst, TInt dstSize, TUint8* src, TInt srcSize, TUint8*& srcNext)
       
   351 {
       
   352     TUint8* dstStart = dst;
       
   353     TUint8* dstEnd = dst+dstSize;
       
   354     TUint8* srcEnd = src+srcSize;
       
   355 
       
   356     TUint32 LUT[0x100/2];
       
   357     TUint8* LUT0 = (TUint8*)LUT;
       
   358     TUint8* LUT1 = LUT0+0x100;
       
   359 
       
   360     TUint8 stack[0x100];
       
   361     TUint8* stackStart = stack+sizeof(stack);
       
   362     TUint8* sp = stackStart;
       
   363 
       
   364     TUint32 marker = ~0u;
       
   365     TInt numTokens;
       
   366     TUint32 p1;
       
   367     TUint32 p2;
       
   368 
       
   369     TUint32* l = (TUint32*)LUT;
       
   370     TUint32 b = 0x03020100;
       
   371     TUint32 step = 0x04040404;
       
   372     do
       
   373     {
       
   374         *l++ = b;
       
   375         b += step;
       
   376     }
       
   377     while(b>step);
       
   378 
       
   379     if(src>=srcEnd)
       
   380         goto error;
       
   381     numTokens = *src++;
       
   382     if(numTokens)
       
   383     {
       
   384         if(src>=srcEnd)
       
   385             goto error;
       
   386         marker = *src++;
       
   387         LUT0[marker] = (TUint8)~marker;
       
   388 
       
   389         if(numTokens<32)
       
   390         {
       
   391             TUint8* tokenEnd = src+3*numTokens;
       
   392             if(tokenEnd>srcEnd)
       
   393                 goto error;
       
   394             do
       
   395             {
       
   396                 TInt b = *src++;
       
   397                 TInt p1 = *src++;
       
   398                 TInt p2 = *src++;
       
   399                 LUT0[b] = (TUint8)p1;
       
   400                 LUT1[b] = (TUint8)p2;
       
   401             }
       
   402             while(src<tokenEnd);
       
   403         }
       
   404         else
       
   405         {
       
   406             TUint8* bitMask = src;
       
   407             src += 32;
       
   408             if(src>srcEnd)
       
   409                 goto error;
       
   410             TInt b=0;
       
   411             do
       
   412             {
       
   413                 TUint8 mask = bitMask[b>>3];
       
   414                 if(mask&(1<<(b&7)))
       
   415                 {
       
   416                     if(src>srcEnd)
       
   417                         goto error;
       
   418                     TInt p1 = *src++;
       
   419                     if(src>srcEnd)
       
   420                         goto error;
       
   421                     TInt p2 = *src++;
       
   422                     LUT0[b] = (TUint8)p1;
       
   423                     LUT1[b] = (TUint8)p2;		
       
   424                     --numTokens;
       
   425                 }
       
   426                 ++b;
       
   427             }
       
   428             while(b<0x100);
       
   429             if(numTokens)
       
   430                 goto error;
       
   431         }
       
   432     }
       
   433 
       
   434     if(src>=srcEnd)
       
   435         goto error;
       
   436     b = *src++;
       
   437     if(dst>=dstEnd)
       
   438         goto error;
       
   439     p1 = LUT0[b];
       
   440     if(p1!=b)
       
   441         goto not_single;
       
   442 next:
       
   443     if(src>=srcEnd)
       
   444         goto done_s;
       
   445     b = *src++;
       
   446     *dst++ = (TUint8)p1;
       
   447     if(dst>=dstEnd)
       
   448         goto done_d;
       
   449     p1 = LUT0[b];
       
   450     if(p1==b)
       
   451         goto next;
       
   452 
       
   453 not_single:
       
   454     if(b==marker)
       
   455         goto do_marker;
       
   456 
       
   457 do_pair:
       
   458     p2 = LUT1[b];
       
   459     b = p1;
       
   460     p1 = LUT0[b];
       
   461     if(sp<=stack)
       
   462         goto error;
       
   463     *--sp = (TUint8)p2;
       
   464 
       
   465 recurse:
       
   466     if(b!=p1)
       
   467         goto do_pair;
       
   468 
       
   469     if(sp==stackStart)
       
   470         goto next;
       
   471     b = *sp++;
       
   472     if(dst>=dstEnd)
       
   473         goto error;
       
   474     *dst++ = (TUint8)p1;
       
   475     p1 = LUT0[b];
       
   476     goto recurse;
       
   477 
       
   478 do_marker:
       
   479     if(src>=srcEnd)
       
   480         goto error;
       
   481     p1 = *src++;
       
   482     goto next;
       
   483 
       
   484 error:
       
   485     srcNext = 0;
       
   486     return KErrCorrupt;
       
   487 
       
   488 done_s:
       
   489     *dst++ = (TUint8)p1;
       
   490     srcNext = src;
       
   491     return dst-dstStart;
       
   492 
       
   493 done_d:
       
   494     if(dst>=dstEnd)
       
   495         --src;
       
   496     srcNext = src;
       
   497     return dst-dstStart;
       
   498 }
       
   499 
       
   500 
       
   501 TInt BytePairCompress(TUint8* dst, TUint8* src, TInt size, CBytePair *aBPE)
       
   502 {
       
   503     TUint8 PakBuffer[MaxBlockSize*4];
       
   504     TUint8 UnpakBuffer[MaxBlockSize];
       
   505     ASSERT(size<=MaxBlockSize);
       
   506     TInt compressedSize = aBPE->Compress(PakBuffer,src,size);
       
   507     TUint8* pakEnd;
       
   508     TInt us = aBPE->Decompress(UnpakBuffer,MaxBlockSize,PakBuffer,compressedSize,pakEnd);
       
   509     ASSERT(us==size)
       
   510     ASSERT(pakEnd==PakBuffer+compressedSize)
       
   511     ASSERT(!memcmp(src,UnpakBuffer,size))
       
   512     if(compressedSize>=size)
       
   513         return KErrTooBig;
       
   514     memcpy(dst,PakBuffer,compressedSize);
       
   515     return compressedSize;
       
   516 }