|
1 // Copyright (c) 2005-2009 Nokia Corporation and/or its subsidiary(-ies). |
|
2 // All rights reserved. |
|
3 // This component and the accompanying materials are made available |
|
4 // under the terms of "Eclipse Public License v1.0" |
|
5 // which accompanies this distribution, and is available |
|
6 // at the URL "http://www.eclipse.org/legal/epl-v10.html". |
|
7 // |
|
8 // Initial Contributors: |
|
9 // Nokia Corporation - initial contribution. |
|
10 // |
|
11 // Contributors: |
|
12 // |
|
13 // Description: |
|
14 // |
|
15 |
|
16 #include "byte_pair.h" |
|
17 |
|
18 |
|
19 #undef ASSERT |
|
20 #define ASSERT(c) if(!(c)) \ |
|
21 { \ |
|
22 __BREAKPOINT() \ |
|
23 } |
|
24 |
|
25 const TInt MaxBlockSize = 0x1000; |
|
26 |
|
27 TUint16 PairCount[0x10000]; |
|
28 TUint16 PairBuffer[MaxBlockSize*2]; |
|
29 |
|
30 TUint16 GlobalPairs[0x10000] = {0}; |
|
31 TUint16 GlobalTokenCounts[0x100] = {0}; |
|
32 |
|
33 TUint16 ByteCount[0x100+4]; |
|
34 |
|
35 void CountBytes(TUint8* data, TInt size) |
|
36 { |
|
37 memset(ByteCount,0,sizeof(ByteCount)); |
|
38 TUint8* dataEnd = data+size; |
|
39 while(data<dataEnd) |
|
40 ++ByteCount[*data++]; |
|
41 } |
|
42 |
|
43 |
|
44 inline void ByteUsed(TInt b) |
|
45 { |
|
46 ByteCount[b] = 0xffff; |
|
47 } |
|
48 |
|
49 |
|
50 // 11915620 |
|
51 // 11913551 return -ByteCount[b1]-ByteCount[b2]; |
|
52 // 11913185 |
|
53 |
|
54 #if 0 |
|
55 int TieBreak(int b1,int b2) |
|
56 { |
|
57 int i; |
|
58 int x = 0; |
|
59 for(i=0; i<0x100; i++) |
|
60 x += PairCount[(b1<<8)+i]; |
|
61 int y = 0; |
|
62 for(i=b2; i<0x10000; i+=0x100) |
|
63 y += PairCount[i]; |
|
64 return -x-y; |
|
65 } |
|
66 #endif |
|
67 |
|
68 int TieBreak(int b1,int b2) |
|
69 { |
|
70 return -ByteCount[b1]-ByteCount[b2]; |
|
71 } |
|
72 |
|
73 TInt MostCommonPair(TInt& pair, TUint8* data, TInt size, TInt minFrequency, TInt marker) |
|
74 { |
|
75 memset(PairCount,0,sizeof(PairCount)); |
|
76 TUint8* dataEnd = data+size-1; |
|
77 TInt pairsFound = 0; |
|
78 TInt lastPair = -1; |
|
79 while(data<dataEnd) |
|
80 { |
|
81 TInt b1 = *data++; |
|
82 if(b1==marker) |
|
83 { |
|
84 // skip marker and following byte |
|
85 lastPair = -1; |
|
86 ++data; |
|
87 continue; |
|
88 } |
|
89 TInt b2 = *data; |
|
90 if(b2==marker) |
|
91 { |
|
92 // skip marker and following byte |
|
93 lastPair = -1; |
|
94 data+=2; |
|
95 continue; |
|
96 } |
|
97 TInt p = (b2<<8)|b1; |
|
98 if(p==lastPair) |
|
99 { |
|
100 // ensure a pair of identical bytes don't get double counted |
|
101 lastPair = -1; |
|
102 continue; |
|
103 } |
|
104 lastPair = p; |
|
105 ++PairCount[p]; |
|
106 if(PairCount[p]==minFrequency) |
|
107 PairBuffer[pairsFound++] = (TUint16)p; |
|
108 } |
|
109 |
|
110 TInt bestCount = -1; |
|
111 TInt bestPair = -1; |
|
112 TInt bestTieBreak = 0; |
|
113 TInt p; |
|
114 while(pairsFound--) |
|
115 { |
|
116 p = PairBuffer[pairsFound]; |
|
117 TInt f=PairCount[p]; |
|
118 if(f>bestCount) |
|
119 { |
|
120 bestCount = f; |
|
121 bestPair = p; |
|
122 bestTieBreak = TieBreak(p&0xff,p>>8); |
|
123 } |
|
124 else if(f==bestCount) |
|
125 { |
|
126 TInt tieBreak = TieBreak(p&0xff,p>>8); |
|
127 if(tieBreak>bestTieBreak) |
|
128 { |
|
129 bestCount = f; |
|
130 bestPair = p; |
|
131 bestTieBreak = tieBreak; |
|
132 } |
|
133 } |
|
134 } |
|
135 pair = bestPair; |
|
136 return bestCount; |
|
137 } |
|
138 |
|
139 |
|
140 TInt LeastCommonByte(TInt& byte) |
|
141 { |
|
142 TInt bestCount = 0xffff; |
|
143 TInt bestByte = -1; |
|
144 for(TInt b=0; b<0x100; b++) |
|
145 { |
|
146 TInt f = ByteCount[b]; |
|
147 if(f<bestCount) |
|
148 { |
|
149 bestCount = f; |
|
150 bestByte = b; |
|
151 } |
|
152 } |
|
153 byte = bestByte; |
|
154 return bestCount; |
|
155 } |
|
156 |
|
157 |
|
158 TInt Pak(TUint8* dst, TUint8* src, TInt size) |
|
159 { |
|
160 TInt originalSize = size; |
|
161 TUint8* dst2 = dst+size*2; |
|
162 TUint8* in = src; |
|
163 TUint8* out = dst; |
|
164 |
|
165 TUint8 tokens[0x100*3]; |
|
166 TInt tokenCount = 0; |
|
167 |
|
168 CountBytes(in,size); |
|
169 |
|
170 TInt marker = -1; |
|
171 TInt overhead = 1+3+LeastCommonByte(marker); |
|
172 ByteUsed(marker); |
|
173 |
|
174 TUint8* inEnd = in+size; |
|
175 TUint8* outStart = out; |
|
176 while(in<inEnd) |
|
177 { |
|
178 TInt b=*in++; |
|
179 if(b==marker) |
|
180 *out++ = (TUint8)b; |
|
181 *out++ = (TUint8)b; |
|
182 } |
|
183 size = out-outStart; |
|
184 |
|
185 TInt outToggle = 1; |
|
186 in = dst; |
|
187 out = dst2; |
|
188 |
|
189 for(TInt r=256; r>0; --r) |
|
190 { |
|
191 TInt byte; |
|
192 TInt byteCount = LeastCommonByte(byte); |
|
193 TInt pair; |
|
194 TInt pairCount = MostCommonPair(pair,in,size,overhead+1,marker); |
|
195 TInt saving = pairCount-byteCount; |
|
196 if(saving<=overhead) |
|
197 break; |
|
198 |
|
199 overhead = 3; |
|
200 if(tokenCount>=32) |
|
201 overhead = 2; |
|
202 |
|
203 TUint8* d=tokens+3*tokenCount; |
|
204 ++tokenCount; |
|
205 *d++ = (TUint8)byte; |
|
206 ByteUsed(byte); |
|
207 *d++ = (TUint8)pair; |
|
208 ByteUsed(pair&0xff); |
|
209 *d++ = (TUint8)(pair>>8); |
|
210 ByteUsed(pair>>8); |
|
211 ++GlobalPairs[pair]; |
|
212 |
|
213 inEnd = in+size; |
|
214 outStart = out; |
|
215 while(in<inEnd) |
|
216 { |
|
217 TInt b=*in++; |
|
218 if(b==marker) |
|
219 { |
|
220 *out++ = (TUint8)marker; |
|
221 b = *in++; |
|
222 } |
|
223 else if(b==byte) |
|
224 { |
|
225 *out++ = (TUint8)marker; |
|
226 --byteCount; |
|
227 } |
|
228 else if(b==(pair&0xff) && in<inEnd && *in==(pair>>8)) |
|
229 { |
|
230 ++in; |
|
231 b = byte; |
|
232 --pairCount; |
|
233 } |
|
234 *out++ = (TUint8)b; |
|
235 } |
|
236 ASSERT(!byteCount); |
|
237 ASSERT(!pairCount); |
|
238 size = out-outStart; |
|
239 |
|
240 outToggle ^= 1; |
|
241 if(outToggle) |
|
242 { |
|
243 in = dst; |
|
244 out = dst2; |
|
245 } |
|
246 else |
|
247 { |
|
248 in = dst2; |
|
249 out = dst; |
|
250 } |
|
251 } |
|
252 |
|
253 // sort tokens with a bubble sort... |
|
254 for(TInt x=0; x<tokenCount-1; x++) |
|
255 for(TInt y=x+1; y<tokenCount; y++) |
|
256 if(tokens[x*3]>tokens[y*3]) |
|
257 { |
|
258 TInt z = tokens[x*3]; |
|
259 tokens[x*3] = tokens[y*3]; |
|
260 tokens[y*3] = (TUint8)z; |
|
261 z = tokens[x*3+1]; |
|
262 tokens[x*3+1] = tokens[y*3+1]; |
|
263 tokens[y*3+1] = (TUint8)z; |
|
264 z = tokens[x*3+2]; |
|
265 tokens[x*3+2] = tokens[y*3+2]; |
|
266 tokens[y*3+2] = (TUint8)z; |
|
267 } |
|
268 |
|
269 // check for not being able to compress... |
|
270 if(size>originalSize) |
|
271 { |
|
272 *dst++ = 0; // store zero token count |
|
273 memcpy(dst,src,originalSize); // store original data |
|
274 return originalSize+1; |
|
275 } |
|
276 |
|
277 // make sure data is in second buffer (dst2) |
|
278 if(in!=dst2) |
|
279 memcpy(dst2,dst,size); |
|
280 |
|
281 // store tokens... |
|
282 TUint8* originalDst = dst; |
|
283 *dst++ = (TUint8)tokenCount; |
|
284 if(tokenCount) |
|
285 { |
|
286 *dst++ = (TUint8)marker; |
|
287 if(tokenCount<32) |
|
288 { |
|
289 memcpy(dst,tokens,tokenCount*3); |
|
290 dst += tokenCount*3; |
|
291 } |
|
292 else |
|
293 { |
|
294 TUint8* bitMask = dst; |
|
295 memset(bitMask,0,32); |
|
296 dst += 32; |
|
297 TUint8* d=tokens; |
|
298 do |
|
299 { |
|
300 TInt t=*d++; |
|
301 bitMask[t>>3] |= (1<<(t&7)); |
|
302 *dst++ = *d++; |
|
303 *dst++ = *d++; |
|
304 } |
|
305 while(--tokenCount); |
|
306 } |
|
307 } |
|
308 // store data... |
|
309 memcpy(dst,dst2,size); |
|
310 dst += size; |
|
311 |
|
312 // get stats... |
|
313 ++GlobalTokenCounts[tokenCount]; |
|
314 |
|
315 // return total size of compressed data... |
|
316 return dst-originalDst; |
|
317 } |
|
318 |
|
319 |
|
320 TInt Unpak(TUint8* dst, TInt dstSize, TUint8* src, TInt srcSize, TUint8*& srcNext) |
|
321 { |
|
322 TUint8* dstStart = dst; |
|
323 TUint8* dstEnd = dst+dstSize; |
|
324 TUint8* srcEnd = src+srcSize; |
|
325 |
|
326 TUint32 LUT[0x100/2]; |
|
327 TUint8* LUT0 = (TUint8*)LUT; |
|
328 TUint8* LUT1 = LUT0+0x100; |
|
329 |
|
330 TUint8 stack[0x100]; |
|
331 TUint8* stackStart = stack+sizeof(stack); |
|
332 TUint8* sp = stackStart; |
|
333 |
|
334 TUint32 marker = ~0u; |
|
335 TInt numTokens; |
|
336 TUint32 p1; |
|
337 TUint32 p2; |
|
338 |
|
339 TUint32* l = (TUint32*)LUT; |
|
340 TUint32 b = 0x03020100; |
|
341 TUint32 step = 0x04040404; |
|
342 do |
|
343 { |
|
344 *l++ = b; |
|
345 b += step; |
|
346 } |
|
347 while(b>step); |
|
348 |
|
349 if(src>=srcEnd) |
|
350 goto error; |
|
351 numTokens = *src++; |
|
352 if(numTokens) |
|
353 { |
|
354 if(src>=srcEnd) |
|
355 goto error; |
|
356 marker = *src++; |
|
357 LUT0[marker] = (TUint8)~marker; |
|
358 |
|
359 if(numTokens<32) |
|
360 { |
|
361 TUint8* tokenEnd = src+3*numTokens; |
|
362 if(tokenEnd>srcEnd) |
|
363 goto error; |
|
364 do |
|
365 { |
|
366 TInt b = *src++; |
|
367 TInt p1 = *src++; |
|
368 TInt p2 = *src++; |
|
369 LUT0[b] = (TUint8)p1; |
|
370 LUT1[b] = (TUint8)p2; |
|
371 } |
|
372 while(src<tokenEnd); |
|
373 } |
|
374 else |
|
375 { |
|
376 TUint8* bitMask = src; |
|
377 src += 32; |
|
378 if(src>srcEnd) |
|
379 goto error; |
|
380 TInt b=0; |
|
381 do |
|
382 { |
|
383 TUint8 mask = bitMask[b>>3]; |
|
384 if(mask&(1<<(b&7))) |
|
385 { |
|
386 if(src>srcEnd) |
|
387 goto error; |
|
388 TInt p1 = *src++; |
|
389 if(src>srcEnd) |
|
390 goto error; |
|
391 TInt p2 = *src++; |
|
392 LUT0[b] = (TUint8)p1; |
|
393 LUT1[b] = (TUint8)p2; |
|
394 --numTokens; |
|
395 } |
|
396 ++b; |
|
397 } |
|
398 while(b<0x100); |
|
399 if(numTokens) |
|
400 goto error; |
|
401 } |
|
402 } |
|
403 |
|
404 if(src>=srcEnd) |
|
405 goto error; |
|
406 b = *src++; |
|
407 if(dst>=dstEnd) |
|
408 goto error; |
|
409 p1 = LUT0[b]; |
|
410 if(p1!=b) |
|
411 goto not_single; |
|
412 next: |
|
413 if(src>=srcEnd) |
|
414 goto done_s; |
|
415 b = *src++; |
|
416 *dst++ = (TUint8)p1; |
|
417 if(dst>=dstEnd) |
|
418 goto done_d; |
|
419 p1 = LUT0[b]; |
|
420 if(p1==b) |
|
421 goto next; |
|
422 |
|
423 not_single: |
|
424 if(b==marker) |
|
425 goto do_marker; |
|
426 |
|
427 do_pair: |
|
428 p2 = LUT1[b]; |
|
429 b = p1; |
|
430 p1 = LUT0[b]; |
|
431 if(sp<=stack) |
|
432 goto error; |
|
433 *--sp = (TUint8)p2; |
|
434 |
|
435 recurse: |
|
436 if(b!=p1) |
|
437 goto do_pair; |
|
438 |
|
439 if(sp==stackStart) |
|
440 goto next; |
|
441 b = *sp++; |
|
442 if(dst>=dstEnd) |
|
443 goto error; |
|
444 *dst++ = (TUint8)p1; |
|
445 p1 = LUT0[b]; |
|
446 goto recurse; |
|
447 |
|
448 do_marker: |
|
449 if(src>=srcEnd) |
|
450 goto error; |
|
451 p1 = *src++; |
|
452 goto next; |
|
453 |
|
454 error: |
|
455 srcNext = 0; |
|
456 return KErrCorrupt; |
|
457 |
|
458 done_s: |
|
459 *dst++ = (TUint8)p1; |
|
460 srcNext = src; |
|
461 return dst-dstStart; |
|
462 |
|
463 done_d: |
|
464 if(dst>=dstEnd) |
|
465 --src; |
|
466 srcNext = src; |
|
467 return dst-dstStart; |
|
468 } |
|
469 |
|
470 |
|
471 TUint8 PakBuffer[MaxBlockSize*4]; |
|
472 TUint8 UnpakBuffer[MaxBlockSize]; |
|
473 |
|
474 |
|
475 TInt BytePairCompress(TUint8* dst, TUint8* src, TInt size) |
|
476 { |
|
477 ASSERT(size<=MaxBlockSize); |
|
478 TInt compressedSize = Pak(PakBuffer,src,size); |
|
479 TUint8* pakEnd; |
|
480 TInt us = Unpak(UnpakBuffer,MaxBlockSize,PakBuffer,compressedSize,pakEnd); |
|
481 ASSERT(us==size) |
|
482 ASSERT(pakEnd==PakBuffer+compressedSize) |
|
483 ASSERT(!memcmp(src,UnpakBuffer,size)) |
|
484 if(compressedSize>=size) |
|
485 return KErrTooBig; |
|
486 memcpy(dst,PakBuffer,compressedSize); |
|
487 return compressedSize; |
|
488 } |