|
1 // Copyright (c) 2005-2009 Nokia Corporation and/or its subsidiary(-ies). |
|
2 // All rights reserved. |
|
3 // This component and the accompanying materials are made available |
|
4 // under the terms of "Eclipse Public License v1.0" |
|
5 // which accompanies this distribution, and is available |
|
6 // at the URL "http://www.eclipse.org/legal/epl-v10.html". |
|
7 // |
|
8 // Initial Contributors: |
|
9 // Nokia Corporation - initial contribution. |
|
10 // |
|
11 // Contributors: |
|
12 // |
|
13 // Description: |
|
14 // |
|
15 |
|
16 #include "byte_pair.h" |
|
17 |
|
18 #undef ASSERT |
|
19 #define ASSERT(c) if(!(c)) \ |
|
20 { \ |
|
21 __BREAKPOINT() \ |
|
22 } |
|
23 void CBytePair::InitPairCount(TUint8* data, TInt size, TInt minFrequency, TInt marker) |
|
24 { |
|
25 PairsFound = 0; |
|
26 memset(PairCount,0,sizeof(PairCount)); |
|
27 TUint8* dataEnd = data+size-1; |
|
28 TInt lastPair = -1; |
|
29 while(data<dataEnd) |
|
30 { |
|
31 TInt b1 = *data++; |
|
32 if(b1==marker) |
|
33 { |
|
34 // skip marker and following byte |
|
35 lastPair = -1; |
|
36 ++data; |
|
37 continue; |
|
38 } |
|
39 TInt b2 = *data; |
|
40 if(b2==marker) |
|
41 { |
|
42 // skip marker and following byte |
|
43 lastPair = -1; |
|
44 data+=2; |
|
45 continue; |
|
46 } |
|
47 TInt p = (b2<<8)|b1; |
|
48 if(p==lastPair) |
|
49 { |
|
50 // ensure a pair of identical bytes don't get double counted |
|
51 lastPair = -1; |
|
52 continue; |
|
53 } |
|
54 lastPair = p; |
|
55 ++PairCount[p]; |
|
56 if(PairCount[p]==minFrequency) { |
|
57 PairBuffer[PairsFound++] = (TUint16)p; |
|
58 } |
|
59 } |
|
60 } |
|
61 TInt CBytePair::MostCommonPair(TInt& pair) |
|
62 { |
|
63 TInt bestCount = -1; |
|
64 TInt bestPair = -1; |
|
65 TInt bestTieBreak = 0; |
|
66 TInt p; |
|
67 TInt pairsFound = PairsFound; |
|
68 while(pairsFound--) |
|
69 { |
|
70 p = PairBuffer[pairsFound]; |
|
71 TInt f=PairCount[p]; |
|
72 if(f>bestCount) |
|
73 { |
|
74 bestCount = f; |
|
75 bestPair = p; |
|
76 //bestTieBreak = TieBreak(p&0xff,p>>8); |
|
77 bestTieBreak = -ByteCount[p&0xff]-ByteCount[p>>8]; |
|
78 } |
|
79 else if(f==bestCount) |
|
80 { |
|
81 //TInt tieBreak = TieBreak(p&0xff,p>>8); |
|
82 TInt tieBreak = -ByteCount[p&0xff]-ByteCount[p>>8]; |
|
83 if(tieBreak>bestTieBreak) |
|
84 { |
|
85 bestCount = f; |
|
86 bestPair = p; |
|
87 bestTieBreak = tieBreak; |
|
88 } |
|
89 } |
|
90 } |
|
91 pair = bestPair; |
|
92 return bestCount; |
|
93 } |
|
94 |
|
95 |
|
96 TInt CBytePair::LeastCommonByte(TInt& byte) |
|
97 { |
|
98 TInt bestCount = 0xffff; |
|
99 TInt bestByte = -1; |
|
100 TInt b; |
|
101 for(b=0; b<0x100; b++) |
|
102 { |
|
103 TInt f = ByteCount[b]; |
|
104 if(f<bestCount) |
|
105 { |
|
106 bestCount = f; |
|
107 bestByte = b; |
|
108 } |
|
109 } |
|
110 byte = bestByte; |
|
111 return bestCount; |
|
112 } |
|
113 |
|
114 |
|
115 TInt CBytePair::Compress(TUint8* dst, TUint8* src, TInt size) |
|
116 { |
|
117 TInt originalSize = size; |
|
118 TUint8* dst2 = dst+size*2; |
|
119 TUint8* in = src; |
|
120 TUint8* out = dst; |
|
121 |
|
122 TUint8 tokens[0x100*3]; |
|
123 TInt tokenCount = 0; |
|
124 |
|
125 CountBytes(in,size); |
|
126 |
|
127 TInt marker = -1; |
|
128 TInt overhead = 1+3+LeastCommonByte(marker); |
|
129 ByteUsed(marker); |
|
130 |
|
131 TUint8* inEnd = in+size; |
|
132 TUint8* outStart = out; |
|
133 while(in<inEnd) |
|
134 { |
|
135 TInt b=*in++; |
|
136 if(b==marker) |
|
137 *out++ = (TUint8)b; |
|
138 *out++ = (TUint8)b; |
|
139 } |
|
140 size = out-outStart; |
|
141 |
|
142 TInt outToggle = 1; |
|
143 in = dst; |
|
144 out = dst2; |
|
145 InitPairCount(in,size,overhead+1,marker); |
|
146 |
|
147 for(TInt r=256; r>0; --r) |
|
148 { |
|
149 TInt byte; |
|
150 TInt byteCount = LeastCommonByte(byte); |
|
151 if(iFastCompress && byteCount) break; |
|
152 TInt pair; |
|
153 TInt pairCount = MostCommonPair(pair); |
|
154 TInt saving = pairCount-byteCount; |
|
155 if(saving<=overhead) |
|
156 break; |
|
157 |
|
158 overhead = 3; |
|
159 if(tokenCount>=32) |
|
160 overhead = 2; |
|
161 TInt minFrequency = overhead+1; |
|
162 PairsFound = 0; |
|
163 memset(PairCount,0,sizeof(PairCount)); |
|
164 |
|
165 TUint8* d=tokens+3*tokenCount; |
|
166 ++tokenCount; |
|
167 *d++ = (TUint8)byte; |
|
168 ByteUsed(byte); |
|
169 *d++ = (TUint8)pair; |
|
170 ByteUsed(pair&0xff); |
|
171 *d++ = (TUint8)(pair>>8); |
|
172 ByteUsed(pair>>8); |
|
173 ++GlobalPairs[pair]; |
|
174 |
|
175 inEnd = in+size; |
|
176 outStart = out; |
|
177 // |
|
178 TInt lastPair = -1; |
|
179 TInt b, bb[2], p; |
|
180 TInt jj = 0; |
|
181 if(byteCount == 0) { |
|
182 //in this case, marker is not used in the replacement |
|
183 b=*in++; |
|
184 if(b==(pair&0xff) && in<inEnd && *in==(pair>>8)) |
|
185 { |
|
186 ++in; |
|
187 --pairCount; |
|
188 b = byte; |
|
189 } |
|
190 *out++ = (TUint8)b; |
|
191 bb[0] = b; |
|
192 |
|
193 while(in<inEnd) { |
|
194 b=*in++; |
|
195 if(b==(pair&0xff) && in<inEnd && *in==(pair>>8)) |
|
196 { |
|
197 ++in; |
|
198 --pairCount; |
|
199 b = byte; |
|
200 } |
|
201 *out++ = (TUint8)b; |
|
202 |
|
203 p = (b<<8)|bb[0]; |
|
204 bb[0] = b; |
|
205 if(p==lastPair) { |
|
206 // ensure a pair of identical bytes don't get double counted |
|
207 lastPair = -1; |
|
208 } |
|
209 else { |
|
210 lastPair = p; |
|
211 ++PairCount[p]; |
|
212 if(PairCount[p]==minFrequency) |
|
213 PairBuffer[PairsFound++] = (TUint16)p; |
|
214 } |
|
215 } |
|
216 } |
|
217 else { |
|
218 while(in<inEnd) { |
|
219 b=*in++; |
|
220 if(b==marker) |
|
221 { |
|
222 *out++ = (TUint8)marker; |
|
223 *out++ = *in++; |
|
224 lastPair = -1; |
|
225 jj = 0; |
|
226 } |
|
227 else if(b==byte) |
|
228 { |
|
229 *out++ = (TUint8)marker; |
|
230 --byteCount; |
|
231 *out++ = (TUint8)byte; |
|
232 lastPair = -1; |
|
233 jj = 0; |
|
234 } |
|
235 else if(b==(pair&0xff) && in<inEnd && *in==(pair>>8)) |
|
236 { |
|
237 ++in; |
|
238 --pairCount; |
|
239 *out++ = (TUint8)byte; |
|
240 bb[jj++] = byte; |
|
241 } |
|
242 else{ |
|
243 *out++ = (TUint8)b; |
|
244 bb[jj++] = b; |
|
245 } |
|
246 |
|
247 if(jj == 2) { |
|
248 p = (bb[1]<<8)|bb[0]; |
|
249 bb[0] = bb[1]; |
|
250 jj = 1; |
|
251 if(p==lastPair) |
|
252 { |
|
253 // ensure a pair of identical bytes don't get double counted |
|
254 lastPair = -1; |
|
255 } |
|
256 else { |
|
257 lastPair = p; |
|
258 ++PairCount[p]; |
|
259 if(PairCount[p]==minFrequency) |
|
260 PairBuffer[PairsFound++] = (TUint16)p; |
|
261 } |
|
262 } |
|
263 } |
|
264 } |
|
265 // |
|
266 ASSERT(!byteCount); |
|
267 ASSERT(!pairCount); |
|
268 size = out-outStart; |
|
269 |
|
270 outToggle ^= 1; |
|
271 if(outToggle) |
|
272 { |
|
273 in = dst; |
|
274 out = dst2; |
|
275 } |
|
276 else |
|
277 { |
|
278 in = dst2; |
|
279 out = dst; |
|
280 } |
|
281 } |
|
282 |
|
283 // sort tokens with a bubble sort... |
|
284 for(TInt x=0; x<tokenCount-1; x++) |
|
285 for(TInt y=x+1; y<tokenCount; y++) |
|
286 if(tokens[x*3]>tokens[y*3]) |
|
287 { |
|
288 TInt z = tokens[x*3]; |
|
289 tokens[x*3] = tokens[y*3]; |
|
290 tokens[y*3] = (TUint8)z; |
|
291 z = tokens[x*3+1]; |
|
292 tokens[x*3+1] = tokens[y*3+1]; |
|
293 tokens[y*3+1] = (TUint8)z; |
|
294 z = tokens[x*3+2]; |
|
295 tokens[x*3+2] = tokens[y*3+2]; |
|
296 tokens[y*3+2] = (TUint8)z; |
|
297 } |
|
298 |
|
299 // check for not being able to compress... |
|
300 if(size>originalSize) |
|
301 { |
|
302 *dst++ = 0; // store zero token count |
|
303 memcpy(dst,src,originalSize); // store original data |
|
304 return originalSize+1; |
|
305 } |
|
306 |
|
307 // make sure data is in second buffer (dst2) |
|
308 if(in!=dst2) |
|
309 memcpy(dst2,dst,size); |
|
310 |
|
311 // store tokens... |
|
312 TUint8* originalDst = dst; |
|
313 *dst++ = (TUint8)tokenCount; |
|
314 if(tokenCount) |
|
315 { |
|
316 *dst++ = (TUint8)marker; |
|
317 if(tokenCount<32) |
|
318 { |
|
319 memcpy(dst,tokens,tokenCount*3); |
|
320 dst += tokenCount*3; |
|
321 } |
|
322 else |
|
323 { |
|
324 TUint8* bitMask = dst; |
|
325 memset(bitMask,0,32); |
|
326 dst += 32; |
|
327 TUint8* d=tokens; |
|
328 do |
|
329 { |
|
330 TInt t=*d++; |
|
331 bitMask[t>>3] |= (1<<(t&7)); |
|
332 *dst++ = *d++; |
|
333 *dst++ = *d++; |
|
334 } |
|
335 while(--tokenCount); |
|
336 } |
|
337 } |
|
338 // store data... |
|
339 memcpy(dst,dst2,size); |
|
340 dst += size; |
|
341 |
|
342 // get stats... |
|
343 ++GlobalTokenCounts[tokenCount]; |
|
344 |
|
345 // return total size of compressed data... |
|
346 return dst-originalDst; |
|
347 } |
|
348 |
|
349 |
|
350 TInt CBytePair::Decompress(TUint8* dst, TInt dstSize, TUint8* src, TInt srcSize, TUint8*& srcNext) |
|
351 { |
|
352 TUint8* dstStart = dst; |
|
353 TUint8* dstEnd = dst+dstSize; |
|
354 TUint8* srcEnd = src+srcSize; |
|
355 |
|
356 TUint32 LUT[0x100/2]; |
|
357 TUint8* LUT0 = (TUint8*)LUT; |
|
358 TUint8* LUT1 = LUT0+0x100; |
|
359 |
|
360 TUint8 stack[0x100]; |
|
361 TUint8* stackStart = stack+sizeof(stack); |
|
362 TUint8* sp = stackStart; |
|
363 |
|
364 TUint32 marker = ~0u; |
|
365 TInt numTokens; |
|
366 TUint32 p1; |
|
367 TUint32 p2; |
|
368 |
|
369 TUint32* l = (TUint32*)LUT; |
|
370 TUint32 b = 0x03020100; |
|
371 TUint32 step = 0x04040404; |
|
372 do |
|
373 { |
|
374 *l++ = b; |
|
375 b += step; |
|
376 } |
|
377 while(b>step); |
|
378 |
|
379 if(src>=srcEnd) |
|
380 goto error; |
|
381 numTokens = *src++; |
|
382 if(numTokens) |
|
383 { |
|
384 if(src>=srcEnd) |
|
385 goto error; |
|
386 marker = *src++; |
|
387 LUT0[marker] = (TUint8)~marker; |
|
388 |
|
389 if(numTokens<32) |
|
390 { |
|
391 TUint8* tokenEnd = src+3*numTokens; |
|
392 if(tokenEnd>srcEnd) |
|
393 goto error; |
|
394 do |
|
395 { |
|
396 TInt b = *src++; |
|
397 TInt p1 = *src++; |
|
398 TInt p2 = *src++; |
|
399 LUT0[b] = (TUint8)p1; |
|
400 LUT1[b] = (TUint8)p2; |
|
401 } |
|
402 while(src<tokenEnd); |
|
403 } |
|
404 else |
|
405 { |
|
406 TUint8* bitMask = src; |
|
407 src += 32; |
|
408 if(src>srcEnd) |
|
409 goto error; |
|
410 TInt b=0; |
|
411 do |
|
412 { |
|
413 TUint8 mask = bitMask[b>>3]; |
|
414 if(mask&(1<<(b&7))) |
|
415 { |
|
416 if(src>srcEnd) |
|
417 goto error; |
|
418 TInt p1 = *src++; |
|
419 if(src>srcEnd) |
|
420 goto error; |
|
421 TInt p2 = *src++; |
|
422 LUT0[b] = (TUint8)p1; |
|
423 LUT1[b] = (TUint8)p2; |
|
424 --numTokens; |
|
425 } |
|
426 ++b; |
|
427 } |
|
428 while(b<0x100); |
|
429 if(numTokens) |
|
430 goto error; |
|
431 } |
|
432 } |
|
433 |
|
434 if(src>=srcEnd) |
|
435 goto error; |
|
436 b = *src++; |
|
437 if(dst>=dstEnd) |
|
438 goto error; |
|
439 p1 = LUT0[b]; |
|
440 if(p1!=b) |
|
441 goto not_single; |
|
442 next: |
|
443 if(src>=srcEnd) |
|
444 goto done_s; |
|
445 b = *src++; |
|
446 *dst++ = (TUint8)p1; |
|
447 if(dst>=dstEnd) |
|
448 goto done_d; |
|
449 p1 = LUT0[b]; |
|
450 if(p1==b) |
|
451 goto next; |
|
452 |
|
453 not_single: |
|
454 if(b==marker) |
|
455 goto do_marker; |
|
456 |
|
457 do_pair: |
|
458 p2 = LUT1[b]; |
|
459 b = p1; |
|
460 p1 = LUT0[b]; |
|
461 if(sp<=stack) |
|
462 goto error; |
|
463 *--sp = (TUint8)p2; |
|
464 |
|
465 recurse: |
|
466 if(b!=p1) |
|
467 goto do_pair; |
|
468 |
|
469 if(sp==stackStart) |
|
470 goto next; |
|
471 b = *sp++; |
|
472 if(dst>=dstEnd) |
|
473 goto error; |
|
474 *dst++ = (TUint8)p1; |
|
475 p1 = LUT0[b]; |
|
476 goto recurse; |
|
477 |
|
478 do_marker: |
|
479 if(src>=srcEnd) |
|
480 goto error; |
|
481 p1 = *src++; |
|
482 goto next; |
|
483 |
|
484 error: |
|
485 srcNext = 0; |
|
486 return KErrCorrupt; |
|
487 |
|
488 done_s: |
|
489 *dst++ = (TUint8)p1; |
|
490 srcNext = src; |
|
491 return dst-dstStart; |
|
492 |
|
493 done_d: |
|
494 if(dst>=dstEnd) |
|
495 --src; |
|
496 srcNext = src; |
|
497 return dst-dstStart; |
|
498 } |
|
499 |
|
500 |
|
501 TInt BytePairCompress(TUint8* dst, TUint8* src, TInt size, CBytePair *aBPE) |
|
502 { |
|
503 TUint8 PakBuffer[MaxBlockSize*4]; |
|
504 TUint8 UnpakBuffer[MaxBlockSize]; |
|
505 ASSERT(size<=MaxBlockSize); |
|
506 TInt compressedSize = aBPE->Compress(PakBuffer,src,size); |
|
507 TUint8* pakEnd; |
|
508 TInt us = aBPE->Decompress(UnpakBuffer,MaxBlockSize,PakBuffer,compressedSize,pakEnd); |
|
509 ASSERT(us==size) |
|
510 ASSERT(pakEnd==PakBuffer+compressedSize) |
|
511 ASSERT(!memcmp(src,UnpakBuffer,size)) |
|
512 if(compressedSize>=size) |
|
513 return KErrTooBig; |
|
514 memcpy(dst,PakBuffer,compressedSize); |
|
515 return compressedSize; |
|
516 } |