|
1 // Copyright (c) 2007-2009 Nokia Corporation and/or its subsidiary(-ies). |
|
2 // All rights reserved. |
|
3 // This component and the accompanying materials are made available |
|
4 // under the terms of the License "Eclipse Public License v1.0" |
|
5 // which accompanies this distribution, and is available |
|
6 // at the URL "http://www.eclipse.org/legal/epl-v10.html". |
|
7 // |
|
8 // Initial Contributors: |
|
9 // Nokia Corporation - initial contribution. |
|
10 // |
|
11 // Contributors: |
|
12 // |
|
13 // Description: |
|
14 // e32\include\byte_pair_compress.h |
|
15 // This header file contains both the prototype and implementation for the byte pair compressor. |
|
16 // This is done to facilitate sharing the code between the e32 test code and the tools. The |
|
17 // implementation is only included if the macro BYTE_PAIR_COMPRESS_INCLUDE_IMPLEMENTATION is |
|
18 // defined. |
|
19 // |
|
20 // WARNING: This file contains some APIs which are internal and are subject |
|
21 // to change without notice. Such APIs should therefore not be used |
|
22 // outside the Kernel and Hardware Services package. |
|
23 // |
|
24 |
|
25 #ifndef __BYTE_PAIR_COMPRESS_H__ |
|
26 #define __BYTE_PAIR_COMPRESS_H__ |
|
27 |
|
28 #include <e32std.h> |
|
29 |
|
30 /** |
|
31 * @internalTechnology |
|
32 * |
|
33 * Compress up to 4096 bytes of data usign the byte-pair compression algorithm. |
|
34 * |
|
35 * @param aDest Destination buffer, must be at least four times the size of the source data. |
|
36 * @param aSrc Source data to compress. |
|
37 * @param aSize The size of the source data. |
|
38 */ |
|
39 extern TInt BytePairCompress(TUint8* aDest, TUint8* aSrc, TInt aSize); |
|
40 |
|
41 #ifdef BYTE_PAIR_COMPRESS_INCLUDE_IMPLEMENTATION |
|
42 |
|
43 const TInt BlockSize = 0x1000; |
|
44 TInt ExtraSave = 0; |
|
45 |
|
46 TUint16 PairCount[0x10000]; |
|
47 TUint16 PairBuffer[BlockSize*2]; |
|
48 |
|
49 TUint16 GlobalPairs[0x10000] = {0}; |
|
50 TUint16 GlobalTokenCounts[0x100] = {0}; |
|
51 |
|
52 TUint16 ByteCount[0x100+4]; |
|
53 |
|
54 void CountBytes(TUint8* data, TInt size) |
|
55 { |
|
56 memset(ByteCount,0,sizeof(ByteCount)); |
|
57 TUint8* dataEnd = data+size; |
|
58 while(data<dataEnd) |
|
59 ++ByteCount[*data++]; |
|
60 } |
|
61 |
|
62 inline void ByteUsed(TInt b) |
|
63 { |
|
64 ByteCount[b] = 0xffff; |
|
65 } |
|
66 |
|
67 int TieBreak(int b1,int b2) |
|
68 { |
|
69 return -ByteCount[b1]-ByteCount[b2]; |
|
70 } |
|
71 |
|
72 TInt MostCommonPair(TInt& pair, TUint8* data, TInt size, TInt minFrequency, TInt marker) |
|
73 { |
|
74 memset(PairCount,0,sizeof(PairCount)); |
|
75 TUint8* dataEnd = data+size-1; |
|
76 TInt pairsFound = 0; |
|
77 TInt lastPair = -1; |
|
78 while(data<dataEnd) |
|
79 { |
|
80 TInt b1 = *data++; |
|
81 if(b1==marker) |
|
82 { |
|
83 // skip marker and following byte |
|
84 lastPair = -1; |
|
85 ++data; |
|
86 continue; |
|
87 } |
|
88 TInt b2 = *data; |
|
89 if(b2==marker) |
|
90 { |
|
91 // skip marker and following byte |
|
92 lastPair = -1; |
|
93 data+=2; |
|
94 continue; |
|
95 } |
|
96 TInt p = (b2<<8)|b1; |
|
97 if(p==lastPair) |
|
98 { |
|
99 // ensure a pair of identical bytes don't get double counted |
|
100 lastPair = -1; |
|
101 continue; |
|
102 } |
|
103 lastPair = p; |
|
104 ++PairCount[p]; |
|
105 if(PairCount[p]==minFrequency) |
|
106 PairBuffer[pairsFound++] = (TUint16)p; |
|
107 } |
|
108 |
|
109 TInt bestCount = -1; |
|
110 TInt bestPair = -1; |
|
111 TInt bestTieBreak = 0; |
|
112 TInt p; |
|
113 while(pairsFound--) |
|
114 { |
|
115 p = PairBuffer[pairsFound]; |
|
116 TInt f=PairCount[p]; |
|
117 if(f>bestCount) |
|
118 { |
|
119 bestCount = f; |
|
120 bestPair = p; |
|
121 bestTieBreak = TieBreak(p&0xff,p>>8); |
|
122 } |
|
123 else if(f==bestCount) |
|
124 { |
|
125 TInt tieBreak = TieBreak(p&0xff,p>>8); |
|
126 if(tieBreak>bestTieBreak) |
|
127 { |
|
128 bestCount = f; |
|
129 bestPair = p; |
|
130 bestTieBreak = tieBreak; |
|
131 } |
|
132 } |
|
133 } |
|
134 pair = bestPair; |
|
135 return bestCount; |
|
136 } |
|
137 |
|
138 TInt LeastCommonByte(TInt& byte) |
|
139 { |
|
140 TInt bestCount = 0xffff; |
|
141 TInt bestByte = -1; |
|
142 for(TInt b = 0; b<0x100; b++) |
|
143 { |
|
144 TInt f = ByteCount[b]; |
|
145 if(f<bestCount) |
|
146 { |
|
147 bestCount = f; |
|
148 bestByte = b; |
|
149 } |
|
150 } |
|
151 byte = bestByte; |
|
152 return bestCount; |
|
153 } |
|
154 |
|
155 TInt BytePairCompress(TUint8* dst, TUint8* src, TInt size) |
|
156 { |
|
157 TInt originalSize = size; |
|
158 TUint8* dst2 = dst+size*2; |
|
159 TUint8* in = src; |
|
160 TUint8* out = dst; |
|
161 |
|
162 TUint8 tokens[0x100*3]; |
|
163 TInt tokenCount = 0; |
|
164 |
|
165 CountBytes(in,size); |
|
166 |
|
167 TInt marker = -1; |
|
168 TInt overhead = 1+3+LeastCommonByte(marker); |
|
169 ByteUsed(marker); |
|
170 |
|
171 TUint8* inEnd = in+size; |
|
172 TUint8* outStart = out; |
|
173 while(in<inEnd) |
|
174 { |
|
175 TInt b=*in++; |
|
176 if(b==marker) |
|
177 *out++ = (TUint8)b; |
|
178 *out++ = (TUint8)b; |
|
179 } |
|
180 size = out-outStart; |
|
181 |
|
182 TInt outToggle = 1; |
|
183 in = dst; |
|
184 out = dst2; |
|
185 |
|
186 for(TInt r=256; r>0; --r) |
|
187 { |
|
188 TInt byte; |
|
189 TInt byteCount = LeastCommonByte(byte); |
|
190 TInt pair; |
|
191 TInt pairCount = MostCommonPair(pair,in,size,overhead+1,marker); |
|
192 TInt saving = pairCount-byteCount; |
|
193 if(saving<=overhead) |
|
194 break; |
|
195 |
|
196 overhead = 3; |
|
197 if(tokenCount>=32) |
|
198 overhead = 2; |
|
199 |
|
200 TUint8* d=tokens+3*tokenCount; |
|
201 ++tokenCount; |
|
202 *d++ = (TUint8)byte; |
|
203 ByteUsed(byte); |
|
204 *d++ = (TUint8)pair; |
|
205 ByteUsed(pair&0xff); |
|
206 *d++ = (TUint8)(pair>>8); |
|
207 ByteUsed(pair>>8); |
|
208 ++GlobalPairs[pair]; |
|
209 |
|
210 inEnd = in+size; |
|
211 outStart = out; |
|
212 while(in<inEnd) |
|
213 { |
|
214 TInt b=*in++; |
|
215 if(b==marker) |
|
216 { |
|
217 *out++ = (TUint8)marker; |
|
218 b = *in++; |
|
219 } |
|
220 else if(b==byte) |
|
221 { |
|
222 *out++ = (TUint8)marker; |
|
223 --byteCount; |
|
224 } |
|
225 else if(b==(pair&0xff) && in<inEnd && *in==(pair>>8)) |
|
226 { |
|
227 ++in; |
|
228 b = byte; |
|
229 --pairCount; |
|
230 } |
|
231 *out++ = (TUint8)b; |
|
232 } |
|
233 ASSERT(!byteCount); |
|
234 ASSERT(!pairCount); |
|
235 size = out-outStart; |
|
236 |
|
237 outToggle ^= 1; |
|
238 if(outToggle) |
|
239 { |
|
240 in = dst; |
|
241 out = dst2; |
|
242 } |
|
243 else |
|
244 { |
|
245 in = dst2; |
|
246 out = dst; |
|
247 } |
|
248 } |
|
249 |
|
250 // sort tokens with a bubble sort... |
|
251 for(TInt x=0; x<tokenCount-1; x++) |
|
252 for(TInt y=x+1; y<tokenCount; y++) |
|
253 if(tokens[x*3]>tokens[y*3]) |
|
254 { |
|
255 TInt z = tokens[x*3]; |
|
256 tokens[x*3] = tokens[y*3]; |
|
257 tokens[y*3] = (TUint8)z; |
|
258 z = tokens[x*3+1]; |
|
259 tokens[x*3+1] = tokens[y*3+1]; |
|
260 tokens[y*3+1] = (TUint8)z; |
|
261 z = tokens[x*3+2]; |
|
262 tokens[x*3+2] = tokens[y*3+2]; |
|
263 tokens[y*3+2] = (TUint8)z; |
|
264 } |
|
265 |
|
266 // check for not being able to compress... |
|
267 if(size>originalSize) |
|
268 { |
|
269 *dst++ = 0; // store zero token count |
|
270 memcpy(dst,src,originalSize); // store original data |
|
271 return originalSize+1; |
|
272 } |
|
273 |
|
274 // make sure data is in second buffer (dst2) |
|
275 if(in!=dst2) |
|
276 memcpy(dst2,dst,size); |
|
277 |
|
278 // store tokens... |
|
279 TUint8* originalDst = dst; |
|
280 *dst++ = (TUint8)tokenCount; |
|
281 if(tokenCount) |
|
282 { |
|
283 *dst++ = (TUint8)marker; |
|
284 if(tokenCount<32) |
|
285 { |
|
286 memcpy(dst,tokens,tokenCount*3); |
|
287 dst += tokenCount*3; |
|
288 } |
|
289 else |
|
290 { |
|
291 TUint8* bitMask = dst; |
|
292 memset(bitMask,0,32); |
|
293 dst += 32; |
|
294 TUint8* d=tokens; |
|
295 do |
|
296 { |
|
297 TInt t=*d++; |
|
298 bitMask[t>>3] |= (1<<(t&7)); |
|
299 *dst++ = *d++; |
|
300 *dst++ = *d++; |
|
301 } |
|
302 while(--tokenCount); |
|
303 } |
|
304 } |
|
305 // store data... |
|
306 memcpy(dst,dst2,size); |
|
307 dst += size; |
|
308 |
|
309 // get stats... |
|
310 ++GlobalTokenCounts[tokenCount]; |
|
311 |
|
312 // return total size of compressed data... |
|
313 return dst-originalDst; |
|
314 } |
|
315 |
|
316 #endif |
|
317 #endif |