|
1 /* |
|
2 * Copyright (c) 2009 Nokia Corporation and/or its subsidiary(-ies). |
|
3 * All rights reserved. |
|
4 * This component and the accompanying materials are made available |
|
5 * under the terms of "Eclipse Public License v1.0" |
|
6 * which accompanies this distribution, and is available |
|
7 * at the URL "http://www.eclipse.org/legal/epl-v10.html". |
|
8 * |
|
9 * Initial Contributors: |
|
10 * Nokia Corporation - initial contribution. |
|
11 * |
|
12 * Contributors: |
|
13 * |
|
14 * Description: |
|
15 * |
|
16 */ |
|
17 /* inflate.c -- Not copyrighted 1992 by Mark Adler |
|
18 version c10p1, 10 January 1993 */ |
|
19 |
|
20 /* You can do whatever you like with this source file, though I would |
|
21 prefer that if you modify it and redistribute it that you include |
|
22 comments to that effect with your name and the date. Thank you. |
|
23 [The history has been moved to the file ChangeLog.] |
|
24 */ |
|
25 |
|
26 /* |
|
27 Inflate deflated (PKZIP's method 8 compressed) data. The compression |
|
28 method searches for as much of the current string of bytes (up to a |
|
29 length of 258) in the previous 32K bytes. If it doesn't find any |
|
30 matches (of at least length 3), it codes the next byte. Otherwise, it |
|
31 codes the length of the matched string and its distance backwards from |
|
32 the current position. There is a single Huffman code that codes both |
|
33 single bytes (called "literals") and match lengths. A second Huffman |
|
34 code codes the distance information, which follows a length code. Each |
|
35 length or distance code actually represents a base value and a number |
|
36 of "extra" (sometimes zero) bits to get to add to the base value. At |
|
37 the end of each deflated block is a special end-of-block (EOB) literal/ |
|
38 length code. The decoding process is basically: get a literal/length |
|
39 code; if EOB then done; if a literal, emit the decoded byte; if a |
|
40 length then get the distance and emit the referred-to bytes from the |
|
41 sliding window of previously emitted data. |
|
42 |
|
43 There are (currently) three kinds of inflate blocks: stored, fixed, and |
|
44 dynamic. The compressor deals with some chunk of data at a time, and |
|
45 decides which method to use on a chunk-by-chunk basis. A chunk might |
|
46 typically be 32K or 64K. If the chunk is uncompressible, then the |
|
47 "stored" method is used. In this case, the bytes are simply stored as |
|
48 is, eight bits per byte, with none of the above coding. The bytes are |
|
49 preceded by a count, since there is no longer an EOB code. |
|
50 |
|
51 If the data is compressible, then either the fixed or dynamic methods |
|
52 are used. In the dynamic method, the compressed data is preceded by |
|
53 an encoding of the literal/length and distance Huffman codes that are |
|
54 to be used to decode this block. The representation is itself Huffman |
|
55 coded, and so is preceded by a description of that code. These code |
|
56 descriptions take up a little space, and so for small blocks, there is |
|
57 a predefined set of codes, called the fixed codes. The fixed method is |
|
58 used if the block codes up smaller that way (usually for quite small |
|
59 chunks), otherwise the dynamic method is used. In the latter case, the |
|
60 codes are customized to the probabilities in the current block, and so |
|
61 can code it much better than the pre-determined fixed codes. |
|
62 |
|
63 The Huffman codes themselves are decoded using a mutli-level table |
|
64 lookup, in order to maximize the speed of decoding plus the speed of |
|
65 building the decoding tables. See the comments below that precede the |
|
66 lbits and dbits tuning parameters. |
|
67 */ |
|
68 |
|
69 |
|
70 /* |
|
71 Notes beyond the 1.93a appnote.txt: |
|
72 |
|
73 1. Distance pointers never point before the beginning of the output |
|
74 stream. |
|
75 2. Distance pointers can point back across blocks, up to 32k away. |
|
76 3. There is an implied maximum of 7 bits for the bit length table and |
|
77 15 bits for the actual data. |
|
78 4. If only one code exists, then it is encoded using one bit. (Zero |
|
79 would be more efficient, but perhaps a little confusing.) If two |
|
80 codes exist, they are coded using one bit each (0 and 1). |
|
81 5. There is no way of sending zero distance codes--a dummy must be |
|
82 sent if there are none. (History: a pre 2.0 version of PKZIP would |
|
83 store blocks with no distance codes, but this was discovered to be |
|
84 too harsh a criterion.) Valid only for 1.93a. 2.04c does allow |
|
85 zero distance codes, which is sent as one code of zero bits in |
|
86 length. |
|
87 6. There are up to 286 literal/length codes. Code 256 represents the |
|
88 end-of-block. Note however that the static length tree defines |
|
89 288 codes just to fill out the Huffman codes. Codes 286 and 287 |
|
90 cannot be used though, since there is no length base or extra bits |
|
91 defined for them. Similarly, there are up to 30 distance codes. |
|
92 However, static trees define 32 codes (all 5 bits) to fill out the |
|
93 Huffman codes, but the last two had better not show up in the data. |
|
94 7. Unzip can check dynamic Huffman blocks for complete code sets. |
|
95 The exception is that a single code would not be complete (see #4). |
|
96 8. The five bits following the block type is really the number of |
|
97 literal codes sent minus 257. |
|
98 9. Length codes 8,16,16 are interpreted as 13 length codes of 8 bits |
|
99 (1+6+6). Therefore, to output three times the length, you output |
|
100 three codes (1+1+1), whereas to output four times the same length, |
|
101 you only need two codes (1+3). Hmm. |
|
102 10. In the tree reconstruction algorithm, Code = Code + Increment |
|
103 only if BitLength(i) is not zero. (Pretty obvious.) |
|
104 11. Correction: 4 Bits: # of Bit Length codes - 4 (4 - 19) |
|
105 12. Note: length code 284 can represent 227-258, but length code 285 |
|
106 really is 258. The last length deserves its own, short code |
|
107 since it gets used a lot in very redundant files. The length |
|
108 258 is special since 258 - 3 (the min match length) is 255. |
|
109 13. The literal/length and distance code bit lengths are read as a |
|
110 single stream of lengths. It is possible (and advantageous) for |
|
111 a repeat code (16, 17, or 18) to go across the boundary between |
|
112 the two sets of lengths. |
|
113 */ |
|
114 |
|
115 #include "inflate.h" |
|
116 |
|
117 extern void* memcpy(void*, const void*, unsigned); |
|
118 extern void* memset(void*, int, unsigned); |
|
119 |
|
120 /* Huffman code lookup table entry--this entry is four bytes for machines |
|
121 that have 16-bit pointers (e.g. PC's in the small or medium model). |
|
122 Valid extra bits are 0..13. e == 15 is EOB (end of block), e == 16 |
|
123 means that v is a literal, 16 < e < 32 means that v is a pointer to |
|
124 the next table, which codes e - 16 bits, and lastly e == 99 indicates |
|
125 an unused code. If a code with e == 99 is looked up, this implies an |
|
126 error in the data. */ |
|
127 struct huft { |
|
128 uch e; /* number of extra bits or operation */ |
|
129 uch b; /* number of bits in this code or subcode */ |
|
130 union { |
|
131 ush n; /* literal, length base, or distance base */ |
|
132 struct huft *t; /* pointer to next level of table */ |
|
133 } v; |
|
134 }; |
|
135 |
|
136 |
|
137 /* Function prototypes */ |
|
138 int huft_build(unsigned *, unsigned, unsigned, const ush *, const ush *, |
|
139 struct huft **, int *); |
|
140 int huft_free(struct huft *); |
|
141 int inflate_codes(struct huft *, struct huft *, int, int); |
|
142 int inflate_stored(void); |
|
143 int inflate_fixed(void); |
|
144 int inflate_dynamic(void); |
|
145 int inflate_block(int *); |
|
146 int inflate(void); |
|
147 |
|
148 |
|
149 /* The inflate algorithm uses a sliding 32K byte window on the uncompressed |
|
150 stream to find repeated byte strings. This is implemented here as a |
|
151 circular buffer. The index is updated simply by incrementing and then |
|
152 and'ing with 0x7fff (32K-1). */ |
|
153 /* It is left to other modules to supply the 32K area. It is assumed |
|
154 to be usable as if it were declared "uch slide[32768];" or as just |
|
155 "uch *slide;" and then malloc'ed in the latter case. The definition |
|
156 must be in unzip.h, included above. */ |
|
157 /* unsigned wp; current position in slide */ |
|
158 /*#define wp outcnt*/ |
|
159 /*#define flush_output(w) (wp=(w),flush_window())*/ |
|
160 |
|
161 /* Tables for deflate from PKZIP's appnote.txt. */ |
|
162 static const unsigned border[] = { /* Order of the bit length code lengths */ |
|
163 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}; |
|
164 static const ush cplens[] = { /* Copy lengths for literal codes 257..285 */ |
|
165 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, |
|
166 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0}; |
|
167 /* note: see note #13 above about the 258 in this list. */ |
|
168 static const ush cplext[] = { /* Extra bits for literal codes 257..285 */ |
|
169 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, |
|
170 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0, 99, 99}; /* 99==invalid */ |
|
171 static const ush cpdist[] = { /* Copy offsets for distance codes 0..29 */ |
|
172 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, |
|
173 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145, |
|
174 8193, 12289, 16385, 24577}; |
|
175 static const ush cpdext[] = { /* Extra bits for distance codes */ |
|
176 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, |
|
177 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, |
|
178 12, 12, 13, 13}; |
|
179 |
|
180 |
|
181 |
|
182 /* Macros for inflate() bit peeking and grabbing. |
|
183 The usage is: |
|
184 |
|
185 NEEDBITS(j) |
|
186 x = b & mask_bits[j]; |
|
187 DUMPBITS(j) |
|
188 |
|
189 where NEEDBITS makes sure that b has at least j bits in it, and |
|
190 DUMPBITS removes the bits from b. The macros use the variable k |
|
191 for the number of bits in b. Normally, b and k are register |
|
192 variables for speed, and are initialized at the beginning of a |
|
193 routine that uses these macros from a global bit buffer and count. |
|
194 |
|
195 If we assume that EOB will be the longest code, then we will never |
|
196 ask for bits with NEEDBITS that are beyond the end of the stream. |
|
197 So, NEEDBITS should not read any more bytes than are needed to |
|
198 meet the request. Then no bytes need to be "returned" to the buffer |
|
199 at the end of the last block. |
|
200 |
|
201 However, this assumption is not true for fixed blocks--the EOB code |
|
202 is 7 bits, but the other literal/length codes can be 8 or 9 bits. |
|
203 (The EOB code is shorter than other codes because fixed blocks are |
|
204 generally short. So, while a block always has an EOB, many other |
|
205 literal/length codes have a significantly lower probability of |
|
206 showing up at all.) However, by making the first table have a |
|
207 lookup of seven bits, the EOB code will be found in that first |
|
208 lookup, and so will not require that too many bits be pulled from |
|
209 the stream. |
|
210 */ |
|
211 |
|
212 ulg bb; /* bit buffer */ |
|
213 unsigned bk; /* bits in bit buffer */ |
|
214 |
|
215 static const ush mask_bits[] = { |
|
216 0x0000, |
|
217 0x0001, 0x0003, 0x0007, 0x000f, 0x001f, 0x003f, 0x007f, 0x00ff, |
|
218 0x01ff, 0x03ff, 0x07ff, 0x0fff, 0x1fff, 0x3fff, 0x7fff, 0xffff |
|
219 }; |
|
220 |
|
221 #define get_byte() (inptr < inbuf_end ? *inptr++ : fill_inbuf()) |
|
222 #define NEXTBYTE() (uch)get_byte() |
|
223 #define NEEDBITS(n) {while(k<(n)){b|=((ulg)NEXTBYTE())<<k;k+=8;}} |
|
224 #define DUMPBITS(n) {b>>=(n);k-=(n);} |
|
225 |
|
226 |
|
227 /* |
|
228 Huffman code decoding is performed using a multi-level table lookup. |
|
229 The fastest way to decode is to simply build a lookup table whose |
|
230 size is determined by the longest code. However, the time it takes |
|
231 to build this table can also be a factor if the data being decoded |
|
232 is not very long. The most common codes are necessarily the |
|
233 shortest codes, so those codes dominate the decoding time, and hence |
|
234 the speed. The idea is you can have a shorter table that decodes the |
|
235 shorter, more probable codes, and then point to subsidiary tables for |
|
236 the longer codes. The time it costs to decode the longer codes is |
|
237 then traded against the time it takes to make longer tables. |
|
238 |
|
239 This results of this trade are in the variables lbits and dbits |
|
240 below. lbits is the number of bits the first level table for literal/ |
|
241 length codes can decode in one step, and dbits is the same thing for |
|
242 the distance codes. Subsequent tables are also less than or equal to |
|
243 those sizes. These values may be adjusted either when all of the |
|
244 codes are shorter than that, in which case the longest code length in |
|
245 bits is used, or when the shortest code is *longer* than the requested |
|
246 table size, in which case the length of the shortest code in bits is |
|
247 used. |
|
248 |
|
249 There are two different values for the two tables, since they code a |
|
250 different number of possibilities each. The literal/length table |
|
251 codes 286 possible values, or in a flat code, a little over eight |
|
252 bits. The distance table codes 30 possible values, or a little less |
|
253 than five bits, flat. The optimum values for speed end up being |
|
254 about one bit more than those, so lbits is 8+1 and dbits is 5+1. |
|
255 The optimum values may differ though from machine to machine, and |
|
256 possibly even between compilers. Your mileage may vary. |
|
257 */ |
|
258 |
|
259 |
|
260 static const int lbits = 9; /* bits in base literal/length lookup table */ |
|
261 static const int dbits = 6; /* bits in base distance lookup table */ |
|
262 |
|
263 |
|
264 /* If BMAX needs to be larger than 16, then h and x[] should be ulg. */ |
|
265 #define BMAX 16 /* maximum bit length of any code (16 for explode) */ |
|
266 #define N_MAX 288 /* maximum number of codes in any set */ |
|
267 |
|
268 |
|
269 unsigned hufts; /* track memory usage */ |
|
270 |
|
271 |
|
272 int huft_build( |
|
273 unsigned *b, /* code lengths in bits (all assumed <= BMAX) */ |
|
274 unsigned n, /* number of codes (assumed <= N_MAX) */ |
|
275 unsigned s, /* number of simple-valued codes (0..s-1) */ |
|
276 const ush *d, /* list of base values for non-simple codes */ |
|
277 const ush *e, /* list of extra bits for non-simple codes */ |
|
278 struct huft **t, /* result: starting table */ |
|
279 int *m /* maximum lookup bits, returns actual */ |
|
280 ) |
|
281 /* Given a list of code lengths and a maximum table size, make a set of |
|
282 tables to decode that set of codes. Return zero on success, one if |
|
283 the given code set is incomplete (the tables are still built in this |
|
284 case), two if the input is invalid (all zero length codes or an |
|
285 oversubscribed set of lengths), and three if not enough memory. */ |
|
286 { |
|
287 unsigned a; /* counter for codes of length k */ |
|
288 unsigned c[BMAX+1]; /* bit length count table */ |
|
289 unsigned f; /* i repeats in table every f entries */ |
|
290 int g; /* maximum code length */ |
|
291 int h; /* table level */ |
|
292 register unsigned i; /* counter, current code */ |
|
293 register unsigned j; /* counter */ |
|
294 register int k; /* number of bits in current code */ |
|
295 int l; /* bits per table (returned in m) */ |
|
296 register unsigned *p; /* pointer into c[], b[], or v[] */ |
|
297 register struct huft *q; /* points to current table */ |
|
298 struct huft r; /* table entry for structure assignment */ |
|
299 struct huft *u[BMAX]; /* table stack */ |
|
300 unsigned v[N_MAX]; /* values in order of bit length */ |
|
301 register int w; /* bits before this table == (l * h) */ |
|
302 unsigned x[BMAX+1]; /* bit offsets, then code stack */ |
|
303 unsigned *xp; /* pointer into x */ |
|
304 int y; /* number of dummy codes added */ |
|
305 unsigned z; /* number of entries in current table */ |
|
306 |
|
307 |
|
308 /* Generate counts for each bit length */ |
|
309 memset(c, 0, sizeof(c)); |
|
310 p = b; i = n; |
|
311 do { |
|
312 /* Tracecv(*p, (stderr, (n-i >= ' ' && n-i <= '~' ? "%c %d\n" : "0x%x %d\n"), |
|
313 n-i, *p));*/ |
|
314 c[*p]++; /* assume all entries <= BMAX */ |
|
315 p++; /* Can't combine with above line (Solaris bug) */ |
|
316 } while (--i); |
|
317 if (c[0] == n) /* null input--all zero length codes */ |
|
318 { |
|
319 *t = (struct huft *)NULL; |
|
320 *m = 0; |
|
321 return 0; |
|
322 } |
|
323 |
|
324 |
|
325 /* Find minimum and maximum length, bound *m by those */ |
|
326 l = *m; |
|
327 for (j = 1; j <= BMAX; j++) |
|
328 if (c[j]) |
|
329 break; |
|
330 k = j; /* minimum code length */ |
|
331 if ((unsigned)l < j) |
|
332 l = j; |
|
333 for (i = BMAX; i; i--) |
|
334 if (c[i]) |
|
335 break; |
|
336 g = i; /* maximum code length */ |
|
337 if ((unsigned)l > i) |
|
338 l = i; |
|
339 *m = l; |
|
340 |
|
341 |
|
342 /* Adjust last length count to fill out codes, if needed */ |
|
343 for (y = 1 << j; j < i; j++, y <<= 1) |
|
344 if ((y -= c[j]) < 0) |
|
345 return 2; /* bad input: more codes than bits */ |
|
346 if ((y -= c[i]) < 0) |
|
347 return 2; |
|
348 c[i] += y; |
|
349 |
|
350 |
|
351 /* Generate starting offsets into the value table for each length */ |
|
352 x[1] = j = 0; |
|
353 p = c + 1; xp = x + 2; |
|
354 while (--i) { /* note that i == g from above */ |
|
355 *xp++ = (j += *p++); |
|
356 } |
|
357 |
|
358 |
|
359 /* Make a table of values in order of bit lengths */ |
|
360 p = b; i = 0; |
|
361 do { |
|
362 if ((j = *p++) != 0) |
|
363 v[x[j]++] = i; |
|
364 } while (++i < n); |
|
365 |
|
366 |
|
367 /* Generate the Huffman codes and for each, make the table entries */ |
|
368 x[0] = i = 0; /* first Huffman code is zero */ |
|
369 p = v; /* grab values in bit order */ |
|
370 h = -1; /* no tables yet--level -1 */ |
|
371 w = -l; /* bits decoded == (l * h) */ |
|
372 u[0] = (struct huft *)NULL; /* just to keep compilers happy */ |
|
373 q = (struct huft *)NULL; /* ditto */ |
|
374 z = 0; /* ditto */ |
|
375 |
|
376 /* go through the bit lengths (k already is bits in shortest code) */ |
|
377 for (; k <= g; k++) |
|
378 { |
|
379 a = c[k]; |
|
380 while (a--) |
|
381 { |
|
382 /* here i is the Huffman code of length k bits for value *p */ |
|
383 /* make tables up to required level */ |
|
384 while (k > w + l) |
|
385 { |
|
386 h++; |
|
387 w += l; /* previous table always l bits */ |
|
388 |
|
389 /* compute minimum size table less than or equal to l bits */ |
|
390 z = (z = g - w) > (unsigned)l ? l : z; /* upper limit on table size */ |
|
391 if ((f = 1 << (j = k - w)) > a + 1) /* try a k-w bit table */ |
|
392 { /* too few codes for k-w bit table */ |
|
393 f -= a + 1; /* deduct codes from patterns left */ |
|
394 xp = c + k; |
|
395 while (++j < z) /* try smaller tables up to z bits */ |
|
396 { |
|
397 if ((f <<= 1) <= *++xp) |
|
398 break; /* enough codes to use up j bits */ |
|
399 f -= *xp; /* else deduct codes from patterns */ |
|
400 } |
|
401 } |
|
402 z = 1 << j; /* table entries for j-bit table */ |
|
403 |
|
404 /* allocate and link in new table */ |
|
405 if ((q = (struct huft *)malloc((z + 1)*sizeof(struct huft))) == |
|
406 (struct huft *)NULL) |
|
407 { |
|
408 if (h) |
|
409 huft_free(u[0]); |
|
410 return 3; /* not enough memory */ |
|
411 } |
|
412 hufts += z + 1; /* track memory usage */ |
|
413 *t = q + 1; /* link to list for huft_free() */ |
|
414 *(t = &(q->v.t)) = (struct huft *)NULL; |
|
415 u[h] = ++q; /* table starts after link */ |
|
416 |
|
417 /* connect to last table, if there is one */ |
|
418 if (h) |
|
419 { |
|
420 x[h] = i; /* save pattern for backing up */ |
|
421 r.b = (uch)l; /* bits to dump before this table */ |
|
422 r.e = (uch)(16 + j); /* bits in this table */ |
|
423 r.v.t = q; /* pointer to this table */ |
|
424 j = i >> (w - l); /* (get around Turbo C bug) */ |
|
425 u[h-1][j] = r; /* connect to last table */ |
|
426 } |
|
427 } |
|
428 |
|
429 /* set up table entry in r */ |
|
430 r.b = (uch)(k - w); |
|
431 if (p >= v + n) |
|
432 r.e = 99; /* out of values--invalid code */ |
|
433 else if (*p < s) |
|
434 { |
|
435 r.e = (uch)(*p < 256 ? 16 : 15); /* 256 is end-of-block code */ |
|
436 r.v.n = (ush)(*p); /* simple code is just the value */ |
|
437 p++; /* one compiler does not like *p++ */ |
|
438 } |
|
439 else |
|
440 { |
|
441 r.e = (uch)e[*p - s]; /* non-simple--look up in lists */ |
|
442 r.v.n = d[*p++ - s]; |
|
443 } |
|
444 |
|
445 /* fill code-like entries with r */ |
|
446 f = 1 << (k - w); |
|
447 for (j = i >> w; j < z; j += f) |
|
448 q[j] = r; |
|
449 |
|
450 /* backwards increment the k-bit code i */ |
|
451 for (j = 1 << (k - 1); i & j; j >>= 1) |
|
452 i ^= j; |
|
453 i ^= j; |
|
454 |
|
455 /* backup over finished tables */ |
|
456 while ((i & ((1 << w) - 1)) != x[h]) |
|
457 { |
|
458 h--; /* don't need to update q */ |
|
459 w -= l; |
|
460 } |
|
461 } |
|
462 } |
|
463 |
|
464 |
|
465 /* Return true (1) if we were given an incomplete table */ |
|
466 return y != 0 && g != 1; |
|
467 } |
|
468 |
|
469 |
|
470 |
|
471 int huft_free(struct huft *t) |
|
472 /* Free the malloc'ed tables built by huft_build(), which makes a linked |
|
473 list of the tables it made, with the links in a dummy first entry of |
|
474 each table. */ |
|
475 { |
|
476 register struct huft *p, *q; |
|
477 |
|
478 |
|
479 /* Go through linked list, freeing from the malloced (t[-1]) address. */ |
|
480 p = t; |
|
481 while (p != (struct huft *)NULL) |
|
482 { |
|
483 q = (--p)->v.t; |
|
484 free((char*)p); |
|
485 p = q; |
|
486 } |
|
487 return 0; |
|
488 } |
|
489 |
|
490 |
|
491 int inflate_codes( |
|
492 struct huft *tl, |
|
493 struct huft *td, /* literal/length and distance decoder tables */ |
|
494 int bl, |
|
495 int bd /* number of bits decoded by tl[] and td[] */ |
|
496 ) |
|
497 /* inflate (decompress) the codes in a deflated (compressed) block. |
|
498 Return an error code or zero if it all goes ok. */ |
|
499 { |
|
500 register unsigned e; /* table entry flag/number of extra bits */ |
|
501 unsigned n, d; /* length and index for copy */ |
|
502 struct huft *t; /* pointer to table entry */ |
|
503 unsigned ml, md; /* masks for bl and bd bits */ |
|
504 register ulg b=bb; /* bit buffer */ |
|
505 register unsigned k=bk; /* number of bits in bit buffer */ |
|
506 register uch* p=(uch*)outptr; |
|
507 |
|
508 /* inflate the coded data */ |
|
509 ml = mask_bits[bl]; /* precompute masks for speed */ |
|
510 md = mask_bits[bd]; |
|
511 for (;;) /* do until end of block */ |
|
512 { |
|
513 NEEDBITS((unsigned)bl) |
|
514 if ((e = (t = tl + ((unsigned)b & ml))->e) > 16) |
|
515 do { |
|
516 if (e == 99) |
|
517 return 1; |
|
518 DUMPBITS(t->b) |
|
519 e -= 16; |
|
520 NEEDBITS(e) |
|
521 } while ((e = (t = t->v.t + ((unsigned)b & mask_bits[e]))->e) > 16); |
|
522 DUMPBITS(t->b) |
|
523 if (e == 16) /* then it's a literal */ |
|
524 { |
|
525 *p++ = (uch)t->v.n; |
|
526 } |
|
527 else /* it's an EOB or a length */ |
|
528 { |
|
529 /* exit if end of block */ |
|
530 if (e == 15) |
|
531 break; |
|
532 |
|
533 /* get length of block to copy */ |
|
534 NEEDBITS(e) |
|
535 n = t->v.n + ((unsigned)b & mask_bits[e]); |
|
536 DUMPBITS(e); |
|
537 |
|
538 /* decode distance of block to copy */ |
|
539 NEEDBITS((unsigned)bd) |
|
540 if ((e = (t = td + ((unsigned)b & md))->e) > 16) |
|
541 do { |
|
542 if (e == 99) |
|
543 return 1; |
|
544 DUMPBITS(t->b) |
|
545 e -= 16; |
|
546 NEEDBITS(e) |
|
547 } while ((e = (t = t->v.t + ((unsigned)b & mask_bits[e]))->e) > 16); |
|
548 DUMPBITS(t->b) |
|
549 NEEDBITS(e) |
|
550 d = t->v.n + ((unsigned)b & mask_bits[e]); |
|
551 d &= ZIP_WINDOW_SIZE-1; |
|
552 DUMPBITS(e) |
|
553 |
|
554 /* do the copy */ |
|
555 if (d>=n) |
|
556 { |
|
557 memcpy(p, p-d, n); |
|
558 p+=n; |
|
559 } |
|
560 else |
|
561 { |
|
562 uch* q=p-d; |
|
563 while(n--) *p++=*q++; |
|
564 } |
|
565 } |
|
566 } |
|
567 |
|
568 |
|
569 /* restore the globals from the locals */ |
|
570 outptr=p; |
|
571 bb = b; /* restore global bit buffer */ |
|
572 bk = k; |
|
573 |
|
574 /* done */ |
|
575 return 0; |
|
576 } |
|
577 |
|
578 |
|
579 |
|
580 int inflate_stored() |
|
581 /* "decompress" an inflated type 0 (stored) block. */ |
|
582 { |
|
583 unsigned n; /* number of bytes in block */ |
|
584 register ulg b; /* bit buffer */ |
|
585 register unsigned k; /* number of bits in bit buffer */ |
|
586 |
|
587 register uch* p=(uch*)outptr; |
|
588 |
|
589 |
|
590 /* make local copies of globals */ |
|
591 b = bb; /* initialize bit buffer */ |
|
592 k = bk; |
|
593 |
|
594 |
|
595 /* go to byte boundary */ |
|
596 n = k & 7; |
|
597 DUMPBITS(n); |
|
598 |
|
599 |
|
600 /* get the length and its complement */ |
|
601 NEEDBITS(16) |
|
602 n = ((unsigned)b & 0xffff); |
|
603 DUMPBITS(16) |
|
604 NEEDBITS(16) |
|
605 if (n != (unsigned)((~b) & 0xffff)) |
|
606 return 1; /* error in compressed data */ |
|
607 DUMPBITS(16) |
|
608 |
|
609 |
|
610 /* read and output the compressed data */ |
|
611 while (n--) |
|
612 { |
|
613 NEEDBITS(8) |
|
614 *p++=(uch)b; |
|
615 DUMPBITS(8) |
|
616 } |
|
617 |
|
618 |
|
619 /* restore the globals from the locals */ |
|
620 outptr=p; |
|
621 bb = b; /* restore global bit buffer */ |
|
622 bk = k; |
|
623 return 0; |
|
624 } |
|
625 |
|
626 |
|
627 |
|
628 int inflate_fixed() |
|
629 /* decompress an inflated type 1 (fixed Huffman codes) block. We should |
|
630 either replace this with a custom decoder, or at least precompute the |
|
631 Huffman tables. */ |
|
632 { |
|
633 int i; /* temporary variable */ |
|
634 struct huft *tl; /* literal/length code table */ |
|
635 struct huft *td; /* distance code table */ |
|
636 int bl; /* lookup bits for tl */ |
|
637 int bd; /* lookup bits for td */ |
|
638 unsigned l[288]; /* length list for huft_build */ |
|
639 |
|
640 |
|
641 /* set up literal table */ |
|
642 for (i = 0; i < 144; i++) |
|
643 l[i] = 8; |
|
644 for (; i < 256; i++) |
|
645 l[i] = 9; |
|
646 for (; i < 280; i++) |
|
647 l[i] = 7; |
|
648 for (; i < 288; i++) /* make a complete, but wrong code set */ |
|
649 l[i] = 8; |
|
650 bl = 7; |
|
651 // coverity[overrun-call] |
|
652 // Coverity seems to get confused - there is nothing wrong with this code |
|
653 if ((i = huft_build(l, 288, 257, cplens, cplext, &tl, &bl)) != 0) |
|
654 return i; |
|
655 |
|
656 |
|
657 /* set up distance table */ |
|
658 for (i = 0; i < 30; i++) /* make an incomplete code set */ |
|
659 l[i] = 5; |
|
660 bd = 5; |
|
661 if ((i = huft_build(l, 30, 0, cpdist, cpdext, &td, &bd)) > 1) |
|
662 { |
|
663 huft_free(tl); |
|
664 return i; |
|
665 } |
|
666 |
|
667 |
|
668 /* decompress until an end-of-block code */ |
|
669 if (inflate_codes(tl, td, bl, bd)) |
|
670 return 1; |
|
671 |
|
672 |
|
673 /* free the decoding tables, return */ |
|
674 huft_free(tl); |
|
675 huft_free(td); |
|
676 return 0; |
|
677 } |
|
678 |
|
679 |
|
680 |
|
681 int inflate_dynamic() |
|
682 /* decompress an inflated type 2 (dynamic Huffman codes) block. */ |
|
683 { |
|
684 int i; /* temporary variables */ |
|
685 unsigned j; |
|
686 unsigned l; /* last length */ |
|
687 unsigned m; /* mask for bit lengths table */ |
|
688 unsigned n; /* number of lengths to get */ |
|
689 struct huft *tl; /* literal/length code table */ |
|
690 struct huft *td; /* distance code table */ |
|
691 int bl; /* lookup bits for tl */ |
|
692 int bd; /* lookup bits for td */ |
|
693 unsigned nb; /* number of bit length codes */ |
|
694 unsigned nl; /* number of literal/length codes */ |
|
695 unsigned nd; /* number of distance codes */ |
|
696 #ifdef PKZIP_BUG_WORKAROUND |
|
697 unsigned ll[288+32]; /* literal/length and distance code lengths */ |
|
698 #else |
|
699 unsigned ll[286+30]; /* literal/length and distance code lengths */ |
|
700 #endif |
|
701 register ulg b; /* bit buffer */ |
|
702 register unsigned k; /* number of bits in bit buffer */ |
|
703 |
|
704 |
|
705 /* make local bit buffer */ |
|
706 b = bb; |
|
707 k = bk; |
|
708 |
|
709 |
|
710 /* read in table lengths */ |
|
711 NEEDBITS(5) |
|
712 nl = 257 + ((unsigned)b & 0x1f); /* number of literal/length codes */ |
|
713 DUMPBITS(5) |
|
714 NEEDBITS(5) |
|
715 nd = 1 + ((unsigned)b & 0x1f); /* number of distance codes */ |
|
716 DUMPBITS(5) |
|
717 NEEDBITS(4) |
|
718 nb = 4 + ((unsigned)b & 0xf); /* number of bit length codes */ |
|
719 DUMPBITS(4) |
|
720 #ifdef PKZIP_BUG_WORKAROUND |
|
721 if (nl > 288 || nd > 32) |
|
722 #else |
|
723 if (nl > 286 || nd > 30) |
|
724 #endif |
|
725 return 1; /* bad lengths */ |
|
726 |
|
727 |
|
728 /* read in bit-length-code lengths */ |
|
729 for (j = 0; j < nb; j++) |
|
730 { |
|
731 NEEDBITS(3) |
|
732 ll[border[j]] = (unsigned)b & 7; |
|
733 DUMPBITS(3) |
|
734 } |
|
735 for (; j < 19; j++) |
|
736 ll[border[j]] = 0; |
|
737 |
|
738 |
|
739 /* build decoding table for trees--single level, 7 bit lookup */ |
|
740 bl = 7; |
|
741 if ((i = huft_build(ll, 19, 19, NULL, NULL, &tl, &bl)) != 0) |
|
742 { |
|
743 if (i == 1) |
|
744 huft_free(tl); |
|
745 return i; /* incomplete code set */ |
|
746 } |
|
747 |
|
748 |
|
749 /* read in literal and distance code lengths */ |
|
750 n = nl + nd; |
|
751 m = mask_bits[bl]; |
|
752 i = l = 0; |
|
753 while ((unsigned)i < n) |
|
754 { |
|
755 NEEDBITS((unsigned)bl) |
|
756 j = (td = tl + ((unsigned)b & m))->b; |
|
757 DUMPBITS(j) |
|
758 j = td->v.n; |
|
759 if (j < 16) /* length of code in bits (0..15) */ |
|
760 ll[i++] = l = j; /* save last length in l */ |
|
761 else if (j == 16) /* repeat last length 3 to 6 times */ |
|
762 { |
|
763 NEEDBITS(2) |
|
764 j = 3 + ((unsigned)b & 3); |
|
765 DUMPBITS(2) |
|
766 if ((unsigned)i + j > n) |
|
767 return 1; |
|
768 while (j--) |
|
769 ll[i++] = l; |
|
770 } |
|
771 else if (j == 17) /* 3 to 10 zero length codes */ |
|
772 { |
|
773 NEEDBITS(3) |
|
774 j = 3 + ((unsigned)b & 7); |
|
775 DUMPBITS(3) |
|
776 if ((unsigned)i + j > n) |
|
777 return 1; |
|
778 while (j--) |
|
779 ll[i++] = 0; |
|
780 l = 0; |
|
781 } |
|
782 else /* j == 18: 11 to 138 zero length codes */ |
|
783 { |
|
784 NEEDBITS(7) |
|
785 j = 11 + ((unsigned)b & 0x7f); |
|
786 DUMPBITS(7) |
|
787 if ((unsigned)i + j > n) |
|
788 return 1; |
|
789 while (j--) |
|
790 ll[i++] = 0; |
|
791 l = 0; |
|
792 } |
|
793 } |
|
794 |
|
795 |
|
796 /* free decoding table for trees */ |
|
797 huft_free(tl); |
|
798 |
|
799 |
|
800 /* restore the global bit buffer */ |
|
801 bb = b; |
|
802 bk = k; |
|
803 |
|
804 |
|
805 /* build the decoding tables for literal/length and distance codes */ |
|
806 bl = lbits; |
|
807 if ((i = huft_build(ll, nl, 257, cplens, cplext, &tl, &bl)) != 0) |
|
808 { |
|
809 if (i == 1) { |
|
810 /* fprintf(stderr, " incomplete literal tree\n");*/ |
|
811 huft_free(tl); |
|
812 } |
|
813 return i; /* incomplete code set */ |
|
814 } |
|
815 bd = dbits; |
|
816 if ((i = huft_build(ll + nl, nd, 0, cpdist, cpdext, &td, &bd)) != 0) |
|
817 { |
|
818 if (i == 1) { |
|
819 /* fprintf(stderr, " incomplete distance tree\n");*/ |
|
820 #ifdef PKZIP_BUG_WORKAROUND |
|
821 i = 0; |
|
822 } |
|
823 #else |
|
824 huft_free(td); |
|
825 } |
|
826 huft_free(tl); |
|
827 return i; /* incomplete code set */ |
|
828 #endif |
|
829 } |
|
830 |
|
831 |
|
832 /* decompress until an end-of-block code */ |
|
833 if (inflate_codes(tl, td, bl, bd)) |
|
834 return 1; |
|
835 |
|
836 |
|
837 /* free the decoding tables, return */ |
|
838 huft_free(tl); |
|
839 huft_free(td); |
|
840 return 0; |
|
841 } |
|
842 |
|
843 |
|
844 |
|
845 int inflate_block(int* e) |
|
846 /* decompress an inflated block */ |
|
847 { |
|
848 unsigned t; /* block type */ |
|
849 register ulg b; /* bit buffer */ |
|
850 register unsigned k; /* number of bits in bit buffer */ |
|
851 |
|
852 |
|
853 /* make local bit buffer */ |
|
854 b = bb; |
|
855 k = bk; |
|
856 |
|
857 |
|
858 /* read in last block bit */ |
|
859 NEEDBITS(1) |
|
860 *e = (int)b & 1; |
|
861 DUMPBITS(1) |
|
862 |
|
863 |
|
864 /* read in block type */ |
|
865 NEEDBITS(2) |
|
866 t = (unsigned)b & 3; |
|
867 DUMPBITS(2) |
|
868 |
|
869 |
|
870 /* restore the global bit buffer */ |
|
871 bb = b; |
|
872 bk = k; |
|
873 |
|
874 |
|
875 /* inflate that block type */ |
|
876 if (t == 2) |
|
877 return inflate_dynamic(); |
|
878 if (t == 0) |
|
879 return inflate_stored(); |
|
880 if (t == 1) |
|
881 return inflate_fixed(); |
|
882 |
|
883 |
|
884 /* bad block type */ |
|
885 return 2; |
|
886 } |
|
887 |
|
888 |
|
889 |
|
890 int inflate() |
|
891 /* decompress an inflated entry */ |
|
892 { |
|
893 int e; /* last block flag */ |
|
894 int r; /* result code */ |
|
895 unsigned h; /* maximum struct huft's malloc'ed */ |
|
896 |
|
897 |
|
898 /* initialize window, bit buffer */ |
|
899 /* wp = 0;*/ |
|
900 bk = 0; |
|
901 bb = 0; |
|
902 |
|
903 |
|
904 /* decompress until the last block */ |
|
905 h = 0; |
|
906 do { |
|
907 hufts = 0; |
|
908 r=inflate_block(&e); |
|
909 process_block(r); |
|
910 if (r!=0) |
|
911 return r; |
|
912 if (hufts > h) |
|
913 h = hufts; |
|
914 } while (!e); |
|
915 |
|
916 /* Undo too much lookahead. The next read will be byte aligned so we |
|
917 * can discard unused bits in the last meaningful byte. |
|
918 */ |
|
919 /* while (bk >= 8) { |
|
920 bk -= 8; |
|
921 inptr--; |
|
922 }*/ |
|
923 |
|
924 /* flush out slide */ |
|
925 /* flush_output(wp);*/ |
|
926 |
|
927 |
|
928 /* return success */ |
|
929 #ifdef DEBUG |
|
930 /* fprintf(stderr, "<%u> ", h);*/ |
|
931 #endif /* DEBUG */ |
|
932 |
|
933 return 0; |
|
934 } |