|
1 /* inffas86.c is a hand tuned assembler version of |
|
2 * |
|
3 * inffast.c -- fast decoding |
|
4 * Copyright (C) 1995-2003 Mark Adler |
|
5 * For conditions of distribution and use, see copyright notice in zlib.h |
|
6 * |
|
7 * Copyright (C) 2003 Chris Anderson <christop@charm.net> |
|
8 * Please use the copyright conditions above. |
|
9 * |
|
10 * Dec-29-2003 -- I added AMD64 inflate asm support. This version is also |
|
11 * slightly quicker on x86 systems because, instead of using rep movsb to copy |
|
12 * data, it uses rep movsw, which moves data in 2-byte chunks instead of single |
|
13 * bytes. I've tested the AMD64 code on Fedora Core 1 with the x86_64 updates |
|
14 * from http://fedora.linux.duke.edu/fc1_x86_64 |
|
15 * which is running on an Athlon 64 3000+ / Gigabyte GA-K8VT800M system with |
|
16 * 1GB ram. The 64-bit version is about 4% faster than the 32-bit version, |
|
17 * when decompressing mozilla-source-1.3.tar.gz. |
|
18 * |
|
19 * Mar-13-2003 -- Most of this is derived from inffast.S which is derived from |
|
20 * the gcc -S output of zlib-1.2.0/inffast.c. Zlib-1.2.0 is in beta release at |
|
21 * the moment. I have successfully compiled and tested this code with gcc2.96, |
|
22 * gcc3.2, icc5.0, msvc6.0. It is very close to the speed of inffast.S |
|
23 * compiled with gcc -DNO_MMX, but inffast.S is still faster on the P3 with MMX |
|
24 * enabled. I will attempt to merge the MMX code into this version. Newer |
|
25 * versions of this and inffast.S can be found at |
|
26 * http://www.eetbeetee.com/zlib/ and http://www.charm.net/~christop/zlib/ |
|
27 */ |
|
28 |
|
29 #include "zutil.h" |
|
30 #include "inftrees.h" |
|
31 #include "inflate.h" |
|
32 #include "inffast.h" |
|
33 |
|
34 /* Mark Adler's comments from inffast.c: */ |
|
35 |
|
36 /* |
|
37 Decode literal, length, and distance codes and write out the resulting |
|
38 literal and match bytes until either not enough input or output is |
|
39 available, an end-of-block is encountered, or a data error is encountered. |
|
40 When large enough input and output buffers are supplied to inflate(), for |
|
41 example, a 16K input buffer and a 64K output buffer, more than 95% of the |
|
42 inflate execution time is spent in this routine. |
|
43 |
|
44 Entry assumptions: |
|
45 |
|
46 state->mode == LEN |
|
47 strm->avail_in >= 6 |
|
48 strm->avail_out >= 258 |
|
49 start >= strm->avail_out |
|
50 state->bits < 8 |
|
51 |
|
52 On return, state->mode is one of: |
|
53 |
|
54 LEN -- not enough output space or available input remains |
|
55 TYPE -- reached end of block code, inflate() to interpret next block |
|
56 BAD -- error in block data |
|
57 |
|
58 Notes: |
|
59 |
|
60 - The maximum input bits used by a length/distance pair is 15 bits for the |
|
61 length code, 5 bits for the length extra, 15 bits for the distance code, |
|
62 and 13 bits for the distance extra. This totals 48 bits, or six bytes. |
|
63 Therefore if strm->avail_in >= 6, then there is enough input to avoid |
|
64 checking for available input while decoding. |
|
65 |
|
66 - The maximum bytes that a single length/distance pair can output is 258 |
|
67 bytes, which is the maximum length that can be coded. inflate_fast() |
|
68 requires strm->avail_out >= 258 for each loop to avoid checking for |
|
69 output space. |
|
70 */ |
|
71 void inflate_fast(strm, start) |
|
72 z_streamp strm; |
|
73 unsigned start; /* inflate()'s starting value for strm->avail_out */ |
|
74 { |
|
75 struct inflate_state FAR *state; |
|
76 struct inffast_ar { |
|
77 /* 64 32 x86 x86_64 */ |
|
78 /* ar offset register */ |
|
79 /* 0 0 */ void *esp; /* esp save */ |
|
80 /* 8 4 */ void *ebp; /* ebp save */ |
|
81 /* 16 8 */ unsigned char FAR *in; /* esi rsi local strm->next_in */ |
|
82 /* 24 12 */ unsigned char FAR *last; /* r9 while in < last */ |
|
83 /* 32 16 */ unsigned char FAR *out; /* edi rdi local strm->next_out */ |
|
84 /* 40 20 */ unsigned char FAR *beg; /* inflate()'s init next_out */ |
|
85 /* 48 24 */ unsigned char FAR *end; /* r10 while out < end */ |
|
86 /* 56 28 */ unsigned char FAR *window;/* size of window, wsize!=0 */ |
|
87 /* 64 32 */ code const FAR *lcode; /* ebp rbp local strm->lencode */ |
|
88 /* 72 36 */ code const FAR *dcode; /* r11 local strm->distcode */ |
|
89 /* 80 40 */ unsigned long hold; /* edx rdx local strm->hold */ |
|
90 /* 88 44 */ unsigned bits; /* ebx rbx local strm->bits */ |
|
91 /* 92 48 */ unsigned wsize; /* window size */ |
|
92 /* 96 52 */ unsigned write; /* window write index */ |
|
93 /*100 56 */ unsigned lmask; /* r12 mask for lcode */ |
|
94 /*104 60 */ unsigned dmask; /* r13 mask for dcode */ |
|
95 /*108 64 */ unsigned len; /* r14 match length */ |
|
96 /*112 68 */ unsigned dist; /* r15 match distance */ |
|
97 /*116 72 */ unsigned status; /* set when state chng*/ |
|
98 } ar; |
|
99 |
|
100 #if defined( __GNUC__ ) && defined( __amd64__ ) && ! defined( __i386 ) |
|
101 #define PAD_AVAIL_IN 6 |
|
102 #define PAD_AVAIL_OUT 258 |
|
103 #else |
|
104 #define PAD_AVAIL_IN 5 |
|
105 #define PAD_AVAIL_OUT 257 |
|
106 #endif |
|
107 |
|
108 /* copy state to local variables */ |
|
109 state = (struct inflate_state FAR *)strm->state; |
|
110 ar.in = strm->next_in; |
|
111 ar.last = ar.in + (strm->avail_in - PAD_AVAIL_IN); |
|
112 ar.out = strm->next_out; |
|
113 ar.beg = ar.out - (start - strm->avail_out); |
|
114 ar.end = ar.out + (strm->avail_out - PAD_AVAIL_OUT); |
|
115 ar.wsize = state->wsize; |
|
116 ar.write = state->write; |
|
117 ar.window = state->window; |
|
118 ar.hold = state->hold; |
|
119 ar.bits = state->bits; |
|
120 ar.lcode = state->lencode; |
|
121 ar.dcode = state->distcode; |
|
122 ar.lmask = (1U << state->lenbits) - 1; |
|
123 ar.dmask = (1U << state->distbits) - 1; |
|
124 |
|
125 /* decode literals and length/distances until end-of-block or not enough |
|
126 input data or output space */ |
|
127 |
|
128 /* align in on 1/2 hold size boundary */ |
|
129 while (((unsigned long)(void *)ar.in & (sizeof(ar.hold) / 2 - 1)) != 0) { |
|
130 ar.hold += (unsigned long)*ar.in++ << ar.bits; |
|
131 ar.bits += 8; |
|
132 } |
|
133 |
|
134 #if defined( __GNUC__ ) && defined( __amd64__ ) && ! defined( __i386 ) |
|
135 __asm__ __volatile__ ( |
|
136 " leaq %0, %%rax\n" |
|
137 " movq %%rbp, 8(%%rax)\n" /* save regs rbp and rsp */ |
|
138 " movq %%rsp, (%%rax)\n" |
|
139 " movq %%rax, %%rsp\n" /* make rsp point to &ar */ |
|
140 " movq 16(%%rsp), %%rsi\n" /* rsi = in */ |
|
141 " movq 32(%%rsp), %%rdi\n" /* rdi = out */ |
|
142 " movq 24(%%rsp), %%r9\n" /* r9 = last */ |
|
143 " movq 48(%%rsp), %%r10\n" /* r10 = end */ |
|
144 " movq 64(%%rsp), %%rbp\n" /* rbp = lcode */ |
|
145 " movq 72(%%rsp), %%r11\n" /* r11 = dcode */ |
|
146 " movq 80(%%rsp), %%rdx\n" /* rdx = hold */ |
|
147 " movl 88(%%rsp), %%ebx\n" /* ebx = bits */ |
|
148 " movl 100(%%rsp), %%r12d\n" /* r12d = lmask */ |
|
149 " movl 104(%%rsp), %%r13d\n" /* r13d = dmask */ |
|
150 /* r14d = len */ |
|
151 /* r15d = dist */ |
|
152 " cld\n" |
|
153 " cmpq %%rdi, %%r10\n" |
|
154 " je .L_one_time\n" /* if only one decode left */ |
|
155 " cmpq %%rsi, %%r9\n" |
|
156 " je .L_one_time\n" |
|
157 " jmp .L_do_loop\n" |
|
158 |
|
159 ".L_one_time:\n" |
|
160 " movq %%r12, %%r8\n" /* r8 = lmask */ |
|
161 " cmpb $32, %%bl\n" |
|
162 " ja .L_get_length_code_one_time\n" |
|
163 |
|
164 " lodsl\n" /* eax = *(uint *)in++ */ |
|
165 " movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */ |
|
166 " addb $32, %%bl\n" /* bits += 32 */ |
|
167 " shlq %%cl, %%rax\n" |
|
168 " orq %%rax, %%rdx\n" /* hold |= *((uint *)in)++ << bits */ |
|
169 " jmp .L_get_length_code_one_time\n" |
|
170 |
|
171 ".align 32,0x90\n" |
|
172 ".L_while_test:\n" |
|
173 " cmpq %%rdi, %%r10\n" |
|
174 " jbe .L_break_loop\n" |
|
175 " cmpq %%rsi, %%r9\n" |
|
176 " jbe .L_break_loop\n" |
|
177 |
|
178 ".L_do_loop:\n" |
|
179 " movq %%r12, %%r8\n" /* r8 = lmask */ |
|
180 " cmpb $32, %%bl\n" |
|
181 " ja .L_get_length_code\n" /* if (32 < bits) */ |
|
182 |
|
183 " lodsl\n" /* eax = *(uint *)in++ */ |
|
184 " movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */ |
|
185 " addb $32, %%bl\n" /* bits += 32 */ |
|
186 " shlq %%cl, %%rax\n" |
|
187 " orq %%rax, %%rdx\n" /* hold |= *((uint *)in)++ << bits */ |
|
188 |
|
189 ".L_get_length_code:\n" |
|
190 " andq %%rdx, %%r8\n" /* r8 &= hold */ |
|
191 " movl (%%rbp,%%r8,4), %%eax\n" /* eax = lcode[hold & lmask] */ |
|
192 |
|
193 " movb %%ah, %%cl\n" /* cl = this.bits */ |
|
194 " subb %%ah, %%bl\n" /* bits -= this.bits */ |
|
195 " shrq %%cl, %%rdx\n" /* hold >>= this.bits */ |
|
196 |
|
197 " testb %%al, %%al\n" |
|
198 " jnz .L_test_for_length_base\n" /* if (op != 0) 45.7% */ |
|
199 |
|
200 " movq %%r12, %%r8\n" /* r8 = lmask */ |
|
201 " shrl $16, %%eax\n" /* output this.val char */ |
|
202 " stosb\n" |
|
203 |
|
204 ".L_get_length_code_one_time:\n" |
|
205 " andq %%rdx, %%r8\n" /* r8 &= hold */ |
|
206 " movl (%%rbp,%%r8,4), %%eax\n" /* eax = lcode[hold & lmask] */ |
|
207 |
|
208 ".L_dolen:\n" |
|
209 " movb %%ah, %%cl\n" /* cl = this.bits */ |
|
210 " subb %%ah, %%bl\n" /* bits -= this.bits */ |
|
211 " shrq %%cl, %%rdx\n" /* hold >>= this.bits */ |
|
212 |
|
213 " testb %%al, %%al\n" |
|
214 " jnz .L_test_for_length_base\n" /* if (op != 0) 45.7% */ |
|
215 |
|
216 " shrl $16, %%eax\n" /* output this.val char */ |
|
217 " stosb\n" |
|
218 " jmp .L_while_test\n" |
|
219 |
|
220 ".align 32,0x90\n" |
|
221 ".L_test_for_length_base:\n" |
|
222 " movl %%eax, %%r14d\n" /* len = this */ |
|
223 " shrl $16, %%r14d\n" /* len = this.val */ |
|
224 " movb %%al, %%cl\n" |
|
225 |
|
226 " testb $16, %%al\n" |
|
227 " jz .L_test_for_second_level_length\n" /* if ((op & 16) == 0) 8% */ |
|
228 " andb $15, %%cl\n" /* op &= 15 */ |
|
229 " jz .L_decode_distance\n" /* if (!op) */ |
|
230 |
|
231 ".L_add_bits_to_len:\n" |
|
232 " subb %%cl, %%bl\n" |
|
233 " xorl %%eax, %%eax\n" |
|
234 " incl %%eax\n" |
|
235 " shll %%cl, %%eax\n" |
|
236 " decl %%eax\n" |
|
237 " andl %%edx, %%eax\n" /* eax &= hold */ |
|
238 " shrq %%cl, %%rdx\n" |
|
239 " addl %%eax, %%r14d\n" /* len += hold & mask[op] */ |
|
240 |
|
241 ".L_decode_distance:\n" |
|
242 " movq %%r13, %%r8\n" /* r8 = dmask */ |
|
243 " cmpb $32, %%bl\n" |
|
244 " ja .L_get_distance_code\n" /* if (32 < bits) */ |
|
245 |
|
246 " lodsl\n" /* eax = *(uint *)in++ */ |
|
247 " movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */ |
|
248 " addb $32, %%bl\n" /* bits += 32 */ |
|
249 " shlq %%cl, %%rax\n" |
|
250 " orq %%rax, %%rdx\n" /* hold |= *((uint *)in)++ << bits */ |
|
251 |
|
252 ".L_get_distance_code:\n" |
|
253 " andq %%rdx, %%r8\n" /* r8 &= hold */ |
|
254 " movl (%%r11,%%r8,4), %%eax\n" /* eax = dcode[hold & dmask] */ |
|
255 |
|
256 ".L_dodist:\n" |
|
257 " movl %%eax, %%r15d\n" /* dist = this */ |
|
258 " shrl $16, %%r15d\n" /* dist = this.val */ |
|
259 " movb %%ah, %%cl\n" |
|
260 " subb %%ah, %%bl\n" /* bits -= this.bits */ |
|
261 " shrq %%cl, %%rdx\n" /* hold >>= this.bits */ |
|
262 " movb %%al, %%cl\n" /* cl = this.op */ |
|
263 |
|
264 " testb $16, %%al\n" /* if ((op & 16) == 0) */ |
|
265 " jz .L_test_for_second_level_dist\n" |
|
266 " andb $15, %%cl\n" /* op &= 15 */ |
|
267 " jz .L_check_dist_one\n" |
|
268 |
|
269 ".L_add_bits_to_dist:\n" |
|
270 " subb %%cl, %%bl\n" |
|
271 " xorl %%eax, %%eax\n" |
|
272 " incl %%eax\n" |
|
273 " shll %%cl, %%eax\n" |
|
274 " decl %%eax\n" /* (1 << op) - 1 */ |
|
275 " andl %%edx, %%eax\n" /* eax &= hold */ |
|
276 " shrq %%cl, %%rdx\n" |
|
277 " addl %%eax, %%r15d\n" /* dist += hold & ((1 << op) - 1) */ |
|
278 |
|
279 ".L_check_window:\n" |
|
280 " movq %%rsi, %%r8\n" /* save in so from can use it's reg */ |
|
281 " movq %%rdi, %%rax\n" |
|
282 " subq 40(%%rsp), %%rax\n" /* nbytes = out - beg */ |
|
283 |
|
284 " cmpl %%r15d, %%eax\n" |
|
285 " jb .L_clip_window\n" /* if (dist > nbytes) 4.2% */ |
|
286 |
|
287 " movl %%r14d, %%ecx\n" /* ecx = len */ |
|
288 " movq %%rdi, %%rsi\n" |
|
289 " subq %%r15, %%rsi\n" /* from = out - dist */ |
|
290 |
|
291 " sarl %%ecx\n" |
|
292 " jnc .L_copy_two\n" /* if len % 2 == 0 */ |
|
293 |
|
294 " rep movsw\n" |
|
295 " movb (%%rsi), %%al\n" |
|
296 " movb %%al, (%%rdi)\n" |
|
297 " incq %%rdi\n" |
|
298 |
|
299 " movq %%r8, %%rsi\n" /* move in back to %rsi, toss from */ |
|
300 " jmp .L_while_test\n" |
|
301 |
|
302 ".L_copy_two:\n" |
|
303 " rep movsw\n" |
|
304 " movq %%r8, %%rsi\n" /* move in back to %rsi, toss from */ |
|
305 " jmp .L_while_test\n" |
|
306 |
|
307 ".align 32,0x90\n" |
|
308 ".L_check_dist_one:\n" |
|
309 " cmpl $1, %%r15d\n" /* if dist 1, is a memset */ |
|
310 " jne .L_check_window\n" |
|
311 " cmpq %%rdi, 40(%%rsp)\n" /* if out == beg, outside window */ |
|
312 " je .L_check_window\n" |
|
313 |
|
314 " movl %%r14d, %%ecx\n" /* ecx = len */ |
|
315 " movb -1(%%rdi), %%al\n" |
|
316 " movb %%al, %%ah\n" |
|
317 |
|
318 " sarl %%ecx\n" |
|
319 " jnc .L_set_two\n" |
|
320 " movb %%al, (%%rdi)\n" |
|
321 " incq %%rdi\n" |
|
322 |
|
323 ".L_set_two:\n" |
|
324 " rep stosw\n" |
|
325 " jmp .L_while_test\n" |
|
326 |
|
327 ".align 32,0x90\n" |
|
328 ".L_test_for_second_level_length:\n" |
|
329 " testb $64, %%al\n" |
|
330 " jnz .L_test_for_end_of_block\n" /* if ((op & 64) != 0) */ |
|
331 |
|
332 " xorl %%eax, %%eax\n" |
|
333 " incl %%eax\n" |
|
334 " shll %%cl, %%eax\n" |
|
335 " decl %%eax\n" |
|
336 " andl %%edx, %%eax\n" /* eax &= hold */ |
|
337 " addl %%r14d, %%eax\n" /* eax += len */ |
|
338 " movl (%%rbp,%%rax,4), %%eax\n" /* eax = lcode[val+(hold&mask[op])]*/ |
|
339 " jmp .L_dolen\n" |
|
340 |
|
341 ".align 32,0x90\n" |
|
342 ".L_test_for_second_level_dist:\n" |
|
343 " testb $64, %%al\n" |
|
344 " jnz .L_invalid_distance_code\n" /* if ((op & 64) != 0) */ |
|
345 |
|
346 " xorl %%eax, %%eax\n" |
|
347 " incl %%eax\n" |
|
348 " shll %%cl, %%eax\n" |
|
349 " decl %%eax\n" |
|
350 " andl %%edx, %%eax\n" /* eax &= hold */ |
|
351 " addl %%r15d, %%eax\n" /* eax += dist */ |
|
352 " movl (%%r11,%%rax,4), %%eax\n" /* eax = dcode[val+(hold&mask[op])]*/ |
|
353 " jmp .L_dodist\n" |
|
354 |
|
355 ".align 32,0x90\n" |
|
356 ".L_clip_window:\n" |
|
357 " movl %%eax, %%ecx\n" /* ecx = nbytes */ |
|
358 " movl 92(%%rsp), %%eax\n" /* eax = wsize, prepare for dist cmp */ |
|
359 " negl %%ecx\n" /* nbytes = -nbytes */ |
|
360 |
|
361 " cmpl %%r15d, %%eax\n" |
|
362 " jb .L_invalid_distance_too_far\n" /* if (dist > wsize) */ |
|
363 |
|
364 " addl %%r15d, %%ecx\n" /* nbytes = dist - nbytes */ |
|
365 " cmpl $0, 96(%%rsp)\n" |
|
366 " jne .L_wrap_around_window\n" /* if (write != 0) */ |
|
367 |
|
368 " movq 56(%%rsp), %%rsi\n" /* from = window */ |
|
369 " subl %%ecx, %%eax\n" /* eax -= nbytes */ |
|
370 " addq %%rax, %%rsi\n" /* from += wsize - nbytes */ |
|
371 |
|
372 " movl %%r14d, %%eax\n" /* eax = len */ |
|
373 " cmpl %%ecx, %%r14d\n" |
|
374 " jbe .L_do_copy\n" /* if (nbytes >= len) */ |
|
375 |
|
376 " subl %%ecx, %%eax\n" /* eax -= nbytes */ |
|
377 " rep movsb\n" |
|
378 " movq %%rdi, %%rsi\n" |
|
379 " subq %%r15, %%rsi\n" /* from = &out[ -dist ] */ |
|
380 " jmp .L_do_copy\n" |
|
381 |
|
382 ".align 32,0x90\n" |
|
383 ".L_wrap_around_window:\n" |
|
384 " movl 96(%%rsp), %%eax\n" /* eax = write */ |
|
385 " cmpl %%eax, %%ecx\n" |
|
386 " jbe .L_contiguous_in_window\n" /* if (write >= nbytes) */ |
|
387 |
|
388 " movl 92(%%rsp), %%esi\n" /* from = wsize */ |
|
389 " addq 56(%%rsp), %%rsi\n" /* from += window */ |
|
390 " addq %%rax, %%rsi\n" /* from += write */ |
|
391 " subq %%rcx, %%rsi\n" /* from -= nbytes */ |
|
392 " subl %%eax, %%ecx\n" /* nbytes -= write */ |
|
393 |
|
394 " movl %%r14d, %%eax\n" /* eax = len */ |
|
395 " cmpl %%ecx, %%eax\n" |
|
396 " jbe .L_do_copy\n" /* if (nbytes >= len) */ |
|
397 |
|
398 " subl %%ecx, %%eax\n" /* len -= nbytes */ |
|
399 " rep movsb\n" |
|
400 " movq 56(%%rsp), %%rsi\n" /* from = window */ |
|
401 " movl 96(%%rsp), %%ecx\n" /* nbytes = write */ |
|
402 " cmpl %%ecx, %%eax\n" |
|
403 " jbe .L_do_copy\n" /* if (nbytes >= len) */ |
|
404 |
|
405 " subl %%ecx, %%eax\n" /* len -= nbytes */ |
|
406 " rep movsb\n" |
|
407 " movq %%rdi, %%rsi\n" |
|
408 " subq %%r15, %%rsi\n" /* from = out - dist */ |
|
409 " jmp .L_do_copy\n" |
|
410 |
|
411 ".align 32,0x90\n" |
|
412 ".L_contiguous_in_window:\n" |
|
413 " movq 56(%%rsp), %%rsi\n" /* rsi = window */ |
|
414 " addq %%rax, %%rsi\n" |
|
415 " subq %%rcx, %%rsi\n" /* from += write - nbytes */ |
|
416 |
|
417 " movl %%r14d, %%eax\n" /* eax = len */ |
|
418 " cmpl %%ecx, %%eax\n" |
|
419 " jbe .L_do_copy\n" /* if (nbytes >= len) */ |
|
420 |
|
421 " subl %%ecx, %%eax\n" /* len -= nbytes */ |
|
422 " rep movsb\n" |
|
423 " movq %%rdi, %%rsi\n" |
|
424 " subq %%r15, %%rsi\n" /* from = out - dist */ |
|
425 " jmp .L_do_copy\n" /* if (nbytes >= len) */ |
|
426 |
|
427 ".align 32,0x90\n" |
|
428 ".L_do_copy:\n" |
|
429 " movl %%eax, %%ecx\n" /* ecx = len */ |
|
430 " rep movsb\n" |
|
431 |
|
432 " movq %%r8, %%rsi\n" /* move in back to %esi, toss from */ |
|
433 " jmp .L_while_test\n" |
|
434 |
|
435 ".L_test_for_end_of_block:\n" |
|
436 " testb $32, %%al\n" |
|
437 " jz .L_invalid_literal_length_code\n" |
|
438 " movl $1, 116(%%rsp)\n" |
|
439 " jmp .L_break_loop_with_status\n" |
|
440 |
|
441 ".L_invalid_literal_length_code:\n" |
|
442 " movl $2, 116(%%rsp)\n" |
|
443 " jmp .L_break_loop_with_status\n" |
|
444 |
|
445 ".L_invalid_distance_code:\n" |
|
446 " movl $3, 116(%%rsp)\n" |
|
447 " jmp .L_break_loop_with_status\n" |
|
448 |
|
449 ".L_invalid_distance_too_far:\n" |
|
450 " movl $4, 116(%%rsp)\n" |
|
451 " jmp .L_break_loop_with_status\n" |
|
452 |
|
453 ".L_break_loop:\n" |
|
454 " movl $0, 116(%%rsp)\n" |
|
455 |
|
456 ".L_break_loop_with_status:\n" |
|
457 /* put in, out, bits, and hold back into ar and pop esp */ |
|
458 " movq %%rsi, 16(%%rsp)\n" /* in */ |
|
459 " movq %%rdi, 32(%%rsp)\n" /* out */ |
|
460 " movl %%ebx, 88(%%rsp)\n" /* bits */ |
|
461 " movq %%rdx, 80(%%rsp)\n" /* hold */ |
|
462 " movq (%%rsp), %%rax\n" /* restore rbp and rsp */ |
|
463 " movq 8(%%rsp), %%rbp\n" |
|
464 " movq %%rax, %%rsp\n" |
|
465 : |
|
466 : "m" (ar) |
|
467 : "memory", "%rax", "%rbx", "%rcx", "%rdx", "%rsi", "%rdi", |
|
468 "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15" |
|
469 ); |
|
470 #elif ( defined( __GNUC__ ) || defined( __ICC ) ) && defined( __i386 ) |
|
471 __asm__ __volatile__ ( |
|
472 " leal %0, %%eax\n" |
|
473 " movl %%esp, (%%eax)\n" /* save esp, ebp */ |
|
474 " movl %%ebp, 4(%%eax)\n" |
|
475 " movl %%eax, %%esp\n" |
|
476 " movl 8(%%esp), %%esi\n" /* esi = in */ |
|
477 " movl 16(%%esp), %%edi\n" /* edi = out */ |
|
478 " movl 40(%%esp), %%edx\n" /* edx = hold */ |
|
479 " movl 44(%%esp), %%ebx\n" /* ebx = bits */ |
|
480 " movl 32(%%esp), %%ebp\n" /* ebp = lcode */ |
|
481 |
|
482 " cld\n" |
|
483 " jmp .L_do_loop\n" |
|
484 |
|
485 ".align 32,0x90\n" |
|
486 ".L_while_test:\n" |
|
487 " cmpl %%edi, 24(%%esp)\n" /* out < end */ |
|
488 " jbe .L_break_loop\n" |
|
489 " cmpl %%esi, 12(%%esp)\n" /* in < last */ |
|
490 " jbe .L_break_loop\n" |
|
491 |
|
492 ".L_do_loop:\n" |
|
493 " cmpb $15, %%bl\n" |
|
494 " ja .L_get_length_code\n" /* if (15 < bits) */ |
|
495 |
|
496 " xorl %%eax, %%eax\n" |
|
497 " lodsw\n" /* al = *(ushort *)in++ */ |
|
498 " movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */ |
|
499 " addb $16, %%bl\n" /* bits += 16 */ |
|
500 " shll %%cl, %%eax\n" |
|
501 " orl %%eax, %%edx\n" /* hold |= *((ushort *)in)++ << bits */ |
|
502 |
|
503 ".L_get_length_code:\n" |
|
504 " movl 56(%%esp), %%eax\n" /* eax = lmask */ |
|
505 " andl %%edx, %%eax\n" /* eax &= hold */ |
|
506 " movl (%%ebp,%%eax,4), %%eax\n" /* eax = lcode[hold & lmask] */ |
|
507 |
|
508 ".L_dolen:\n" |
|
509 " movb %%ah, %%cl\n" /* cl = this.bits */ |
|
510 " subb %%ah, %%bl\n" /* bits -= this.bits */ |
|
511 " shrl %%cl, %%edx\n" /* hold >>= this.bits */ |
|
512 |
|
513 " testb %%al, %%al\n" |
|
514 " jnz .L_test_for_length_base\n" /* if (op != 0) 45.7% */ |
|
515 |
|
516 " shrl $16, %%eax\n" /* output this.val char */ |
|
517 " stosb\n" |
|
518 " jmp .L_while_test\n" |
|
519 |
|
520 ".align 32,0x90\n" |
|
521 ".L_test_for_length_base:\n" |
|
522 " movl %%eax, %%ecx\n" /* len = this */ |
|
523 " shrl $16, %%ecx\n" /* len = this.val */ |
|
524 " movl %%ecx, 64(%%esp)\n" /* save len */ |
|
525 " movb %%al, %%cl\n" |
|
526 |
|
527 " testb $16, %%al\n" |
|
528 " jz .L_test_for_second_level_length\n" /* if ((op & 16) == 0) 8% */ |
|
529 " andb $15, %%cl\n" /* op &= 15 */ |
|
530 " jz .L_decode_distance\n" /* if (!op) */ |
|
531 " cmpb %%cl, %%bl\n" |
|
532 " jae .L_add_bits_to_len\n" /* if (op <= bits) */ |
|
533 |
|
534 " movb %%cl, %%ch\n" /* stash op in ch, freeing cl */ |
|
535 " xorl %%eax, %%eax\n" |
|
536 " lodsw\n" /* al = *(ushort *)in++ */ |
|
537 " movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */ |
|
538 " addb $16, %%bl\n" /* bits += 16 */ |
|
539 " shll %%cl, %%eax\n" |
|
540 " orl %%eax, %%edx\n" /* hold |= *((ushort *)in)++ << bits */ |
|
541 " movb %%ch, %%cl\n" /* move op back to ecx */ |
|
542 |
|
543 ".L_add_bits_to_len:\n" |
|
544 " subb %%cl, %%bl\n" |
|
545 " xorl %%eax, %%eax\n" |
|
546 " incl %%eax\n" |
|
547 " shll %%cl, %%eax\n" |
|
548 " decl %%eax\n" |
|
549 " andl %%edx, %%eax\n" /* eax &= hold */ |
|
550 " shrl %%cl, %%edx\n" |
|
551 " addl %%eax, 64(%%esp)\n" /* len += hold & mask[op] */ |
|
552 |
|
553 ".L_decode_distance:\n" |
|
554 " cmpb $15, %%bl\n" |
|
555 " ja .L_get_distance_code\n" /* if (15 < bits) */ |
|
556 |
|
557 " xorl %%eax, %%eax\n" |
|
558 " lodsw\n" /* al = *(ushort *)in++ */ |
|
559 " movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */ |
|
560 " addb $16, %%bl\n" /* bits += 16 */ |
|
561 " shll %%cl, %%eax\n" |
|
562 " orl %%eax, %%edx\n" /* hold |= *((ushort *)in)++ << bits */ |
|
563 |
|
564 ".L_get_distance_code:\n" |
|
565 " movl 60(%%esp), %%eax\n" /* eax = dmask */ |
|
566 " movl 36(%%esp), %%ecx\n" /* ecx = dcode */ |
|
567 " andl %%edx, %%eax\n" /* eax &= hold */ |
|
568 " movl (%%ecx,%%eax,4), %%eax\n"/* eax = dcode[hold & dmask] */ |
|
569 |
|
570 ".L_dodist:\n" |
|
571 " movl %%eax, %%ebp\n" /* dist = this */ |
|
572 " shrl $16, %%ebp\n" /* dist = this.val */ |
|
573 " movb %%ah, %%cl\n" |
|
574 " subb %%ah, %%bl\n" /* bits -= this.bits */ |
|
575 " shrl %%cl, %%edx\n" /* hold >>= this.bits */ |
|
576 " movb %%al, %%cl\n" /* cl = this.op */ |
|
577 |
|
578 " testb $16, %%al\n" /* if ((op & 16) == 0) */ |
|
579 " jz .L_test_for_second_level_dist\n" |
|
580 " andb $15, %%cl\n" /* op &= 15 */ |
|
581 " jz .L_check_dist_one\n" |
|
582 " cmpb %%cl, %%bl\n" |
|
583 " jae .L_add_bits_to_dist\n" /* if (op <= bits) 97.6% */ |
|
584 |
|
585 " movb %%cl, %%ch\n" /* stash op in ch, freeing cl */ |
|
586 " xorl %%eax, %%eax\n" |
|
587 " lodsw\n" /* al = *(ushort *)in++ */ |
|
588 " movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */ |
|
589 " addb $16, %%bl\n" /* bits += 16 */ |
|
590 " shll %%cl, %%eax\n" |
|
591 " orl %%eax, %%edx\n" /* hold |= *((ushort *)in)++ << bits */ |
|
592 " movb %%ch, %%cl\n" /* move op back to ecx */ |
|
593 |
|
594 ".L_add_bits_to_dist:\n" |
|
595 " subb %%cl, %%bl\n" |
|
596 " xorl %%eax, %%eax\n" |
|
597 " incl %%eax\n" |
|
598 " shll %%cl, %%eax\n" |
|
599 " decl %%eax\n" /* (1 << op) - 1 */ |
|
600 " andl %%edx, %%eax\n" /* eax &= hold */ |
|
601 " shrl %%cl, %%edx\n" |
|
602 " addl %%eax, %%ebp\n" /* dist += hold & ((1 << op) - 1) */ |
|
603 |
|
604 ".L_check_window:\n" |
|
605 " movl %%esi, 8(%%esp)\n" /* save in so from can use it's reg */ |
|
606 " movl %%edi, %%eax\n" |
|
607 " subl 20(%%esp), %%eax\n" /* nbytes = out - beg */ |
|
608 |
|
609 " cmpl %%ebp, %%eax\n" |
|
610 " jb .L_clip_window\n" /* if (dist > nbytes) 4.2% */ |
|
611 |
|
612 " movl 64(%%esp), %%ecx\n" /* ecx = len */ |
|
613 " movl %%edi, %%esi\n" |
|
614 " subl %%ebp, %%esi\n" /* from = out - dist */ |
|
615 |
|
616 " sarl %%ecx\n" |
|
617 " jnc .L_copy_two\n" /* if len % 2 == 0 */ |
|
618 |
|
619 " rep movsw\n" |
|
620 " movb (%%esi), %%al\n" |
|
621 " movb %%al, (%%edi)\n" |
|
622 " incl %%edi\n" |
|
623 |
|
624 " movl 8(%%esp), %%esi\n" /* move in back to %esi, toss from */ |
|
625 " movl 32(%%esp), %%ebp\n" /* ebp = lcode */ |
|
626 " jmp .L_while_test\n" |
|
627 |
|
628 ".L_copy_two:\n" |
|
629 " rep movsw\n" |
|
630 " movl 8(%%esp), %%esi\n" /* move in back to %esi, toss from */ |
|
631 " movl 32(%%esp), %%ebp\n" /* ebp = lcode */ |
|
632 " jmp .L_while_test\n" |
|
633 |
|
634 ".align 32,0x90\n" |
|
635 ".L_check_dist_one:\n" |
|
636 " cmpl $1, %%ebp\n" /* if dist 1, is a memset */ |
|
637 " jne .L_check_window\n" |
|
638 " cmpl %%edi, 20(%%esp)\n" |
|
639 " je .L_check_window\n" /* out == beg, if outside window */ |
|
640 |
|
641 " movl 64(%%esp), %%ecx\n" /* ecx = len */ |
|
642 " movb -1(%%edi), %%al\n" |
|
643 " movb %%al, %%ah\n" |
|
644 |
|
645 " sarl %%ecx\n" |
|
646 " jnc .L_set_two\n" |
|
647 " movb %%al, (%%edi)\n" |
|
648 " incl %%edi\n" |
|
649 |
|
650 ".L_set_two:\n" |
|
651 " rep stosw\n" |
|
652 " movl 32(%%esp), %%ebp\n" /* ebp = lcode */ |
|
653 " jmp .L_while_test\n" |
|
654 |
|
655 ".align 32,0x90\n" |
|
656 ".L_test_for_second_level_length:\n" |
|
657 " testb $64, %%al\n" |
|
658 " jnz .L_test_for_end_of_block\n" /* if ((op & 64) != 0) */ |
|
659 |
|
660 " xorl %%eax, %%eax\n" |
|
661 " incl %%eax\n" |
|
662 " shll %%cl, %%eax\n" |
|
663 " decl %%eax\n" |
|
664 " andl %%edx, %%eax\n" /* eax &= hold */ |
|
665 " addl 64(%%esp), %%eax\n" /* eax += len */ |
|
666 " movl (%%ebp,%%eax,4), %%eax\n" /* eax = lcode[val+(hold&mask[op])]*/ |
|
667 " jmp .L_dolen\n" |
|
668 |
|
669 ".align 32,0x90\n" |
|
670 ".L_test_for_second_level_dist:\n" |
|
671 " testb $64, %%al\n" |
|
672 " jnz .L_invalid_distance_code\n" /* if ((op & 64) != 0) */ |
|
673 |
|
674 " xorl %%eax, %%eax\n" |
|
675 " incl %%eax\n" |
|
676 " shll %%cl, %%eax\n" |
|
677 " decl %%eax\n" |
|
678 " andl %%edx, %%eax\n" /* eax &= hold */ |
|
679 " addl %%ebp, %%eax\n" /* eax += dist */ |
|
680 " movl 36(%%esp), %%ecx\n" /* ecx = dcode */ |
|
681 " movl (%%ecx,%%eax,4), %%eax\n" /* eax = dcode[val+(hold&mask[op])]*/ |
|
682 " jmp .L_dodist\n" |
|
683 |
|
684 ".align 32,0x90\n" |
|
685 ".L_clip_window:\n" |
|
686 " movl %%eax, %%ecx\n" |
|
687 " movl 48(%%esp), %%eax\n" /* eax = wsize */ |
|
688 " negl %%ecx\n" /* nbytes = -nbytes */ |
|
689 " movl 28(%%esp), %%esi\n" /* from = window */ |
|
690 |
|
691 " cmpl %%ebp, %%eax\n" |
|
692 " jb .L_invalid_distance_too_far\n" /* if (dist > wsize) */ |
|
693 |
|
694 " addl %%ebp, %%ecx\n" /* nbytes = dist - nbytes */ |
|
695 " cmpl $0, 52(%%esp)\n" |
|
696 " jne .L_wrap_around_window\n" /* if (write != 0) */ |
|
697 |
|
698 " subl %%ecx, %%eax\n" |
|
699 " addl %%eax, %%esi\n" /* from += wsize - nbytes */ |
|
700 |
|
701 " movl 64(%%esp), %%eax\n" /* eax = len */ |
|
702 " cmpl %%ecx, %%eax\n" |
|
703 " jbe .L_do_copy\n" /* if (nbytes >= len) */ |
|
704 |
|
705 " subl %%ecx, %%eax\n" /* len -= nbytes */ |
|
706 " rep movsb\n" |
|
707 " movl %%edi, %%esi\n" |
|
708 " subl %%ebp, %%esi\n" /* from = out - dist */ |
|
709 " jmp .L_do_copy\n" |
|
710 |
|
711 ".align 32,0x90\n" |
|
712 ".L_wrap_around_window:\n" |
|
713 " movl 52(%%esp), %%eax\n" /* eax = write */ |
|
714 " cmpl %%eax, %%ecx\n" |
|
715 " jbe .L_contiguous_in_window\n" /* if (write >= nbytes) */ |
|
716 |
|
717 " addl 48(%%esp), %%esi\n" /* from += wsize */ |
|
718 " addl %%eax, %%esi\n" /* from += write */ |
|
719 " subl %%ecx, %%esi\n" /* from -= nbytes */ |
|
720 " subl %%eax, %%ecx\n" /* nbytes -= write */ |
|
721 |
|
722 " movl 64(%%esp), %%eax\n" /* eax = len */ |
|
723 " cmpl %%ecx, %%eax\n" |
|
724 " jbe .L_do_copy\n" /* if (nbytes >= len) */ |
|
725 |
|
726 " subl %%ecx, %%eax\n" /* len -= nbytes */ |
|
727 " rep movsb\n" |
|
728 " movl 28(%%esp), %%esi\n" /* from = window */ |
|
729 " movl 52(%%esp), %%ecx\n" /* nbytes = write */ |
|
730 " cmpl %%ecx, %%eax\n" |
|
731 " jbe .L_do_copy\n" /* if (nbytes >= len) */ |
|
732 |
|
733 " subl %%ecx, %%eax\n" /* len -= nbytes */ |
|
734 " rep movsb\n" |
|
735 " movl %%edi, %%esi\n" |
|
736 " subl %%ebp, %%esi\n" /* from = out - dist */ |
|
737 " jmp .L_do_copy\n" |
|
738 |
|
739 ".align 32,0x90\n" |
|
740 ".L_contiguous_in_window:\n" |
|
741 " addl %%eax, %%esi\n" |
|
742 " subl %%ecx, %%esi\n" /* from += write - nbytes */ |
|
743 |
|
744 " movl 64(%%esp), %%eax\n" /* eax = len */ |
|
745 " cmpl %%ecx, %%eax\n" |
|
746 " jbe .L_do_copy\n" /* if (nbytes >= len) */ |
|
747 |
|
748 " subl %%ecx, %%eax\n" /* len -= nbytes */ |
|
749 " rep movsb\n" |
|
750 " movl %%edi, %%esi\n" |
|
751 " subl %%ebp, %%esi\n" /* from = out - dist */ |
|
752 " jmp .L_do_copy\n" /* if (nbytes >= len) */ |
|
753 |
|
754 ".align 32,0x90\n" |
|
755 ".L_do_copy:\n" |
|
756 " movl %%eax, %%ecx\n" |
|
757 " rep movsb\n" |
|
758 |
|
759 " movl 8(%%esp), %%esi\n" /* move in back to %esi, toss from */ |
|
760 " movl 32(%%esp), %%ebp\n" /* ebp = lcode */ |
|
761 " jmp .L_while_test\n" |
|
762 |
|
763 ".L_test_for_end_of_block:\n" |
|
764 " testb $32, %%al\n" |
|
765 " jz .L_invalid_literal_length_code\n" |
|
766 " movl $1, 72(%%esp)\n" |
|
767 " jmp .L_break_loop_with_status\n" |
|
768 |
|
769 ".L_invalid_literal_length_code:\n" |
|
770 " movl $2, 72(%%esp)\n" |
|
771 " jmp .L_break_loop_with_status\n" |
|
772 |
|
773 ".L_invalid_distance_code:\n" |
|
774 " movl $3, 72(%%esp)\n" |
|
775 " jmp .L_break_loop_with_status\n" |
|
776 |
|
777 ".L_invalid_distance_too_far:\n" |
|
778 " movl 8(%%esp), %%esi\n" |
|
779 " movl $4, 72(%%esp)\n" |
|
780 " jmp .L_break_loop_with_status\n" |
|
781 |
|
782 ".L_break_loop:\n" |
|
783 " movl $0, 72(%%esp)\n" |
|
784 |
|
785 ".L_break_loop_with_status:\n" |
|
786 /* put in, out, bits, and hold back into ar and pop esp */ |
|
787 " movl %%esi, 8(%%esp)\n" /* save in */ |
|
788 " movl %%edi, 16(%%esp)\n" /* save out */ |
|
789 " movl %%ebx, 44(%%esp)\n" /* save bits */ |
|
790 " movl %%edx, 40(%%esp)\n" /* save hold */ |
|
791 " movl 4(%%esp), %%ebp\n" /* restore esp, ebp */ |
|
792 " movl (%%esp), %%esp\n" |
|
793 : |
|
794 : "m" (ar) |
|
795 : "memory", "%eax", "%ebx", "%ecx", "%edx", "%esi", "%edi" |
|
796 ); |
|
797 #elif defined( _MSC_VER ) && ! defined( _M_AMD64 ) |
|
798 __asm { |
|
799 lea eax, ar |
|
800 mov [eax], esp /* save esp, ebp */ |
|
801 mov [eax+4], ebp |
|
802 mov esp, eax |
|
803 mov esi, [esp+8] /* esi = in */ |
|
804 mov edi, [esp+16] /* edi = out */ |
|
805 mov edx, [esp+40] /* edx = hold */ |
|
806 mov ebx, [esp+44] /* ebx = bits */ |
|
807 mov ebp, [esp+32] /* ebp = lcode */ |
|
808 |
|
809 cld |
|
810 jmp L_do_loop |
|
811 |
|
812 ALIGN 4 |
|
813 L_while_test: |
|
814 cmp [esp+24], edi |
|
815 jbe L_break_loop |
|
816 cmp [esp+12], esi |
|
817 jbe L_break_loop |
|
818 |
|
819 L_do_loop: |
|
820 cmp bl, 15 |
|
821 ja L_get_length_code /* if (15 < bits) */ |
|
822 |
|
823 xor eax, eax |
|
824 lodsw /* al = *(ushort *)in++ */ |
|
825 mov cl, bl /* cl = bits, needs it for shifting */ |
|
826 add bl, 16 /* bits += 16 */ |
|
827 shl eax, cl |
|
828 or edx, eax /* hold |= *((ushort *)in)++ << bits */ |
|
829 |
|
830 L_get_length_code: |
|
831 mov eax, [esp+56] /* eax = lmask */ |
|
832 and eax, edx /* eax &= hold */ |
|
833 mov eax, [ebp+eax*4] /* eax = lcode[hold & lmask] */ |
|
834 |
|
835 L_dolen: |
|
836 mov cl, ah /* cl = this.bits */ |
|
837 sub bl, ah /* bits -= this.bits */ |
|
838 shr edx, cl /* hold >>= this.bits */ |
|
839 |
|
840 test al, al |
|
841 jnz L_test_for_length_base /* if (op != 0) 45.7% */ |
|
842 |
|
843 shr eax, 16 /* output this.val char */ |
|
844 stosb |
|
845 jmp L_while_test |
|
846 |
|
847 ALIGN 4 |
|
848 L_test_for_length_base: /* op != 0: entry is a length base, a second-level link, or end-of-block/invalid */ |

849 mov ecx, eax /* len = this */ |

850 shr ecx, 16 /* len = this.val */ |

851 mov [esp+64], ecx /* save len */ |

852 mov cl, al /* cl = this.op */ |

853 |

854 test al, 16 |

855 jz L_test_for_second_level_length /* if ((op & 16) == 0) 8% */ |

856 and cl, 15 /* op &= 15 */ |

857 jz L_decode_distance /* if (!op) */ |

858 cmp bl, cl |

859 jae L_add_bits_to_len /* if (op <= bits) */ |

860 |

861 mov ch, cl /* stash op in ch, freeing cl */ |

862 xor eax, eax /* clear eax so the 16-bit load below ends up zero-extended */ |

863 lodsw /* ax = *(ushort *)in++ */ |

864 mov cl, bl /* cl = bits, needs it for shifting */ |

865 add bl, 16 /* bits += 16 */ |

866 shl eax, cl |

867 or edx, eax /* hold |= *((ushort *)in)++ << bits */ |

868 mov cl, ch /* move op back to cl */ |
|
869 |
|
870 L_add_bits_to_len: |
|
871 sub bl, cl |
|
872 xor eax, eax |
|
873 inc eax |
|
874 shl eax, cl |
|
875 dec eax |
|
876 and eax, edx /* eax &= hold */ |
|
877 shr edx, cl |
|
878 add [esp+64], eax /* len += hold & mask[op] */ |
|
879 |
|
880 L_decode_distance: /* make sure hold has more than 15 bits before the distance lookup */ |

881 cmp bl, 15 |

882 ja L_get_distance_code /* if (15 < bits) */ |

883 |

884 xor eax, eax /* clear eax so the 16-bit load below ends up zero-extended */ |

885 lodsw /* ax = *(ushort *)in++ */ |

886 mov cl, bl /* cl = bits, needs it for shifting */ |

887 add bl, 16 /* bits += 16 */ |

888 shl eax, cl |

889 or edx, eax /* hold |= *((ushort *)in)++ << bits */ |
|
890 |
|
891 L_get_distance_code: |
|
892 mov eax, [esp+60] /* eax = dmask */ |
|
893 mov ecx, [esp+36] /* ecx = dcode */ |
|
894 and eax, edx /* eax &= hold */ |
|
895 mov eax, [ecx+eax*4]/* eax = dcode[hold & dmask] */ |
|
896 |
|
897 L_dodist: /* eax = distance table entry; ebp is reused for dist and reloaded with lcode before looping */ |

898 mov ebp, eax /* dist = this */ |

899 shr ebp, 16 /* dist = this.val */ |

900 mov cl, ah /* cl = this.bits */ |

901 sub bl, ah /* bits -= this.bits */ |

902 shr edx, cl /* hold >>= this.bits */ |

903 mov cl, al /* cl = this.op */ |

904 |

905 test al, 16 /* if ((op & 16) == 0) */ |

906 jz L_test_for_second_level_dist |

907 and cl, 15 /* op &= 15 */ |

908 jz L_check_dist_one /* if (!op): no extra bits to add to dist */ |

909 cmp bl, cl |

910 jae L_add_bits_to_dist /* if (op <= bits) 97.6% */ |

911 |

912 mov ch, cl /* stash op in ch, freeing cl */ |

913 xor eax, eax /* clear eax so the 16-bit load below ends up zero-extended */ |

914 lodsw /* ax = *(ushort *)in++ */ |

915 mov cl, bl /* cl = bits, needs it for shifting */ |

916 add bl, 16 /* bits += 16 */ |

917 shl eax, cl |

918 or edx, eax /* hold |= *((ushort *)in)++ << bits */ |

919 mov cl, ch /* move op back to cl */ |
|
920 |
|
921 L_add_bits_to_dist: |
|
922 sub bl, cl |
|
923 xor eax, eax |
|
924 inc eax |
|
925 shl eax, cl |
|
926 dec eax /* (1 << op) - 1 */ |
|
927 and eax, edx /* eax &= hold */ |
|
928 shr edx, cl |
|
929 add ebp, eax /* dist += hold & ((1 << op) - 1) */ |
|
930 |
|
931 L_check_window: /* copy len bytes from dist bytes back; goes to L_clip_window if that reaches before beg */ |

932 mov [esp+8], esi /* save in so from can use its reg */ |

933 mov eax, edi |

934 sub eax, [esp+20] /* nbytes = out - beg */ |

935 |

936 cmp eax, ebp |

937 jb L_clip_window /* if (dist > nbytes) 4.2% */ |

938 |

939 mov ecx, [esp+64] /* ecx = len */ |

940 mov esi, edi |

941 sub esi, ebp /* from = out - dist */ |

942 |

943 sar ecx, 1 /* ecx = len / 2 (word count); carry = low bit of len */ |

944 jnc L_copy_two /* even len: word copies only */ |

945 |

946 rep movsw /* odd len: copy len / 2 words ... */ |

947 mov al, [esi] /* ... then the one remaining byte */ |

948 mov [edi], al |

949 inc edi |

950 |

951 mov esi, [esp+8] /* move in back to %esi, toss from */ |

952 mov ebp, [esp+32] /* ebp = lcode */ |

953 jmp L_while_test |
|
954 |
|
955 L_copy_two: |
|
956 rep movsw |
|
957 mov esi, [esp+8] /* move in back to %esi, toss from */ |
|
958 mov ebp, [esp+32] /* ebp = lcode */ |
|
959 jmp L_while_test |
|
960 |
|
961 ALIGN 4 |
|
962 L_check_dist_one: |
|
963 cmp ebp, 1 /* if dist 1, is a memset */ |
|
964 jne L_check_window |
|
965 cmp [esp+20], edi |
|
966 je L_check_window /* out == beg, if outside window */ |
|
967 |
|
968 mov ecx, [esp+64] /* ecx = len */ |
|
969 mov al, [edi-1] |
|
970 mov ah, al |
|
971 |
|
972 sar ecx, 1 |
|
973 jnc L_set_two |
|
974 mov [edi], al /* memset out with from[-1] */ |
|
975 inc edi |
|
976 |
|
977 L_set_two: |
|
978 rep stosw |
|
979 mov ebp, [esp+32] /* ebp = lcode */ |
|
980 jmp L_while_test |
|
981 |
|
982 ALIGN 4 |
|
983 L_test_for_second_level_length: |
|
984 test al, 64 |
|
985 jnz L_test_for_end_of_block /* if ((op & 64) != 0) */ |
|
986 |
|
987 xor eax, eax |
|
988 inc eax |
|
989 shl eax, cl |
|
990 dec eax |
|
991 and eax, edx /* eax &= hold */ |
|
992 add eax, [esp+64] /* eax += len */ |
|
993 mov eax, [ebp+eax*4] /* eax = lcode[val+(hold&mask[op])]*/ |
|
994 jmp L_dolen |
|
995 |
|
996 ALIGN 4 |
|
997 L_test_for_second_level_dist: |
|
998 test al, 64 |
|
999 jnz L_invalid_distance_code /* if ((op & 64) != 0) */ |
|
1000 |
|
1001 xor eax, eax |
|
1002 inc eax |
|
1003 shl eax, cl |
|
1004 dec eax |
|
1005 and eax, edx /* eax &= hold */ |
|
1006 add eax, ebp /* eax += dist */ |
|
1007 mov ecx, [esp+36] /* ecx = dcode */ |
|
1008 mov eax, [ecx+eax*4] /* eax = dcode[val+(hold&mask[op])]*/ |
|
1009 jmp L_dodist |
|
1010 |
|
1011 ALIGN 4 |
|
1012 L_clip_window: |
|
1013 mov ecx, eax |
|
1014 mov eax, [esp+48] /* eax = wsize */ |
|
1015 neg ecx /* nbytes = -nbytes */ |
|
1016 mov esi, [esp+28] /* from = window */ |
|
1017 |
|
1018 cmp eax, ebp |
|
1019 jb L_invalid_distance_too_far /* if (dist > wsize) */ |
|
1020 |
|
1021 add ecx, ebp /* nbytes = dist - nbytes */ |
|
1022 cmp dword ptr [esp+52], 0 |
|
1023 jne L_wrap_around_window /* if (write != 0) */ |
|
1024 |
|
1025 sub eax, ecx |
|
1026 add esi, eax /* from += wsize - nbytes */ |
|
1027 |
|
1028 mov eax, [esp+64] /* eax = len */ |
|
1029 cmp eax, ecx |
|
1030 jbe L_do_copy /* if (nbytes >= len) */ |
|
1031 |
|
1032 sub eax, ecx /* len -= nbytes */ |
|
1033 rep movsb |
|
1034 mov esi, edi |
|
1035 sub esi, ebp /* from = out - dist */ |
|
1036 jmp L_do_copy |
|
1037 |
|
1038 ALIGN 4 |
|
1039 L_wrap_around_window: |
|
1040 mov eax, [esp+52] /* eax = write */ |
|
1041 cmp ecx, eax |
|
1042 jbe L_contiguous_in_window /* if (write >= nbytes) */ |
|
1043 |
|
1044 add esi, [esp+48] /* from += wsize */ |
|
1045 add esi, eax /* from += write */ |
|
1046 sub esi, ecx /* from -= nbytes */ |
|
1047 sub ecx, eax /* nbytes -= write */ |
|
1048 |
|
1049 mov eax, [esp+64] /* eax = len */ |
|
1050 cmp eax, ecx |
|
1051 jbe L_do_copy /* if (nbytes >= len) */ |
|
1052 |
|
1053 sub eax, ecx /* len -= nbytes */ |
|
1054 rep movsb |
|
1055 mov esi, [esp+28] /* from = window */ |
|
1056 mov ecx, [esp+52] /* nbytes = write */ |
|
1057 cmp eax, ecx |
|
1058 jbe L_do_copy /* if (nbytes >= len) */ |
|
1059 |
|
1060 sub eax, ecx /* len -= nbytes */ |
|
1061 rep movsb |
|
1062 mov esi, edi |
|
1063 sub esi, ebp /* from = out - dist */ |
|
1064 jmp L_do_copy |
|
1065 |
|
1066 ALIGN 4 |
|
1067 L_contiguous_in_window: |
|
1068 add esi, eax |
|
1069 sub esi, ecx /* from += write - nbytes */ |
|
1070 |
|
1071 mov eax, [esp+64] /* eax = len */ |
|
1072 cmp eax, ecx |
|
1073 jbe L_do_copy /* if (nbytes >= len) */ |
|
1074 |
|
1075 sub eax, ecx /* len -= nbytes */ |
|
1076 rep movsb |
|
1077 mov esi, edi |
|
1078 sub esi, ebp /* from = out - dist */ |
|
1079 jmp L_do_copy |
|
1080 |
|
1081 ALIGN 4 |
|
1082 L_do_copy: |
|
1083 mov ecx, eax |
|
1084 rep movsb |
|
1085 |
|
1086 mov esi, [esp+8] /* move in back to %esi, toss from */ |
|
1087 mov ebp, [esp+32] /* ebp = lcode */ |
|
1088 jmp L_while_test |
|
1089 |
|
1090 L_test_for_end_of_block: |
|
1091 test al, 32 |
|
1092 jz L_invalid_literal_length_code |
|
1093 mov dword ptr [esp+72], 1 |
|
1094 jmp L_break_loop_with_status |
|
1095 |
|
1096 L_invalid_literal_length_code: |
|
1097 mov dword ptr [esp+72], 2 |
|
1098 jmp L_break_loop_with_status |
|
1099 |
|
1100 L_invalid_distance_code: |
|
1101 mov dword ptr [esp+72], 3 |
|
1102 jmp L_break_loop_with_status |
|
1103 |
|
1104 L_invalid_distance_too_far: /* reached from L_clip_window when dist > wsize */ |

1105 mov esi, [esp+8] /* esi holds from (window) here; restore in, saved at [esp+8] by L_check_window ([esp+4] is the saved ebp) */ |

1106 mov dword ptr [esp+72], 4 /* status 4: reported as "invalid distance too far back" */ |

1107 jmp L_break_loop_with_status |
|
1108 |
|
1109 L_break_loop: |
|
1110 mov dword ptr [esp+72], 0 |
|
1111 |
|
1112 L_break_loop_with_status: |
|
1113 /* put in, out, bits, and hold back into ar and pop esp */ |
|
1114 mov [esp+8], esi /* save in */ |
|
1115 mov [esp+16], edi /* save out */ |
|
1116 mov [esp+44], ebx /* save bits */ |
|
1117 mov [esp+40], edx /* save hold */ |
|
1118 mov ebp, [esp+4] /* restore esp, ebp */ |
|
1119 mov esp, [esp] |
|
1120 } |
|
1121 #else |
|
1122 #error "x86 architecture not defined" |
|
1123 #endif |
|
1124 |
|
/* Translate the status left in ar by the assembly loop.  In the MSVC
   assembly above: 0 = loop ended on the in/out limit checks, 1 = end of
   block, 2 = invalid literal/length code, 3 = invalid distance code,
   4 = distance too far back. */
1125 if (ar.status > 1) { |

1126 if (ar.status == 2) |

1127 strm->msg = "invalid literal/length code"; |

1128 else if (ar.status == 3) |

1129 strm->msg = "invalid distance code"; |

1130 else |

1131 strm->msg = "invalid distance too far back"; |

1132 state->mode = BAD; |

1133 } |

1134 else if ( ar.status == 1 ) { |

1135 state->mode = TYPE; |

1136 } |

1137 |

1138 /* return unused bytes (on entry, bits < 8, so in won't go too far back) */ |

1139 ar.len = ar.bits >> 3; /* number of whole bytes still sitting in hold */ |

1140 ar.in -= ar.len; /* step in back over those bytes */ |

1141 ar.bits -= ar.len << 3; /* keep only the leftover bit count (less than 8) */ |

1142 ar.hold &= (1U << ar.bits) - 1; /* drop the bits belonging to the returned bytes */ |

1143 |

1144 /* update state and return */ |

1145 strm->next_in = ar.in; |

1146 strm->next_out = ar.out; |

/* ar.last and ar.end are compared against in and out by L_while_test;
   the available counts are rebuilt from the distance to them plus the
   PAD_AVAIL_* constants, whichever side of the limit the pointer is on. */
1147 strm->avail_in = (unsigned)(ar.in < ar.last ? |

1148 PAD_AVAIL_IN + (ar.last - ar.in) : |

1149 PAD_AVAIL_IN - (ar.in - ar.last)); |

1150 strm->avail_out = (unsigned)(ar.out < ar.end ? |

1151 PAD_AVAIL_OUT + (ar.end - ar.out) : |

1152 PAD_AVAIL_OUT - (ar.out - ar.end)); |

1153 state->hold = ar.hold; |

1154 state->bits = ar.bits; |

1155 return; |
|
1156 } |
|
1157 |