symbian-qemu-0.9.1-12/zlib-1.2.3/contrib/asm686/match.S
changeset 1 2fb8b9db1c86
equal deleted inserted replaced
0:ffa851df0825 1:2fb8b9db1c86
       
     1 /* match.s -- Pentium-Pro-optimized version of longest_match()
       
     2  * Written for zlib 1.1.2
       
     3  * Copyright (C) 1998 Brian Raiter <breadbox@muppetlabs.com>
       
     4  *
       
     5  * This is free software; you can redistribute it and/or modify it
       
     6  * under the terms of the GNU General Public License.
       
     7  */
       
     8 
       
     9 #ifndef NO_UNDERLINE
       
    10 #define	match_init	_match_init
       
    11 #define	longest_match	_longest_match
       
    12 #endif
       
    13 
       
    14 #define	MAX_MATCH	(258)
       
    15 #define	MIN_MATCH	(3)
       
    16 #define	MIN_LOOKAHEAD	(MAX_MATCH + MIN_MATCH + 1)
       
    17 #define	MAX_MATCH_8	((MAX_MATCH + 7) & ~7)
       
    18 
       
    19 /* stack frame offsets */
       
    20 
       
    21 #define	chainlenwmask		0	/* high word: current chain len	*/
       
    22 					/* low word: s->wmask		*/
       
    23 #define	window			4	/* local copy of s->window	*/
       
    24 #define	windowbestlen		8	/* s->window + bestlen		*/
       
    25 #define	scanstart		16	/* first two bytes of string	*/
       
    26 #define	scanend			12	/* last two bytes of string	*/
       
    27 #define	scanalign		20	/* dword-misalignment of string	*/
       
    28 #define	nicematch		24	/* a good enough match size	*/
       
    29 #define	bestlen			28	/* size of best match so far	*/
       
    30 #define	scan			32	/* ptr to string wanting match	*/
       
    31 
       
    32 #define	LocalVarsSize		(36)
       
    33 /*	saved ebx		36 */
       
    34 /*	saved edi		40 */
       
    35 /*	saved esi		44 */
       
    36 /*	saved ebp		48 */
       
    37 /*	return address		52 */
       
    38 #define	deflatestate		56	/* the function arguments	*/
       
    39 #define	curmatch		60
       
    40 
       
    41 /* All the +zlib1222add offsets are due to the addition of fields
       
    42  *  in zlib in the deflate_state structure since the asm code was first written
       
    43  * (if you compile with zlib 1.0.4 or older, use "zlib1222add equ (-4)").
       
    44  * (if you compile with zlib between 1.0.5 and 1.2.2.1, use "zlib1222add equ 0").
       
    45  * if you compile with zlib 1.2.2.2 or later , use "zlib1222add equ 8").
       
    46  */
       
    47 
       
    48 #define zlib1222add		(8)
       
    49 
       
    50 #define	dsWSize			(36+zlib1222add)
       
    51 #define	dsWMask			(44+zlib1222add)
       
    52 #define	dsWindow		(48+zlib1222add)
       
    53 #define	dsPrev			(56+zlib1222add)
       
    54 #define	dsMatchLen		(88+zlib1222add)
       
    55 #define	dsPrevMatch		(92+zlib1222add)
       
    56 #define	dsStrStart		(100+zlib1222add)
       
    57 #define	dsMatchStart		(104+zlib1222add)
       
    58 #define	dsLookahead		(108+zlib1222add)
       
    59 #define	dsPrevLen		(112+zlib1222add)
       
    60 #define	dsMaxChainLen		(116+zlib1222add)
       
    61 #define	dsGoodMatch		(132+zlib1222add)
       
    62 #define	dsNiceMatch		(136+zlib1222add)
       
    63 
       
    64 
       
    65 .file "match.S"
       
    66 
       
    67 .globl	match_init, longest_match
       
    68 
       
    69 .text
       
    70 
       
    71 /* uInt longest_match(deflate_state *deflatestate, IPos curmatch) */
       
    72 
       
    73 longest_match:
       
    74 
       
    75 /* Save registers that the compiler may be using, and adjust %esp to	*/
       
    76 /* make room for our stack frame.					*/
       
    77 
       
    78 		pushl	%ebp
       
    79 		pushl	%edi
       
    80 		pushl	%esi
       
    81 		pushl	%ebx
       
    82 		subl	$LocalVarsSize, %esp
       
    83 
       
    84 /* Retrieve the function arguments. %ecx will hold cur_match		*/
       
    85 /* throughout the entire function. %edx will hold the pointer to the	*/
       
    86 /* deflate_state structure during the function's setup (before		*/
       
    87 /* entering the main loop).						*/
       
    88 
       
    89 		movl	deflatestate(%esp), %edx
       
    90 		movl	curmatch(%esp), %ecx
       
    91 
       
    92 /* uInt wmask = s->w_mask;						*/
       
    93 /* unsigned chain_length = s->max_chain_length;				*/
       
    94 /* if (s->prev_length >= s->good_match) {				*/
       
    95 /*     chain_length >>= 2;						*/
       
    96 /* }									*/
       
    97 
       
    98 		movl	dsPrevLen(%edx), %eax
       
    99 		movl	dsGoodMatch(%edx), %ebx
       
   100 		cmpl	%ebx, %eax
       
   101 		movl	dsWMask(%edx), %eax
       
   102 		movl	dsMaxChainLen(%edx), %ebx
       
   103 		jl	LastMatchGood
       
   104 		shrl	$2, %ebx
       
   105 LastMatchGood:
       
   106 
       
   107 /* chainlen is decremented once beforehand so that the function can	*/
       
   108 /* use the sign flag instead of the zero flag for the exit test.	*/
       
   109 /* It is then shifted into the high word, to make room for the wmask	*/
       
   110 /* value, which it will always accompany.				*/
       
   111 
       
   112 		decl	%ebx
       
   113 		shll	$16, %ebx
       
   114 		orl	%eax, %ebx
       
   115 		movl	%ebx, chainlenwmask(%esp)
       
   116 
       
   117 /* if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead;	*/
       
   118 
       
   119 		movl	dsNiceMatch(%edx), %eax
       
   120 		movl	dsLookahead(%edx), %ebx
       
   121 		cmpl	%eax, %ebx
       
   122 		jl	LookaheadLess
       
   123 		movl	%eax, %ebx
       
   124 LookaheadLess:	movl	%ebx, nicematch(%esp)
       
   125 
       
   126 /* register Bytef *scan = s->window + s->strstart;			*/
       
   127 
       
   128 		movl	dsWindow(%edx), %esi
       
   129 		movl	%esi, window(%esp)
       
   130 		movl	dsStrStart(%edx), %ebp
       
   131 		lea	(%esi,%ebp), %edi
       
   132 		movl	%edi, scan(%esp)
       
   133 
       
   134 /* Determine how many bytes the scan ptr is off from being		*/
       
   135 /* dword-aligned.							*/
       
   136 
       
   137 		movl	%edi, %eax
       
   138 		negl	%eax
       
   139 		andl	$3, %eax
       
   140 		movl	%eax, scanalign(%esp)
       
   141 
       
   142 /* IPos limit = s->strstart > (IPos)MAX_DIST(s) ?			*/
       
   143 /*     s->strstart - (IPos)MAX_DIST(s) : NIL;				*/
       
   144 
       
   145 		movl	dsWSize(%edx), %eax
       
   146 		subl	$MIN_LOOKAHEAD, %eax
       
   147 		subl	%eax, %ebp
       
   148 		jg	LimitPositive
       
   149 		xorl	%ebp, %ebp
       
   150 LimitPositive:
       
   151 
       
   152 /* int best_len = s->prev_length;					*/
       
   153 
       
   154 		movl	dsPrevLen(%edx), %eax
       
   155 		movl	%eax, bestlen(%esp)
       
   156 
       
   157 /* Store the sum of s->window + best_len in %esi locally, and in %esi.	*/
       
   158 
       
   159 		addl	%eax, %esi
       
   160 		movl	%esi, windowbestlen(%esp)
       
   161 
       
   162 /* register ush scan_start = *(ushf*)scan;				*/
       
   163 /* register ush scan_end   = *(ushf*)(scan+best_len-1);			*/
       
   164 /* Posf *prev = s->prev;						*/
       
   165 
       
   166 		movzwl	(%edi), %ebx
       
   167 		movl	%ebx, scanstart(%esp)
       
   168 		movzwl	-1(%edi,%eax), %ebx
       
   169 		movl	%ebx, scanend(%esp)
       
   170 		movl	dsPrev(%edx), %edi
       
   171 
       
   172 /* Jump into the main loop.						*/
       
   173 
       
   174 		movl	chainlenwmask(%esp), %edx
       
   175 		jmp	LoopEntry
       
   176 
       
   177 .balign 16
       
   178 
       
   179 /* do {
       
   180  *     match = s->window + cur_match;
       
   181  *     if (*(ushf*)(match+best_len-1) != scan_end ||
       
   182  *         *(ushf*)match != scan_start) continue;
       
   183  *     [...]
       
   184  * } while ((cur_match = prev[cur_match & wmask]) > limit
       
   185  *          && --chain_length != 0);
       
   186  *
       
   187  * Here is the inner loop of the function. The function will spend the
       
   188  * majority of its time in this loop, and majority of that time will
       
   189  * be spent in the first ten instructions.
       
   190  *
       
   191  * Within this loop:
       
   192  * %ebx = scanend
       
   193  * %ecx = curmatch
       
   194  * %edx = chainlenwmask - i.e., ((chainlen << 16) | wmask)
       
   195  * %esi = windowbestlen - i.e., (window + bestlen)
       
   196  * %edi = prev
       
   197  * %ebp = limit
       
   198  */
       
   199 LookupLoop:
       
   200 		andl	%edx, %ecx
       
   201 		movzwl	(%edi,%ecx,2), %ecx
       
   202 		cmpl	%ebp, %ecx
       
   203 		jbe	LeaveNow
       
   204 		subl	$0x00010000, %edx
       
   205 		js	LeaveNow
       
   206 LoopEntry:	movzwl	-1(%esi,%ecx), %eax
       
   207 		cmpl	%ebx, %eax
       
   208 		jnz	LookupLoop
       
   209 		movl	window(%esp), %eax
       
   210 		movzwl	(%eax,%ecx), %eax
       
   211 		cmpl	scanstart(%esp), %eax
       
   212 		jnz	LookupLoop
       
   213 
       
   214 /* Store the current value of chainlen.					*/
       
   215 
       
   216 		movl	%edx, chainlenwmask(%esp)
       
   217 
       
   218 /* Point %edi to the string under scrutiny, and %esi to the string we	*/
       
   219 /* are hoping to match it up with. In actuality, %esi and %edi are	*/
       
   220 /* both pointed (MAX_MATCH_8 - scanalign) bytes ahead, and %edx is	*/
       
   221 /* initialized to -(MAX_MATCH_8 - scanalign).				*/
       
   222 
       
   223 		movl	window(%esp), %esi
       
   224 		movl	scan(%esp), %edi
       
   225 		addl	%ecx, %esi
       
   226 		movl	scanalign(%esp), %eax
       
   227 		movl	$(-MAX_MATCH_8), %edx
       
   228 		lea	MAX_MATCH_8(%edi,%eax), %edi
       
   229 		lea	MAX_MATCH_8(%esi,%eax), %esi
       
   230 
       
   231 /* Test the strings for equality, 8 bytes at a time. At the end,
       
   232  * adjust %edx so that it is offset to the exact byte that mismatched.
       
   233  *
       
   234  * We already know at this point that the first three bytes of the
       
   235  * strings match each other, and they can be safely passed over before
       
   236  * starting the compare loop. So what this code does is skip over 0-3
       
   237  * bytes, as much as necessary in order to dword-align the %edi
       
   238  * pointer. (%esi will still be misaligned three times out of four.)
       
   239  *
       
   240  * It should be confessed that this loop usually does not represent
       
   241  * much of the total running time. Replacing it with a more
       
   242  * straightforward "rep cmpsb" would not drastically degrade
       
   243  * performance.
       
   244  */
       
   245 LoopCmps:
       
   246 		movl	(%esi,%edx), %eax
       
   247 		xorl	(%edi,%edx), %eax
       
   248 		jnz	LeaveLoopCmps
       
   249 		movl	4(%esi,%edx), %eax
       
   250 		xorl	4(%edi,%edx), %eax
       
   251 		jnz	LeaveLoopCmps4
       
   252 		addl	$8, %edx
       
   253 		jnz	LoopCmps
       
   254 		jmp	LenMaximum
       
   255 LeaveLoopCmps4:	addl	$4, %edx
       
   256 LeaveLoopCmps:	testl	$0x0000FFFF, %eax
       
   257 		jnz	LenLower
       
   258 		addl	$2, %edx
       
   259 		shrl	$16, %eax
       
   260 LenLower:	subb	$1, %al
       
   261 		adcl	$0, %edx
       
   262 
       
   263 /* Calculate the length of the match. If it is longer than MAX_MATCH,	*/
       
   264 /* then automatically accept it as the best possible match and leave.	*/
       
   265 
       
   266 		lea	(%edi,%edx), %eax
       
   267 		movl	scan(%esp), %edi
       
   268 		subl	%edi, %eax
       
   269 		cmpl	$MAX_MATCH, %eax
       
   270 		jge	LenMaximum
       
   271 
       
   272 /* If the length of the match is not longer than the best match we	*/
       
   273 /* have so far, then forget it and return to the lookup loop.		*/
       
   274 
       
   275 		movl	deflatestate(%esp), %edx
       
   276 		movl	bestlen(%esp), %ebx
       
   277 		cmpl	%ebx, %eax
       
   278 		jg	LongerMatch
       
   279 		movl	windowbestlen(%esp), %esi
       
   280 		movl	dsPrev(%edx), %edi
       
   281 		movl	scanend(%esp), %ebx
       
   282 		movl	chainlenwmask(%esp), %edx
       
   283 		jmp	LookupLoop
       
   284 
       
   285 /*         s->match_start = cur_match;					*/
       
   286 /*         best_len = len;						*/
       
   287 /*         if (len >= nice_match) break;				*/
       
   288 /*         scan_end = *(ushf*)(scan+best_len-1);			*/
       
   289 
       
   290 LongerMatch:	movl	nicematch(%esp), %ebx
       
   291 		movl	%eax, bestlen(%esp)
       
   292 		movl	%ecx, dsMatchStart(%edx)
       
   293 		cmpl	%ebx, %eax
       
   294 		jge	LeaveNow
       
   295 		movl	window(%esp), %esi
       
   296 		addl	%eax, %esi
       
   297 		movl	%esi, windowbestlen(%esp)
       
   298 		movzwl	-1(%edi,%eax), %ebx
       
   299 		movl	dsPrev(%edx), %edi
       
   300 		movl	%ebx, scanend(%esp)
       
   301 		movl	chainlenwmask(%esp), %edx
       
   302 		jmp	LookupLoop
       
   303 
       
   304 /* Accept the current string, with the maximum possible length.		*/
       
   305 
       
   306 LenMaximum:	movl	deflatestate(%esp), %edx
       
   307 		movl	$MAX_MATCH, bestlen(%esp)
       
   308 		movl	%ecx, dsMatchStart(%edx)
       
   309 
       
   310 /* if ((uInt)best_len <= s->lookahead) return (uInt)best_len;		*/
       
   311 /* return s->lookahead;							*/
       
   312 
       
   313 LeaveNow:
       
   314 		movl	deflatestate(%esp), %edx
       
   315 		movl	bestlen(%esp), %ebx
       
   316 		movl	dsLookahead(%edx), %eax
       
   317 		cmpl	%eax, %ebx
       
   318 		jg	LookaheadRet
       
   319 		movl	%ebx, %eax
       
   320 LookaheadRet:
       
   321 
       
   322 /* Restore the stack and return from whence we came.			*/
       
   323 
       
   324 		addl	$LocalVarsSize, %esp
       
   325 		popl	%ebx
       
   326 		popl	%esi
       
   327 		popl	%edi
       
   328 		popl	%ebp
       
   329 match_init:	ret