symbian-qemu-0.9.1-12/libsdl-trunk/src/hermes/x86p_32.asm
changeset 1 2fb8b9db1c86
equal deleted inserted replaced
0:ffa851df0825 1:2fb8b9db1c86
       
     1 ;
       
     2 ; x86 format converters for HERMES
       
     3 ; Some routines Copyright (c) 1998 Christian Nentwich (brn@eleet.mcb.at)
       
     4 ; This source code is licensed under the GNU LGPL
       
     5 ; 
       
     6 ; Please refer to the file COPYING.LIB contained in the distribution for
       
     7 ; licensing conditions		
       
     8 ;
       
     9 ; Most routines are (c) Glenn Fiedler (ptc@gaffer.org), used with permission
       
    10 ; 
       
    11 
       
    12 BITS 32
       
    13 
       
    14 %include "common.inc"
       
    15 
       
        ; Entry points implemented in this file, one SDL_FUNC line per converter.
        ; NOTE(review): SDL_FUNC is not defined here; presumably it comes from
        ; common.inc and exports the symbol -- confirm against that file.
     16 SDL_FUNC _ConvertX86p32_32BGR888
       
     17 SDL_FUNC _ConvertX86p32_32RGBA888
       
     18 SDL_FUNC _ConvertX86p32_32BGRA888
       
     19 SDL_FUNC _ConvertX86p32_24RGB888	
       
     20 SDL_FUNC _ConvertX86p32_24BGR888
       
     21 SDL_FUNC _ConvertX86p32_16RGB565
       
     22 SDL_FUNC _ConvertX86p32_16BGR565
       
     23 SDL_FUNC _ConvertX86p32_16RGB555
       
     24 SDL_FUNC _ConvertX86p32_16BGR555
       
     25 SDL_FUNC _ConvertX86p32_8RGB332
       
    26 
       
    27 SECTION .text
       
    28 
       
    29 ;; _Convert_*
       
     30 ;; Parameters:
       
    31 ;;   ESI = source 
       
    32 ;;   EDI = dest
       
    33 ;;   ECX = amount (NOT 0!!! (the _ConvertX86 routine checks for that though))
       
    34 ;; Destroys:
       
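     35 ;;   EAX, EBX, EDX, flags (ECX, ESI and EDI are modified as well: the count is consumed and both pointers are advanced)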
       
    36 
       
    37 
       
    38 _ConvertX86p32_32BGR888:
       
    39 
       
    40     ; check short
       
    41     cmp ecx,BYTE 32
       
    42     ja .L3
       
    43 
       
    44 .L1 ; short loop
       
    45     mov edx,[esi]
       
    46     bswap edx
       
    47     ror edx,8
       
    48     mov [edi],edx
       
    49     add esi,BYTE 4
       
    50     add edi,BYTE 4
       
    51     dec ecx
       
    52     jnz .L1
       
    53 .L2
       
    54     retn
       
    55 
       
    56 .L3 ; save ebp
       
    57     push ebp
       
    58 
       
    59     ; unroll four times
       
    60     mov ebp,ecx
       
    61     shr ebp,2
       
    62     
       
    63     ; save count
       
    64     push ecx
       
    65 
       
    66 .L4     mov eax,[esi]
       
    67         mov ebx,[esi+4]
       
    68 
       
    69         bswap eax
       
    70 
       
    71         bswap ebx
       
    72 
       
    73         ror eax,8
       
    74         mov ecx,[esi+8]
       
    75 
       
    76         ror ebx,8
       
    77         mov edx,[esi+12]
       
    78 
       
    79         bswap ecx
       
    80 
       
    81         bswap edx
       
    82 
       
    83         ror ecx,8
       
    84         mov [edi+0],eax
       
    85 
       
    86         ror edx,8
       
    87         mov [edi+4],ebx
       
    88 
       
    89         mov [edi+8],ecx
       
    90         mov [edi+12],edx
       
    91 
       
    92         add esi,BYTE 16
       
    93         add edi,BYTE 16
       
    94 
       
    95         dec ebp
       
    96         jnz .L4                 
       
    97 
       
    98     ; check tail
       
    99     pop ecx
       
   100     and ecx,BYTE 11b
       
   101     jz .L6
       
   102 
       
   103 .L5 ; tail loop
       
   104     mov edx,[esi]
       
   105     bswap edx
       
   106     ror edx,8
       
   107     mov [edi],edx
       
   108     add esi,BYTE 4
       
   109     add edi,BYTE 4
       
   110     dec ecx
       
   111     jnz .L5
       
   112 
       
   113 .L6 pop ebp
       
   114     retn
       
   115 	
       
   116 
       
   117 	
       
   118 		
       
   119 _ConvertX86p32_32RGBA888:
       
   120 	
       
   121     ; check short
       
   122     cmp ecx,BYTE 32
       
   123     ja .L3
       
   124 
       
   125 .L1 ; short loop
       
   126     mov edx,[esi]
       
   127     rol edx,8
       
   128     mov [edi],edx
       
   129     add esi,BYTE 4
       
   130     add edi,BYTE 4
       
   131     dec ecx
       
   132     jnz .L1
       
   133 .L2
       
   134     retn
       
   135 
       
   136 .L3 ; save ebp
       
   137     push ebp
       
   138 
       
   139     ; unroll four times
       
   140     mov ebp,ecx
       
   141     shr ebp,2
       
   142     
       
   143     ; save count
       
   144     push ecx
       
   145 
       
   146 .L4     mov eax,[esi]
       
   147         mov ebx,[esi+4]
       
   148 
       
   149         rol eax,8
       
   150         mov ecx,[esi+8]
       
   151 
       
   152         rol ebx,8
       
   153         mov edx,[esi+12]
       
   154 
       
   155         rol ecx,8
       
   156         mov [edi+0],eax
       
   157 
       
   158         rol edx,8
       
   159         mov [edi+4],ebx
       
   160 
       
   161         mov [edi+8],ecx
       
   162         mov [edi+12],edx
       
   163 
       
   164         add esi,BYTE 16
       
   165         add edi,BYTE 16
       
   166 
       
   167         dec ebp
       
   168         jnz .L4                 
       
   169 
       
   170     ; check tail
       
   171     pop ecx
       
   172     and ecx,BYTE 11b
       
   173     jz .L6
       
   174 
       
   175 .L5 ; tail loop
       
   176     mov edx,[esi]
       
   177     rol edx,8
       
   178     mov [edi],edx
       
   179     add esi,BYTE 4
       
   180     add edi,BYTE 4
       
   181     dec ecx
       
   182     jnz .L5
       
   183 
       
   184 .L6 pop ebp
       
   185     retn
       
   186 
       
   187 	
       
   188 
       
   189 
       
   190 _ConvertX86p32_32BGRA888:
       
   191 
       
   192     ; check short
       
   193     cmp ecx,BYTE 32
       
   194     ja .L3
       
   195 
       
   196 .L1 ; short loop
       
   197     mov edx,[esi]
       
   198     bswap edx
       
   199     mov [edi],edx
       
   200     add esi,BYTE 4
       
   201     add edi,BYTE 4
       
   202     dec ecx
       
   203     jnz .L1
       
   204 .L2
       
   205     retn
       
   206 
       
   207 .L3 ; save ebp
       
   208     push ebp
       
   209 
       
   210     ; unroll four times
       
   211     mov ebp,ecx
       
   212     shr ebp,2
       
   213     
       
   214     ; save count
       
   215     push ecx
       
   216 
       
   217 .L4     mov eax,[esi]
       
   218         mov ebx,[esi+4]
       
   219 
       
   220         mov ecx,[esi+8]
       
   221         mov edx,[esi+12]
       
   222 
       
   223         bswap eax
       
   224 
       
   225         bswap ebx
       
   226 
       
   227         bswap ecx
       
   228 
       
   229         bswap edx
       
   230 
       
   231         mov [edi+0],eax
       
   232         mov [edi+4],ebx
       
   233 
       
   234         mov [edi+8],ecx
       
   235         mov [edi+12],edx
       
   236 
       
   237         add esi,BYTE 16
       
   238         add edi,BYTE 16
       
   239 
       
   240         dec ebp
       
   241         jnz .L4                 
       
   242 
       
   243     ; check tail
       
   244     pop ecx
       
   245     and ecx,BYTE 11b
       
   246     jz .L6
       
   247 
       
   248 .L5 ; tail loop
       
   249     mov edx,[esi]
       
   250     bswap edx
       
   251     mov [edi],edx
       
   252     add esi,BYTE 4
       
   253     add edi,BYTE 4
       
   254     dec ecx
       
   255     jnz .L5
       
   256 
       
   257 .L6 pop ebp
       
   258     retn
       
   259 
       
   260 
       
   261 	
       
   262 	
       
   263 ;; 32 bit RGB 888 to 24 BIT RGB 888
       
   264 
       
   265 _ConvertX86p32_24RGB888:
       
   266 
       
   267 	; check short
       
   268 	cmp ecx,BYTE 32
       
   269 	ja .L3
       
   270 
       
   271 .L1	; short loop
       
   272 	mov al,[esi]
       
   273 	mov bl,[esi+1]
       
   274 	mov dl,[esi+2]
       
   275 	mov [edi],al
       
   276 	mov [edi+1],bl
       
   277 	mov [edi+2],dl
       
   278 	add esi,BYTE 4
       
   279 	add edi,BYTE 3
       
   280 	dec ecx
       
   281 	jnz .L1
       
   282 .L2 
       
   283 	retn
       
   284 
       
   285 .L3	;	 head
       
   286 	mov edx,edi
       
   287 	and edx,BYTE 11b
       
   288 	jz .L4
       
   289 	mov al,[esi]
       
   290 	mov bl,[esi+1]
       
   291 	mov dl,[esi+2]
       
   292 	mov [edi],al
       
   293 	mov [edi+1],bl
       
   294 	mov [edi+2],dl
       
   295 	add esi,BYTE 4
       
   296 	add edi,BYTE 3
       
   297 	dec ecx
       
   298 	jmp SHORT .L3
       
   299 
       
   300 .L4 ; unroll 4 times
       
   301 	push ebp
       
   302 	mov ebp,ecx
       
   303 	shr ebp,2
       
   304 
       
   305     ; save count
       
   306 	push ecx
       
   307 
       
   308 .L5     mov eax,[esi]                   ; first dword            eax = [A][R][G][B]
       
   309         mov ebx,[esi+4]                 ; second dword           ebx = [a][r][g][b]
       
   310 
       
   311         shl eax,8                       ;                        eax = [R][G][B][.]
       
   312         mov ecx,[esi+12]                ; third dword            ecx = [a][r][g][b]
       
   313 
       
   314         shl ebx,8                       ;                        ebx = [r][g][b][.]
       
   315         mov al,[esi+4]                  ;                        eax = [R][G][B][b]
       
   316 
       
   317         ror eax,8                       ;                        eax = [b][R][G][B] (done)
       
   318         mov bh,[esi+8+1]                ;                        ebx = [r][g][G][.]
       
   319 
       
   320         mov [edi],eax
       
   321         add edi,BYTE 3*4
       
   322 
       
   323         shl ecx,8                       ;                        ecx = [r][g][b][.]
       
   324         mov bl,[esi+8+0]                ;                        ebx = [r][g][G][B]
       
   325 
       
   326         rol ebx,16                      ;                        ebx = [G][B][r][g] (done)
       
   327         mov cl,[esi+8+2]                ;                        ecx = [r][g][b][R] (done)
       
   328 
       
   329         mov [edi+4-3*4],ebx
       
   330         add esi,BYTE 4*4
       
   331         
       
   332         mov [edi+8-3*4],ecx
       
   333         dec ebp
       
   334 
       
   335         jnz .L5
       
   336 
       
   337     ; check tail
       
   338 	pop ecx
       
   339 	and ecx,BYTE 11b
       
   340 	jz .L7
       
   341 
       
   342 .L6 ; tail loop
       
   343 	mov al,[esi]
       
   344 	mov bl,[esi+1]
       
   345 	mov dl,[esi+2]
       
   346 	mov [edi],al
       
   347 	mov [edi+1],bl
       
   348 	mov [edi+2],dl
       
   349 	add esi,BYTE 4
       
   350 	add edi,BYTE 3
       
   351 	dec ecx
       
   352 	jnz .L6
       
   353 
       
   354 .L7	pop ebp
       
   355 	retn
       
   356 
       
   357 
       
   358 
       
   359 
       
   360 ;; 32 bit RGB 888 to 24 bit BGR 888
       
   361 
       
   362 _ConvertX86p32_24BGR888:
       
   363 
       
   364 	; check short
       
   365 	cmp ecx,BYTE 32
       
   366 	ja .L3
       
   367 
       
   368 	
       
   369 .L1	; short loop
       
   370 	mov dl,[esi]
       
   371 	mov bl,[esi+1]
       
   372 	mov al,[esi+2]
       
   373 	mov [edi],al
       
   374 	mov [edi+1],bl
       
   375 	mov [edi+2],dl
       
   376 	add esi,BYTE 4
       
   377 	add edi,BYTE 3
       
   378 	dec ecx
       
   379 	jnz .L1
       
   380 .L2
       
   381 	retn
       
   382 
       
   383 .L3 ; head
       
   384 	mov edx,edi
       
   385 	and edx,BYTE 11b
       
   386 	jz .L4
       
   387 	mov dl,[esi]
       
   388 	mov bl,[esi+1]
       
   389 	mov al,[esi+2]
       
   390 	mov [edi],al
       
   391 	mov [edi+1],bl
       
   392 	mov [edi+2],dl
       
   393 	add esi,BYTE 4
       
   394 	add edi,BYTE 3
       
   395 	dec ecx
       
   396 	jmp SHORT .L3
       
   397 
       
   398 .L4	; unroll 4 times
       
   399 	push ebp
       
   400 	mov ebp,ecx
       
   401 	shr ebp,2
       
   402 
       
   403 	; save count
       
   404 	push ecx
       
   405 
       
   406 .L5     
       
   407 	mov eax,[esi]                   ; first dword            eax = [A][R][G][B]
       
   408         mov ebx,[esi+4]                 ; second dword           ebx = [a][r][g][b]
       
   409         
       
   410         bswap eax                       ;                        eax = [B][G][R][A]
       
   411 
       
   412         bswap ebx                       ;                        ebx = [b][g][r][a]
       
   413 
       
   414         mov al,[esi+4+2]                ;                        eax = [B][G][R][r] 
       
   415         mov bh,[esi+4+4+1]              ;                        ebx = [b][g][G][a]
       
   416 
       
   417         ror eax,8                       ;                        eax = [r][B][G][R] (done)
       
   418         mov bl,[esi+4+4+2]              ;                        ebx = [b][g][G][R]
       
   419 
       
   420         ror ebx,16                      ;                        ebx = [G][R][b][g] (done)
       
   421         mov [edi],eax
       
   422     
       
   423         mov [edi+4],ebx
       
   424         mov ecx,[esi+12]                ; third dword            ecx = [a][r][g][b]
       
   425         
       
   426         bswap ecx                       ;                        ecx = [b][g][r][a]
       
   427         
       
   428         mov cl,[esi+8]                  ;                        ecx = [b][g][r][B] (done)
       
   429         add esi,BYTE 4*4
       
   430 
       
   431         mov [edi+8],ecx
       
   432         add edi,BYTE 3*4
       
   433 
       
   434         dec ebp
       
   435         jnz .L5
       
   436 
       
   437 	; check tail
       
   438 	pop ecx
       
   439 	and ecx,BYTE 11b
       
   440 	jz .L7
       
   441 
       
   442 .L6	; tail loop
       
   443 	mov dl,[esi]
       
   444 	mov bl,[esi+1]
       
   445 	mov al,[esi+2]
       
   446 	mov [edi],al
       
   447 	mov [edi+1],bl
       
   448 	mov [edi+2],dl
       
   449 	add esi,BYTE 4
       
   450 	add edi,BYTE 3
       
   451 	dec ecx
       
   452 	jnz .L6
       
   453 
       
   454 .L7 
       
   455 	pop ebp
       
   456 	retn
       
   457  
       
   458 
       
   459 	
       
   460 		
       
   461 ;; 32 bit RGB 888 to 16 BIT RGB 565 
       
   462 
       
   463 _ConvertX86p32_16RGB565:
       
   464 	; check short
       
   465 	cmp ecx,BYTE 16
       
   466 	ja .L3
       
   467 
       
   468 .L1 ; short loop
       
   469 	mov bl,[esi+0]    ; blue
       
   470 	mov al,[esi+1]    ; green
       
   471 	mov ah,[esi+2]    ; red
       
   472 	shr ah,3
       
   473         and al,11111100b
       
   474 	shl eax,3
       
   475 	shr bl,3
       
   476 	add al,bl
       
   477 	mov [edi+0],al
       
   478 	mov [edi+1],ah
       
   479 	add esi,BYTE 4
       
   480 	add edi,BYTE 2
       
   481 	dec ecx
       
   482 	jnz .L1
       
   483 
       
   484 .L2:				; End of short loop
       
   485 	retn
       
   486 
       
   487 	
       
   488 .L3	; head
       
   489 	mov ebx,edi
       
   490 	and ebx,BYTE 11b
       
   491 	jz .L4
       
   492 	
       
   493 	mov bl,[esi+0]    ; blue
       
   494 	mov al,[esi+1]    ; green
       
   495 	mov ah,[esi+2]    ; red
       
   496 	shr ah,3
       
   497 	and al,11111100b
       
   498 	shl eax,3
       
   499 	shr bl,3
       
   500 	add al,bl
       
   501 	mov [edi+0],al
       
   502 	mov [edi+1],ah
       
   503 	add esi,BYTE 4
       
   504 	add edi,BYTE 2
       
   505 	dec ecx
       
   506 
       
   507 .L4:	 
       
   508     ; save count
       
   509 	push ecx
       
   510 
       
   511     ; unroll twice
       
   512 	shr ecx,1
       
   513     
       
   514     ; point arrays to end
       
   515 	lea esi,[esi+ecx*8]
       
   516 	lea edi,[edi+ecx*4]
       
   517 
       
   518     ; negative counter 
       
   519 	neg ecx
       
   520 	jmp SHORT .L6
       
   521 
       
   522 .L5:	    
       
   523 	mov [edi+ecx*4-4],eax
       
   524 .L6:	
       
   525 	mov eax,[esi+ecx*8]
       
   526 
       
   527         shr ah,2
       
   528         mov ebx,[esi+ecx*8+4]
       
   529 
       
   530         shr eax,3
       
   531         mov edx,[esi+ecx*8+4]
       
   532 
       
   533         shr bh,2
       
   534         mov dl,[esi+ecx*8+2]
       
   535 
       
   536         shl ebx,13
       
   537         and eax,000007FFh
       
   538         
       
   539         shl edx,8
       
   540         and ebx,07FF0000h
       
   541 
       
   542         and edx,0F800F800h
       
   543         add eax,ebx
       
   544 
       
   545         add eax,edx
       
   546         inc ecx
       
   547 
       
   548         jnz .L5                 
       
   549 
       
   550 	mov [edi+ecx*4-4],eax
       
   551 
       
   552     ; tail
       
   553 	pop ecx
       
   554 	test cl,1
       
   555 	jz .L7
       
   556 	
       
   557 	mov bl,[esi+0]    ; blue
       
   558 	mov al,[esi+1]    ; green
       
   559 	mov ah,[esi+2]    ; red
       
   560 	shr ah,3
       
   561 	and al,11111100b
       
   562 	shl eax,3
       
   563 	shr bl,3
       
   564 	add al,bl
       
   565 	mov [edi+0],al
       
   566 	mov [edi+1],ah
       
   567 	add esi,BYTE 4
       
   568 	add edi,BYTE 2
       
   569 
       
   570 .L7:	
       
   571 	retn
       
   572 
       
   573 
       
   574 
       
   575 	
       
   576 ;; 32 bit RGB 888 to 16 BIT BGR 565 
       
   577 
       
   578 _ConvertX86p32_16BGR565:
       
   579 	
       
   580 	; check short
       
   581 	cmp ecx,BYTE 16
       
   582 	ja .L3
       
   583 
       
   584 .L1	; short loop
       
   585 	mov ah,[esi+0]    ; blue
       
   586 	mov al,[esi+1]    ; green
       
   587 	mov bl,[esi+2]    ; red
       
   588 	shr ah,3
       
   589 	and al,11111100b
       
   590 	shl eax,3
       
   591 	shr bl,3
       
   592 	add al,bl
       
   593 	mov [edi+0],al
       
   594 	mov [edi+1],ah
       
   595 	add esi,BYTE 4
       
   596 	add edi,BYTE 2
       
   597 	dec ecx
       
   598 	jnz .L1
       
   599 .L2
       
   600 	retn
       
   601 
       
   602 .L3	; head
       
   603 	mov ebx,edi
       
   604 	and ebx,BYTE 11b
       
   605 	jz .L4   
       
   606 	mov ah,[esi+0]    ; blue
       
   607 	mov al,[esi+1]    ; green
       
   608 	mov bl,[esi+2]    ; red
       
   609 	shr ah,3
       
   610 	and al,11111100b
       
   611 	shl eax,3
       
   612 	shr bl,3
       
   613 	add al,bl
       
   614 	mov [edi+0],al
       
   615 	mov [edi+1],ah
       
   616 	add esi,BYTE 4
       
   617 	add edi,BYTE 2
       
   618 	dec ecx
       
   619 
       
   620 .L4	; save count
       
   621 	push ecx
       
   622 
       
   623 	; unroll twice
       
   624 	shr ecx,1
       
   625     
       
   626 	; point arrays to end
       
   627 	lea esi,[esi+ecx*8]
       
   628 	lea edi,[edi+ecx*4]
       
   629 
       
   630 	; negative count
       
   631 	neg ecx
       
   632 	jmp SHORT .L6
       
   633 
       
   634 .L5     
       
   635 	mov [edi+ecx*4-4],eax            
       
   636 .L6     
       
   637 	mov edx,[esi+ecx*8+4]
       
   638 
       
   639         mov bh,[esi+ecx*8+4]                       
       
   640         mov ah,[esi+ecx*8]                       
       
   641 
       
   642         shr bh,3
       
   643         mov al,[esi+ecx*8+1]             
       
   644 
       
   645         shr ah,3
       
   646         mov bl,[esi+ecx*8+5]           
       
   647 
       
   648         shl eax,3
       
   649         mov dl,[esi+ecx*8+2]
       
   650 
       
   651         shl ebx,19
       
   652         and eax,0000FFE0h              
       
   653                 
       
   654         shr edx,3
       
   655         and ebx,0FFE00000h             
       
   656         
       
   657         and edx,001F001Fh               
       
   658         add eax,ebx
       
   659 
       
   660         add eax,edx
       
   661         inc ecx
       
   662 
       
   663         jnz .L5                 
       
   664 
       
   665 	mov [edi+ecx*4-4],eax            
       
   666 
       
   667 	; tail
       
   668 	pop ecx
       
   669 	and ecx,BYTE 1
       
   670 	jz .L7
       
   671 	mov ah,[esi+0]    ; blue
       
   672 	mov al,[esi+1]    ; green
       
   673 	mov bl,[esi+2]    ; red
       
   674 	shr ah,3
       
   675 	and al,11111100b
       
   676 	shl eax,3
       
   677 	shr bl,3
       
   678 	add al,bl
       
   679 	mov [edi+0],al
       
   680 	mov [edi+1],ah
       
   681 	add esi,BYTE 4
       
   682 	add edi,BYTE 2
       
   683 
       
   684 .L7 
       
   685 	retn
       
   686 
       
   687 
       
   688 	
       
   689 	
       
   690 ;; 32 BIT RGB TO 16 BIT RGB 555
       
   691 
       
   692 _ConvertX86p32_16RGB555:
       
   693 
       
   694 	; check short
       
   695 	cmp ecx,BYTE 16
       
   696 	ja .L3
       
   697 
       
   698 .L1	; short loop
       
   699 	mov bl,[esi+0]    ; blue
       
   700 	mov al,[esi+1]    ; green
       
   701 	mov ah,[esi+2]    ; red
       
   702 	shr ah,3
       
   703 	and al,11111000b
       
   704 	shl eax,2
       
   705 	shr bl,3
       
   706 	add al,bl
       
   707 	mov [edi+0],al
       
   708 	mov [edi+1],ah
       
   709 	add esi,BYTE 4
       
   710 	add edi,BYTE 2
       
   711 	dec ecx
       
   712 	jnz .L1
       
   713 .L2
       
   714 	retn
       
   715 
       
   716 .L3	; head
       
   717 	mov ebx,edi
       
   718         and ebx,BYTE 11b
       
   719 	jz .L4   
       
   720 	mov bl,[esi+0]    ; blue
       
   721 	mov al,[esi+1]    ; green
       
   722 	mov ah,[esi+2]    ; red
       
   723 	shr ah,3
       
   724 	and al,11111000b
       
   725 	shl eax,2
       
   726 	shr bl,3
       
   727 	add al,bl
       
   728 	mov [edi+0],al
       
   729 	mov [edi+1],ah
       
   730 	add esi,BYTE 4
       
   731 	add edi,BYTE 2
       
   732 	dec ecx
       
   733 
       
   734 .L4	; save count
       
   735 	push ecx
       
   736 
       
   737 	; unroll twice
       
   738 	shr ecx,1
       
   739     
       
   740 	; point arrays to end
       
   741 	lea esi,[esi+ecx*8]
       
   742 	lea edi,[edi+ecx*4]
       
   743 
       
   744 	; negative counter 
       
   745 	neg ecx
       
   746 	jmp SHORT .L6
       
   747 
       
   748 .L5     
       
   749 	mov [edi+ecx*4-4],eax
       
   750 .L6     
       
   751 	mov eax,[esi+ecx*8]
       
   752 
       
   753         shr ah,3
       
   754         mov ebx,[esi+ecx*8+4]
       
   755 
       
   756         shr eax,3
       
   757         mov edx,[esi+ecx*8+4]
       
   758 
       
   759         shr bh,3
       
   760         mov dl,[esi+ecx*8+2]
       
   761 
       
   762         shl ebx,13
       
   763         and eax,000007FFh
       
   764         
       
   765         shl edx,7
       
   766         and ebx,07FF0000h
       
   767 
       
   768         and edx,07C007C00h
       
   769         add eax,ebx
       
   770 
       
   771         add eax,edx
       
   772         inc ecx
       
   773 
       
   774         jnz .L5                 
       
   775 
       
   776 	mov [edi+ecx*4-4],eax
       
   777 
       
   778 	; tail
       
   779 	pop ecx
       
   780 	and ecx,BYTE 1
       
   781 	jz .L7
       
   782 	mov bl,[esi+0]    ; blue
       
   783 	mov al,[esi+1]    ; green
       
   784 	mov ah,[esi+2]    ; red
       
   785 	shr ah,3
       
   786 	and al,11111000b
       
   787 	shl eax,2
       
   788 	shr bl,3
       
   789 	add al,bl
       
   790 	mov [edi+0],al
       
   791 	mov [edi+1],ah
       
   792 	add esi,BYTE 4
       
   793 	add edi,BYTE 2
       
   794 
       
   795 .L7
       
   796 	retn
       
   797 
       
   798 
       
   799 
       
   800 
       
   801 ;; 32 BIT RGB TO 16 BIT BGR 555
       
   802 	
       
   803 _ConvertX86p32_16BGR555:
       
   804 	
       
   805 	; check short
       
   806 	cmp ecx,BYTE 16
       
   807 	ja .L3
       
   808 
       
   809 
       
   810 .L1	; short loop
       
   811 	mov ah,[esi+0]    ; blue
       
   812 	mov al,[esi+1]    ; green
       
   813 	mov bl,[esi+2]    ; red
       
   814 	shr ah,3
       
   815 	and al,11111000b
       
   816 	shl eax,2
       
   817 	shr bl,3
       
   818 	add al,bl
       
   819 	mov [edi+0],al
       
   820 	mov [edi+1],ah
       
   821 	add esi,BYTE 4
       
   822 	add edi,BYTE 2
       
   823 	dec ecx
       
   824 	jnz .L1
       
   825 .L2 
       
   826 	retn
       
   827 
       
   828 .L3	; head
       
   829 	mov ebx,edi
       
   830         and ebx,BYTE 11b
       
   831 	jz .L4   
       
   832 	mov ah,[esi+0]    ; blue
       
   833 	mov al,[esi+1]    ; green
       
   834 	mov bl,[esi+2]    ; red
       
   835 	shr ah,3
       
   836 	and al,11111000b
       
   837 	shl eax,2
       
   838 	shr bl,3
       
   839 	add al,bl
       
   840 	mov [edi+0],al
       
   841 	mov [edi+1],ah
       
   842 	add esi,BYTE 4
       
   843 	add edi,BYTE 2
       
   844 	dec ecx
       
   845 
       
   846 .L4	; save count
       
   847 	push ecx
       
   848 
       
   849 	; unroll twice
       
   850 	shr ecx,1
       
   851     
       
   852 	; point arrays to end
       
   853 	lea esi,[esi+ecx*8]
       
   854 	lea edi,[edi+ecx*4]
       
   855 
       
   856 	; negative counter 
       
   857 	neg ecx
       
   858 	jmp SHORT .L6
       
   859 
       
   860 .L5     
       
   861 	mov [edi+ecx*4-4],eax            
       
   862 .L6     
       
   863 	mov edx,[esi+ecx*8+4]
       
   864 
       
   865         mov bh,[esi+ecx*8+4]                       
       
   866         mov ah,[esi+ecx*8]                       
       
   867 
       
   868         shr bh,3
       
   869         mov al,[esi+ecx*8+1]             
       
   870 
       
   871         shr ah,3
       
   872         mov bl,[esi+ecx*8+5]           
       
   873 
       
   874         shl eax,2
       
   875         mov dl,[esi+ecx*8+2]
       
   876 
       
   877         shl ebx,18
       
   878         and eax,00007FE0h              
       
   879                 
       
   880         shr edx,3
       
   881         and ebx,07FE00000h             
       
   882         
       
   883         and edx,001F001Fh               
       
   884         add eax,ebx
       
   885 
       
   886         add eax,edx
       
   887         inc ecx
       
   888 
       
   889         jnz .L5                 
       
   890 
       
   891 	mov [edi+ecx*4-4],eax            
       
   892 
       
   893 	; tail
       
   894 	pop ecx
       
   895 	and ecx,BYTE 1
       
   896 	jz .L7
       
   897 	mov ah,[esi+0]    ; blue
       
   898 	mov al,[esi+1]    ; green
       
   899 	mov bl,[esi+2]    ; red
       
   900 	shr ah,3
       
   901 	and al,11111000b
       
   902 	shl eax,2
       
   903 	shr bl,3
       
   904 	add al,bl
       
   905 	mov [edi+0],al
       
   906 	mov [edi+1],ah
       
   907 	add esi,BYTE 4
       
   908 	add edi,BYTE 2
       
   909 
       
   910 .L7
       
   911 	retn
       
   912 
       
   913 
       
   914 
       
   915 
       
   916 	
       
   917 ;; FROM 32 BIT RGB to 8 BIT RGB (rrrgggbbb)
       
   918 ;; This routine writes FOUR pixels at once (dword) and then, if they exist
       
   919 ;; the trailing three pixels
       
   920 _ConvertX86p32_8RGB332:
       
   921 
       
   922 	
       
   923 .L_ALIGNED
       
   924 	push ecx
       
   925 
       
   926 	shr ecx,2		; We will draw 4 pixels at once
       
   927 	jnz .L1
       
   928 	
       
   929 	jmp .L2			; short jump out of range :(
       
   930 	
       
   931 .L1:
       
   932 	mov eax,[esi]		; first pair of pixels
       
   933 	mov edx,[esi+4]
       
   934 
       
   935 	shr dl,6
       
   936 	mov ebx,eax
       
   937 
       
   938 	shr al,6
       
   939 	and ah,0e0h
       
   940 
       
   941 	shr ebx,16
       
   942 	and dh,0e0h
       
   943 	
       
   944 	shr ah,3
       
   945 	and bl,0e0h
       
   946 
       
   947 	shr dh,3
       
   948 	
       
   949 	or al,bl
       
   950 	
       
   951 	mov ebx,edx	
       
   952 	or al,ah
       
   953 	
       
   954 	shr ebx,16
       
   955 	or dl,dh
       
   956 
       
   957 	and bl,0e0h
       
   958 	
       
   959 	or dl,bl
       
   960 
       
   961 	mov ah,dl
       
   962 
       
   963 	
       
   964 		
       
   965 	mov ebx,[esi+8]		; second pair of pixels
       
   966 
       
   967 	mov edx,ebx
       
   968 	and bh,0e0h
       
   969 
       
   970 	shr bl,6
       
   971 	and edx,0e00000h
       
   972 
       
   973 	shr edx,16
       
   974 
       
   975 	shr bh,3
       
   976 
       
   977 	ror eax,16
       
   978 	or bl,dl
       
   979 
       
   980 	mov edx,[esi+12]
       
   981 	or bl,bh
       
   982 	
       
   983 	mov al,bl
       
   984 
       
   985 	mov ebx,edx
       
   986 	and dh,0e0h
       
   987 
       
   988 	shr dl,6
       
   989 	and ebx,0e00000h
       
   990 	
       
   991 	shr dh,3
       
   992 	mov ah,dl
       
   993 
       
   994 	shr ebx,16
       
   995 	or ah,dh
       
   996 
       
   997 	or ah,bl
       
   998 
       
   999 	rol eax,16
       
  1000 	add esi,BYTE 16
       
  1001 			
       
  1002 	mov [edi],eax	
       
  1003 	add edi,BYTE 4
       
  1004 	
       
  1005 	dec ecx
       
  1006 	jz .L2			; L1 out of range for short jump :(
       
  1007 	
       
  1008 	jmp .L1
       
  1009 .L2:
       
  1010 	
       
  1011 	pop ecx
       
  1012 	and ecx,BYTE 3		; mask out number of pixels to draw
       
  1013 	
       
  1014 	jz .L4			; Nothing to do anymore
       
  1015 
       
  1016 .L3:
       
  1017 	mov eax,[esi]		; single pixel conversion for trailing pixels
       
  1018 
       
  1019         mov ebx,eax
       
  1020 
       
  1021         shr al,6
       
  1022         and ah,0e0h
       
  1023 
       
  1024         shr ebx,16
       
  1025 
       
  1026         shr ah,3
       
  1027         and bl,0e0h
       
  1028 
       
  1029         or al,ah
       
  1030         or al,bl
       
  1031 
       
  1032         mov [edi],al
       
  1033 
       
  1034         inc edi
       
  1035         add esi,BYTE 4
       
  1036 
       
  1037 	dec ecx
       
  1038 	jnz .L3
       
  1039 	
       
  1040 .L4:	
       
  1041 	retn
       
  1042 
       
  1043 %ifidn __OUTPUT_FORMAT__,elf
       
  1044 section .note.GNU-stack noalloc noexec nowrite progbits
       
  1045 %endif