|
1 ; |
|
2 ; x86 format converters for HERMES |
|
3 ; Some routines Copyright (c) 1998 Christian Nentwich (brn@eleet.mcb.at) |
|
4 ; This source code is licensed under the GNU LGPL |
|
5 ; |
|
6 ; Please refer to the file COPYING.LIB contained in the distribution for |
|
7 ; licensing conditions |
|
8 ; |
|
9 ; Most routines are (c) Glenn Fiedler (ptc@gaffer.org), used with permission |
|
10 ; |
|
11 |
|
; Assemble everything below as 32-bit code.
BITS 32

; Project-local include. SDL_FUNC (used below) is not defined in this file;
; presumably it comes from common.inc and declares/exports the symbol --
; TODO confirm against common.inc.
%include "common.inc"

; One SDL_FUNC line per converter entry point defined in this file.
SDL_FUNC _ConvertX86p32_32BGR888
SDL_FUNC _ConvertX86p32_32RGBA888
SDL_FUNC _ConvertX86p32_32BGRA888
SDL_FUNC _ConvertX86p32_24RGB888
SDL_FUNC _ConvertX86p32_24BGR888
SDL_FUNC _ConvertX86p32_16RGB565
SDL_FUNC _ConvertX86p32_16BGR565
SDL_FUNC _ConvertX86p32_16RGB555
SDL_FUNC _ConvertX86p32_16BGR555
SDL_FUNC _ConvertX86p32_8RGB332

; All converter code goes into the code section.
SECTION .text
|
28 |
|
;; _Convert_*
;; Parameters:
;;   ESI = source
;;   EDI = dest
;;   ECX = amount (NOT 0!!! (the _ConvertX86 routine checks for that though))
;; Destroys:
;;   EAX, EBX, EDX
|
36 |
|
37 |
|
;-----------------------------------------------------------------------
; _ConvertX86p32_32BGR888
;   32-bit -> 32-bit conversion that exchanges byte 0 and byte 2 of every
;   pixel (bswap, then rotate right by 8); bytes 1 and 3 stay in place.
; In:     ESI = source, EDI = dest, ECX = pixel count (must be non-zero)
; Out:    ESI and EDI point just past the converted pixels, ECX = 0
; Scratch: EAX, EBX, EDX (EBP is saved and restored on the long path)
;-----------------------------------------------------------------------
_ConvertX86p32_32BGR888:
        cmp     ecx, byte 32            ; more than 32 pixels -> unrolled path
        ja      .long_run

.short_loop:                            ; one pixel per iteration
        mov     edx, [esi]
        bswap   edx                     ; reverse all four bytes
        ror     edx, 8                  ; bring the old top byte back on top
        mov     [edi], edx
        add     esi, byte 4
        add     edi, byte 4
        dec     ecx
        jnz     .short_loop
        ret

.long_run:
        push    ebp                     ; EBP serves as the block counter
        mov     ebp, ecx
        shr     ebp, 2                  ; number of complete 4-pixel blocks
        push    ecx                     ; ECX is scratch below; keep the count

.block_loop:                            ; four pixels per iteration
        mov     eax, [esi]              ; all loads happen before any store
        mov     ebx, [esi+4]
        mov     ecx, [esi+8]
        mov     edx, [esi+12]

        bswap   eax
        ror     eax, 8
        bswap   ebx
        ror     ebx, 8
        bswap   ecx
        ror     ecx, 8
        bswap   edx
        ror     edx, 8

        mov     [edi+0], eax
        mov     [edi+4], ebx
        mov     [edi+8], ecx
        mov     [edi+12], edx

        add     esi, byte 16
        add     edi, byte 16
        dec     ebp
        jnz     .block_loop

        pop     ecx                     ; original count
        and     ecx, byte 11b           ; pixels left over after the blocks
        jz      .finish

.tail_loop:                             ; remaining 1..3 pixels
        mov     edx, [esi]
        bswap   edx
        ror     edx, 8
        mov     [edi], edx
        add     esi, byte 4
        add     edi, byte 4
        dec     ecx
        jnz     .tail_loop

.finish:
        pop     ebp
        ret
|
115 |
|
116 |
|
117 |
|
118 |
|
;-----------------------------------------------------------------------
; _ConvertX86p32_32RGBA888
;   32-bit -> 32-bit conversion that rotates every pixel left by 8 bits:
;   the top byte becomes byte 0 and the other three bytes move up by one.
; In:     ESI = source, EDI = dest, ECX = pixel count (must be non-zero)
; Out:    ESI and EDI point just past the converted pixels, ECX = 0
; Scratch: EAX, EBX, EDX (EBP is saved and restored on the long path)
;-----------------------------------------------------------------------
_ConvertX86p32_32RGBA888:
        cmp     ecx, byte 32            ; more than 32 pixels -> unrolled path
        ja      .long_run

.short_loop:                            ; one pixel per iteration
        mov     edx, [esi]
        rol     edx, 8
        mov     [edi], edx
        add     esi, byte 4
        add     edi, byte 4
        dec     ecx
        jnz     .short_loop
        ret

.long_run:
        push    ebp                     ; EBP serves as the block counter
        mov     ebp, ecx
        shr     ebp, 2                  ; number of complete 4-pixel blocks
        push    ecx                     ; ECX is scratch below; keep the count

.block_loop:                            ; four pixels per iteration
        mov     eax, [esi]              ; all loads happen before any store
        mov     ebx, [esi+4]
        mov     ecx, [esi+8]
        mov     edx, [esi+12]

        rol     eax, 8
        rol     ebx, 8
        rol     ecx, 8
        rol     edx, 8

        mov     [edi+0], eax
        mov     [edi+4], ebx
        mov     [edi+8], ecx
        mov     [edi+12], edx

        add     esi, byte 16
        add     edi, byte 16
        dec     ebp
        jnz     .block_loop

        pop     ecx                     ; original count
        and     ecx, byte 11b           ; pixels left over after the blocks
        jz      .finish

.tail_loop:                             ; remaining 1..3 pixels
        mov     edx, [esi]
        rol     edx, 8
        mov     [edi], edx
        add     esi, byte 4
        add     edi, byte 4
        dec     ecx
        jnz     .tail_loop

.finish:
        pop     ebp
        ret
|
186 |
|
187 |
|
188 |
|
189 |
|
;-----------------------------------------------------------------------
; _ConvertX86p32_32BGRA888
;   32-bit -> 32-bit conversion that reverses the byte order of every
;   pixel (a single bswap per pixel).
; In:     ESI = source, EDI = dest, ECX = pixel count (must be non-zero)
; Out:    ESI and EDI point just past the converted pixels, ECX = 0
; Scratch: EAX, EBX, EDX (EBP is saved and restored on the long path)
;-----------------------------------------------------------------------
_ConvertX86p32_32BGRA888:
        cmp     ecx, byte 32            ; more than 32 pixels -> unrolled path
        ja      .long_run

.short_loop:                            ; one pixel per iteration
        mov     edx, [esi]
        bswap   edx
        mov     [edi], edx
        add     esi, byte 4
        add     edi, byte 4
        dec     ecx
        jnz     .short_loop
        ret

.long_run:
        push    ebp                     ; EBP serves as the block counter
        mov     ebp, ecx
        shr     ebp, 2                  ; number of complete 4-pixel blocks
        push    ecx                     ; ECX is scratch below; keep the count

.block_loop:                            ; four pixels per iteration
        mov     eax, [esi]
        bswap   eax
        mov     ebx, [esi+4]
        bswap   ebx
        mov     ecx, [esi+8]
        bswap   ecx
        mov     edx, [esi+12]
        bswap   edx                     ; every load is done before the stores

        mov     [edi+0], eax
        mov     [edi+4], ebx
        mov     [edi+8], ecx
        mov     [edi+12], edx

        add     esi, byte 16
        add     edi, byte 16
        dec     ebp
        jnz     .block_loop

        pop     ecx                     ; original count
        and     ecx, byte 11b           ; pixels left over after the blocks
        jz      .finish

.tail_loop:                             ; remaining 1..3 pixels
        mov     edx, [esi]
        bswap   edx
        mov     [edi], edx
        add     esi, byte 4
        add     edi, byte 4
        dec     ecx
        jnz     .tail_loop

.finish:
        pop     ebp
        ret
|
259 |
|
260 |
|
261 |
|
262 |
|
;; 32 bit RGB 888 to 24 bit RGB 888
;-----------------------------------------------------------------------
; _ConvertX86p32_24RGB888
;   Copies bytes 0, 1 and 2 of every 4-byte source pixel to the
;   destination and drops byte 3, producing 3 bytes per pixel.
;   Register diagrams below list bytes from most to least significant;
;   upper-case letters belong to the earlier pixel.
; In:     ESI = source, EDI = dest, ECX = pixel count (must be non-zero)
; Out:    ESI advanced by 4 bytes and EDI by 3 bytes per pixel, ECX = 0
; Scratch: EAX, EBX, EDX (EBP is saved and restored on the long path)
;-----------------------------------------------------------------------
_ConvertX86p32_24RGB888:
        cmp     ecx, byte 32            ; more than 32 pixels -> unrolled path
        ja      .align_dest

.short_loop:                            ; one pixel per iteration
        mov     al, [esi]
        mov     bl, [esi+1]
        mov     dl, [esi+2]
        mov     [edi], al
        mov     [edi+1], bl
        mov     [edi+2], dl
        add     esi, byte 4
        add     edi, byte 3
        dec     ecx
        jnz     .short_loop
        ret

.align_dest:                            ; single pixels until EDI is a
        mov     edx, edi                ; multiple of 4
        and     edx, byte 11b
        jz      .dest_aligned
        mov     al, [esi]
        mov     bl, [esi+1]
        mov     dl, [esi+2]
        mov     [edi], al
        mov     [edi+1], bl
        mov     [edi+2], dl
        add     esi, byte 4
        add     edi, byte 3
        dec     ecx
        jmp     short .align_dest

.dest_aligned:
        push    ebp                     ; EBP serves as the block counter
        mov     ebp, ecx
        shr     ebp, 2                  ; 4 source pixels -> 3 output dwords
        push    ecx                     ; ECX is scratch below; keep the count

.block_loop:
        mov     eax, [esi]              ; pixel 0            eax = [A][R][G][B]
        mov     ebx, [esi+4]            ; pixel 1            ebx = [a][r][g][b]
        mov     ecx, [esi+12]           ; pixel 3            ecx = [a][r][g][b]

        shl     eax, 8                  ;                    eax = [R][G][B][.]
        shl     ebx, 8                  ;                    ebx = [r][g][b][.]
        mov     al, [esi+4]             ; pixel 1 byte 0     eax = [R][G][B][b]
        mov     bh, [esi+8+1]           ; pixel 2 byte 1     ebx = [r][g][G][.]
        ror     eax, 8                  ;                    eax = [b][R][G][B]
        mov     [edi], eax              ; output dword 0

        shl     ecx, 8                  ;                    ecx = [r][g][b][.]
        mov     bl, [esi+8+0]           ; pixel 2 byte 0     ebx = [r][g][G][B]
        mov     cl, [esi+8+2]           ; pixel 2 byte 2     ecx = [r][g][b][R]
        rol     ebx, 16                 ;                    ebx = [G][B][r][g]
        mov     [edi+4], ebx            ; output dword 1
        mov     [edi+8], ecx            ; output dword 2

        add     edi, byte 3*4
        add     esi, byte 4*4
        dec     ebp
        jnz     .block_loop

        pop     ecx                     ; count as it was at .dest_aligned
        and     ecx, byte 11b           ; pixels left over after the blocks
        jz      .finish

.tail_loop:                             ; remaining 1..3 pixels
        mov     al, [esi]
        mov     bl, [esi+1]
        mov     dl, [esi+2]
        mov     [edi], al
        mov     [edi+1], bl
        mov     [edi+2], dl
        add     esi, byte 4
        add     edi, byte 3
        dec     ecx
        jnz     .tail_loop

.finish:
        pop     ebp
        ret
|
356 |
|
357 |
|
358 |
|
359 |
|
;; 32 bit RGB 888 to 24 bit BGR 888
;-----------------------------------------------------------------------
; _ConvertX86p32_24BGR888
;   Writes bytes 2, 1, 0 (in that order) of every 4-byte source pixel to
;   the destination and drops byte 3, producing 3 bytes per pixel.
;   Register diagrams below list bytes from most to least significant;
;   upper-case letters belong to the earlier pixel.
; In:     ESI = source, EDI = dest, ECX = pixel count (must be non-zero)
; Out:    ESI advanced by 4 bytes and EDI by 3 bytes per pixel, ECX = 0
; Scratch: EAX, EBX, EDX (EBP is saved and restored on the long path)
;-----------------------------------------------------------------------
_ConvertX86p32_24BGR888:
        cmp     ecx, byte 32            ; more than 32 pixels -> unrolled path
        ja      .align_dest

.short_loop:                            ; one pixel per iteration
        mov     dl, [esi]
        mov     bl, [esi+1]
        mov     al, [esi+2]
        mov     [edi], al
        mov     [edi+1], bl
        mov     [edi+2], dl
        add     esi, byte 4
        add     edi, byte 3
        dec     ecx
        jnz     .short_loop
        ret

.align_dest:                            ; single pixels until EDI is a
        mov     edx, edi                ; multiple of 4
        and     edx, byte 11b
        jz      .dest_aligned
        mov     dl, [esi]
        mov     bl, [esi+1]
        mov     al, [esi+2]
        mov     [edi], al
        mov     [edi+1], bl
        mov     [edi+2], dl
        add     esi, byte 4
        add     edi, byte 3
        dec     ecx
        jmp     short .align_dest

.dest_aligned:
        push    ebp                     ; EBP serves as the block counter
        mov     ebp, ecx
        shr     ebp, 2                  ; 4 source pixels -> 3 output dwords
        push    ecx                     ; ECX is scratch below; keep the count

.block_loop:
        mov     eax, [esi]              ; pixel 0            eax = [A][R][G][B]
        mov     ebx, [esi+4]            ; pixel 1            ebx = [a][r][g][b]

        bswap   eax                     ;                    eax = [B][G][R][A]
        mov     al, [esi+4+2]           ; pixel 1 byte 2     eax = [B][G][R][r]
        bswap   ebx                     ;                    ebx = [b][g][r][a]
        mov     bh, [esi+4+4+1]         ; pixel 2 byte 1     ebx = [b][g][G][a]
        mov     bl, [esi+4+4+2]         ; pixel 2 byte 2     ebx = [b][g][G][R]

        ror     eax, 8                  ;                    eax = [r][B][G][R]
        ror     ebx, 16                 ;                    ebx = [G][R][b][g]
        mov     [edi], eax              ; output dword 0
        mov     [edi+4], ebx            ; output dword 1

        mov     ecx, [esi+12]           ; pixel 3            ecx = [a][r][g][b]
        bswap   ecx                     ;                    ecx = [b][g][r][a]
        mov     cl, [esi+8]             ; pixel 2 byte 0     ecx = [b][g][r][B]
        mov     [edi+8], ecx            ; output dword 2

        add     esi, byte 4*4
        add     edi, byte 3*4
        dec     ebp
        jnz     .block_loop

        pop     ecx                     ; count as it was at .dest_aligned
        and     ecx, byte 11b           ; pixels left over after the blocks
        jz      .finish

.tail_loop:                             ; remaining 1..3 pixels
        mov     dl, [esi]
        mov     bl, [esi+1]
        mov     al, [esi+2]
        mov     [edi], al
        mov     [edi+1], bl
        mov     [edi+2], dl
        add     esi, byte 4
        add     edi, byte 3
        dec     ecx
        jnz     .tail_loop

.finish:
        pop     ebp
        ret
|
457 |
|
458 |
|
459 |
|
460 |
|
;; 32 bit RGB 888 to 16 bit RGB 565

; Convert the single pixel at [ESI] to 565 (byte 2 in bits 11-15, byte 1 in
; bits 5-10, byte 0 in bits 0-4), store it at [EDI] and advance both
; pointers.  Uses AL/AH/BL and shifts EAX.
%macro X86P32_PIXEL_TO_RGB565 0
        mov     bl, [esi+0]             ; blue
        mov     al, [esi+1]             ; green
        mov     ah, [esi+2]             ; red
        shr     ah, 3                   ; keep top 5 bits of red
        and     al, 11111100b           ; keep top 6 bits of green
        shl     eax, 3                  ; red -> bits 11-15, green -> bits 5-10
        shr     bl, 3                   ; keep top 5 bits of blue
        add     al, bl
        mov     [edi+0], al
        mov     [edi+1], ah
        add     esi, byte 4
        add     edi, byte 2
%endmacro

;-----------------------------------------------------------------------
; _ConvertX86p32_16RGB565
; In:     ESI = source, EDI = dest, ECX = pixel count (must be non-zero)
; Out:    ESI advanced by 4 bytes and EDI by 2 bytes per pixel
; Scratch: EAX, EBX, EDX; ECX is modified
;-----------------------------------------------------------------------
_ConvertX86p32_16RGB565:
        cmp     ecx, byte 16            ; more than 16 pixels -> paired path
        ja      .align_dest

.short_loop:                            ; one pixel per iteration
        X86P32_PIXEL_TO_RGB565
        dec     ecx
        jnz     .short_loop
        ret

.align_dest:                            ; one leading pixel when EDI is not
        mov     ebx, edi                ; a multiple of 4
        and     ebx, byte 11b
        jz      .dest_aligned
        X86P32_PIXEL_TO_RGB565
        dec     ecx

.dest_aligned:
        push    ecx                     ; remaining count, needed for the tail
        shr     ecx, 1                  ; number of pixel pairs
        lea     esi, [esi+ecx*8]        ; point both arrays at the end of the
        lea     edi, [edi+ecx*4]        ; paired region ...
        neg     ecx                     ; ... and index them with -pairs..-1

.pair_loop:                             ; two pixels -> one output dword
        mov     eax, [esi+ecx*8]        ; first pixel of the pair
        mov     ebx, [esi+ecx*8+4]      ; second pixel (green/blue source)
        mov     edx, [esi+ecx*8+4]      ; second pixel (red source)
        mov     dl, [esi+ecx*8+2]       ; edx byte 0 = red of the first pixel

        shr     ah, 2                   ; first pixel: green down to 6 bits
        shr     eax, 3                  ; blue -> bits 0-4, green -> bits 5-10
        and     eax, 000007FFh

        shr     bh, 2                   ; second pixel: same, 16 bits higher
        shl     ebx, 13
        and     ebx, 07FF0000h

        shl     edx, 8                  ; both reds -> bits 11-15 / 27-31
        and     edx, 0F800F800h

        add     eax, ebx
        add     eax, edx
        mov     [edi+ecx*4], eax
        inc     ecx
        jnz     .pair_loop

        pop     ecx                     ; count as it was at .dest_aligned
        test    cl, 1                   ; odd -> one trailing pixel
        jz      .finish
        X86P32_PIXEL_TO_RGB565

.finish:
        ret
|
572 |
|
573 |
|
574 |
|
575 |
|
;; 32 bit RGB 888 to 16 bit BGR 565

; Convert the single pixel at [ESI] to 565 with the outer channels swapped
; (byte 0 in bits 11-15, byte 1 in bits 5-10, byte 2 in bits 0-4), store it
; at [EDI] and advance both pointers.  Uses AL/AH/BL and shifts EAX.
%macro X86P32_PIXEL_TO_BGR565 0
        mov     ah, [esi+0]             ; blue
        mov     al, [esi+1]             ; green
        mov     bl, [esi+2]             ; red
        shr     ah, 3                   ; keep top 5 bits of blue
        and     al, 11111100b           ; keep top 6 bits of green
        shl     eax, 3                  ; blue -> bits 11-15, green -> bits 5-10
        shr     bl, 3                   ; keep top 5 bits of red
        add     al, bl
        mov     [edi+0], al
        mov     [edi+1], ah
        add     esi, byte 4
        add     edi, byte 2
%endmacro

;-----------------------------------------------------------------------
; _ConvertX86p32_16BGR565
; In:     ESI = source, EDI = dest, ECX = pixel count (must be non-zero)
; Out:    ESI advanced by 4 bytes and EDI by 2 bytes per pixel
; Scratch: EAX, EBX, EDX; ECX is modified
;-----------------------------------------------------------------------
_ConvertX86p32_16BGR565:
        cmp     ecx, byte 16            ; more than 16 pixels -> paired path
        ja      .align_dest

.short_loop:                            ; one pixel per iteration
        X86P32_PIXEL_TO_BGR565
        dec     ecx
        jnz     .short_loop
        ret

.align_dest:                            ; one leading pixel when EDI is not
        mov     ebx, edi                ; a multiple of 4
        and     ebx, byte 11b
        jz      .dest_aligned
        X86P32_PIXEL_TO_BGR565
        dec     ecx

.dest_aligned:
        push    ecx                     ; remaining count, needed for the tail
        shr     ecx, 1                  ; number of pixel pairs
        lea     esi, [esi+ecx*8]        ; point both arrays at the end of the
        lea     edi, [edi+ecx*4]        ; paired region ...
        neg     ecx                     ; ... and index them with -pairs..-1

.pair_loop:                             ; two pixels -> one output dword
        mov     ah, [esi+ecx*8]         ; first pixel: blue
        mov     al, [esi+ecx*8+1]       ;              green
        shr     ah, 3
        shl     eax, 3                  ; blue -> bits 11-15, green -> bits 5-10
        and     eax, 0000FFE0h

        mov     bh, [esi+ecx*8+4]       ; second pixel: blue
        mov     bl, [esi+ecx*8+5]       ;               green
        shr     bh, 3
        shl     ebx, 19                 ; same layout, 16 bits higher
        and     ebx, 0FFE00000h

        mov     edx, [esi+ecx*8+4]      ; second pixel, red sits in byte 2
        mov     dl, [esi+ecx*8+2]       ; byte 0 = red of the first pixel
        shr     edx, 3                  ; both reds -> bits 0-4 / 16-20
        and     edx, 001F001Fh

        add     eax, ebx
        add     eax, edx
        mov     [edi+ecx*4], eax
        inc     ecx
        jnz     .pair_loop

        pop     ecx                     ; count as it was at .dest_aligned
        and     ecx, byte 1             ; odd -> one trailing pixel
        jz      .finish
        X86P32_PIXEL_TO_BGR565

.finish:
        ret
|
686 |
|
687 |
|
688 |
|
689 |
|
;; 32 bit RGB to 16 bit RGB 555

; Convert the single pixel at [ESI] to 555 (byte 2 in bits 10-14, byte 1 in
; bits 5-9, byte 0 in bits 0-4), store it at [EDI] and advance both
; pointers.  Uses AL/AH/BL and shifts EAX.
%macro X86P32_PIXEL_TO_RGB555 0
        mov     bl, [esi+0]             ; blue
        mov     al, [esi+1]             ; green
        mov     ah, [esi+2]             ; red
        shr     ah, 3                   ; keep top 5 bits of red
        and     al, 11111000b           ; keep top 5 bits of green
        shl     eax, 2                  ; red -> bits 10-14, green -> bits 5-9
        shr     bl, 3                   ; keep top 5 bits of blue
        add     al, bl
        mov     [edi+0], al
        mov     [edi+1], ah
        add     esi, byte 4
        add     edi, byte 2
%endmacro

;-----------------------------------------------------------------------
; _ConvertX86p32_16RGB555
; In:     ESI = source, EDI = dest, ECX = pixel count (must be non-zero)
; Out:    ESI advanced by 4 bytes and EDI by 2 bytes per pixel
; Scratch: EAX, EBX, EDX; ECX is modified
;-----------------------------------------------------------------------
_ConvertX86p32_16RGB555:
        cmp     ecx, byte 16            ; more than 16 pixels -> paired path
        ja      .align_dest

.short_loop:                            ; one pixel per iteration
        X86P32_PIXEL_TO_RGB555
        dec     ecx
        jnz     .short_loop
        ret

.align_dest:                            ; one leading pixel when EDI is not
        mov     ebx, edi                ; a multiple of 4
        and     ebx, byte 11b
        jz      .dest_aligned
        X86P32_PIXEL_TO_RGB555
        dec     ecx

.dest_aligned:
        push    ecx                     ; remaining count, needed for the tail
        shr     ecx, 1                  ; number of pixel pairs
        lea     esi, [esi+ecx*8]        ; point both arrays at the end of the
        lea     edi, [edi+ecx*4]        ; paired region ...
        neg     ecx                     ; ... and index them with -pairs..-1

.pair_loop:                             ; two pixels -> one output dword
        mov     eax, [esi+ecx*8]        ; first pixel of the pair
        mov     ebx, [esi+ecx*8+4]      ; second pixel (green/blue source)
        mov     edx, [esi+ecx*8+4]      ; second pixel (red source)
        mov     dl, [esi+ecx*8+2]       ; edx byte 0 = red of the first pixel

        shr     ah, 3                   ; first pixel: green down to 5 bits
        shr     eax, 3                  ; blue -> bits 0-4, green -> bits 5-9
        and     eax, 000007FFh

        shr     bh, 3                   ; second pixel: same, 16 bits higher
        shl     ebx, 13
        and     ebx, 07FF0000h

        shl     edx, 7                  ; both reds -> bits 10-14 / 26-30
        and     edx, 07C007C00h

        add     eax, ebx
        add     eax, edx
        mov     [edi+ecx*4], eax
        inc     ecx
        jnz     .pair_loop

        pop     ecx                     ; count as it was at .dest_aligned
        and     ecx, byte 1             ; odd -> one trailing pixel
        jz      .finish
        X86P32_PIXEL_TO_RGB555

.finish:
        ret
|
797 |
|
798 |
|
799 |
|
800 |
|
;; 32 bit RGB to 16 bit BGR 555

; Convert the single pixel at [ESI] to 555 with the outer channels swapped
; (byte 0 in bits 10-14, byte 1 in bits 5-9, byte 2 in bits 0-4), store it
; at [EDI] and advance both pointers.  Uses AL/AH/BL and shifts EAX.
%macro X86P32_PIXEL_TO_BGR555 0
        mov     ah, [esi+0]             ; blue
        mov     al, [esi+1]             ; green
        mov     bl, [esi+2]             ; red
        shr     ah, 3                   ; keep top 5 bits of blue
        and     al, 11111000b           ; keep top 5 bits of green
        shl     eax, 2                  ; blue -> bits 10-14, green -> bits 5-9
        shr     bl, 3                   ; keep top 5 bits of red
        add     al, bl
        mov     [edi+0], al
        mov     [edi+1], ah
        add     esi, byte 4
        add     edi, byte 2
%endmacro

;-----------------------------------------------------------------------
; _ConvertX86p32_16BGR555
; In:     ESI = source, EDI = dest, ECX = pixel count (must be non-zero)
; Out:    ESI advanced by 4 bytes and EDI by 2 bytes per pixel
; Scratch: EAX, EBX, EDX; ECX is modified
;-----------------------------------------------------------------------
_ConvertX86p32_16BGR555:
        cmp     ecx, byte 16            ; more than 16 pixels -> paired path
        ja      .align_dest

.short_loop:                            ; one pixel per iteration
        X86P32_PIXEL_TO_BGR555
        dec     ecx
        jnz     .short_loop
        ret

.align_dest:                            ; one leading pixel when EDI is not
        mov     ebx, edi                ; a multiple of 4
        and     ebx, byte 11b
        jz      .dest_aligned
        X86P32_PIXEL_TO_BGR555
        dec     ecx

.dest_aligned:
        push    ecx                     ; remaining count, needed for the tail
        shr     ecx, 1                  ; number of pixel pairs
        lea     esi, [esi+ecx*8]        ; point both arrays at the end of the
        lea     edi, [edi+ecx*4]        ; paired region ...
        neg     ecx                     ; ... and index them with -pairs..-1

.pair_loop:                             ; two pixels -> one output dword
        mov     ah, [esi+ecx*8]         ; first pixel: blue
        mov     al, [esi+ecx*8+1]       ;              green
        shr     ah, 3
        shl     eax, 2                  ; blue -> bits 10-14, green -> bits 5-9
        and     eax, 00007FE0h

        mov     bh, [esi+ecx*8+4]       ; second pixel: blue
        mov     bl, [esi+ecx*8+5]       ;               green
        shr     bh, 3
        shl     ebx, 18                 ; same layout, 16 bits higher
        and     ebx, 07FE00000h

        mov     edx, [esi+ecx*8+4]      ; second pixel, red sits in byte 2
        mov     dl, [esi+ecx*8+2]       ; byte 0 = red of the first pixel
        shr     edx, 3                  ; both reds -> bits 0-4 / 16-20
        and     edx, 001F001Fh

        add     eax, ebx
        add     eax, edx
        mov     [edi+ecx*4], eax
        inc     ecx
        jnz     .pair_loop

        pop     ecx                     ; count as it was at .dest_aligned
        and     ecx, byte 1             ; odd -> one trailing pixel
        jz      .finish
        X86P32_PIXEL_TO_BGR555

.finish:
        ret
|
912 |
|
913 |
|
914 |
|
915 |
|
916 |
|
;; From 32 bit RGB to 8 bit RGB (rrrgggbb)
;; Four pixels are packed into one dword and written at once; afterwards
;; up to three trailing pixels are converted one byte at a time.
;-----------------------------------------------------------------------
; _ConvertX86p32_8RGB332
;   Output byte = (byte2 & E0h) | ((byte1 & E0h) >> 3) | (byte0 >> 6)
; In:     ESI = source, EDI = dest, ECX = pixel count
; Out:    ESI advanced by 4 bytes and EDI by 1 byte per pixel
; Scratch: EAX, EBX, EDX; ECX is modified
;-----------------------------------------------------------------------
_ConvertX86p32_8RGB332:
        push    ecx                     ; keep the full count for the tail
        shr     ecx, 2                  ; number of 4-pixel groups
        jz      near .tail_check        ; fewer than four pixels

.quad_loop:
        ; pixel 0 -> AL
        mov     eax, [esi]
        mov     ebx, eax
        shr     al, 6                   ; blue, top 2 bits
        and     ah, 0e0h                ; green, top 3 bits ...
        shr     ah, 3                   ; ... moved to bits 2-4
        shr     ebx, 16
        and     bl, 0e0h                ; red, top 3 bits
        or      al, bl
        or      al, ah

        ; pixel 1 -> AH
        mov     edx, [esi+4]
        shr     dl, 6
        and     dh, 0e0h
        shr     dh, 3
        mov     ebx, edx
        shr     ebx, 16
        or      dl, dh
        and     bl, 0e0h
        or      dl, bl
        mov     ah, dl

        ; pixel 2 -> AL, after parking pixels 0/1 in the upper half of EAX
        mov     ebx, [esi+8]
        mov     edx, ebx
        and     edx, 0e00000h           ; red, top 3 bits
        shr     edx, 16
        shr     bl, 6
        and     bh, 0e0h
        shr     bh, 3
        or      bl, dl
        or      bl, bh
        ror     eax, 16
        mov     al, bl

        ; pixel 3 -> AH
        mov     edx, [esi+12]
        mov     ebx, edx
        and     ebx, 0e00000h           ; red, top 3 bits
        shr     ebx, 16
        shr     dl, 6
        and     dh, 0e0h
        shr     dh, 3
        mov     ah, dl
        or      ah, dh
        or      ah, bl

        rol     eax, 16                 ; restore order: pixel 0 in byte 0
        mov     [edi], eax
        add     esi, byte 16
        add     edi, byte 4
        dec     ecx
        jnz     near .quad_loop

.tail_check:
        pop     ecx
        and     ecx, byte 3             ; pixels left over after the groups
        jz      .finish

.tail_loop:                             ; one pixel per iteration
        mov     eax, [esi]
        mov     ebx, eax
        shr     ebx, 16
        and     bl, 0e0h                ; red
        shr     al, 6                   ; blue
        and     ah, 0e0h                ; green
        shr     ah, 3
        or      al, ah
        or      al, bl
        mov     [edi], al
        inc     edi
        add     esi, byte 4
        dec     ecx
        jnz     .tail_loop

.finish:
        ret
|
1042 |
|
; For ELF objects, emit an empty .note.GNU-stack section so the linker does
; not have to assume this object needs an executable stack.
; The guard accepts both `elf` and `elf32`: NASM 2.x is believed to report
; `-f elf` as `elf32` in __OUTPUT_FORMAT__, in which case the single `elf`
; comparison silently drops the note (NOTE(review): confirm against the
; NASM manual for the toolchain in use).
%ifidn __OUTPUT_FORMAT__,elf
section .note.GNU-stack noalloc noexec nowrite progbits
%elifidn __OUTPUT_FORMAT__,elf32
section .note.GNU-stack noalloc noexec nowrite progbits
%endif