genericopenlibs/liboil/src/i386/composite_i386.c
branchRCL_3
changeset 56 acd3cd4aaceb
equal deleted inserted replaced
54:4332f0f7be53 56:acd3cd4aaceb
       
     1 /*
       
     2  * LIBOIL - Library of Optimized Inner Loops
       
     3  * Copyright (c) 2005 David A. Schleef <ds@schleef.org>
       
     4  * All rights reserved.
       
     5  *
       
     6  * Redistribution and use in source and binary forms, with or without
       
     7  * modification, are permitted provided that the following conditions
       
     8  * are met:
       
     9  * 1. Redistributions of source code must retain the above copyright
       
    10  *    notice, this list of conditions and the following disclaimer.
       
    11  * 2. Redistributions in binary form must reproduce the above copyright
       
    12  *    notice, this list of conditions and the following disclaimer in the
       
    13  *    documentation and/or other materials provided with the distribution.
       
    14  * 
       
    15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
       
    16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
       
    17  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
       
    18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
       
    19  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
       
    20  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
       
    21  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
       
    22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
       
    23  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
       
    24  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
       
    25  * POSSIBILITY OF SUCH DAMAGE.
       
    26  */
       
    27 //Portions Copyright (c)  2008-2009 Nokia Corporation and/or its subsidiary(-ies). All rights reserved. 
       
    28 
       
    29 #ifdef HAVE_CONFIG_H
       
    30 #include "config.h"
       
    31 #endif
       
    32 
       
    33 #include <liboil/liboil.h>
       
    34 #include <liboil/liboilfunction.h>
       
    35 
       
/*
 * liboil function classes referenced from this file.  The
 * OIL_DEFINE_IMPL_FULL lines further down name one of these classes as
 * the class each implementation belongs to.
 */
OIL_DECLARE_CLASS (composite_in_argb);
OIL_DECLARE_CLASS (composite_in_argb_const_src);
OIL_DECLARE_CLASS (composite_in_argb_const_mask);
OIL_DECLARE_CLASS (composite_over_argb);
OIL_DECLARE_CLASS (composite_over_argb_const_src);
OIL_DECLARE_CLASS (composite_add_argb);
OIL_DECLARE_CLASS (composite_add_argb_const_src);
OIL_DECLARE_CLASS (composite_in_over_argb);
OIL_DECLARE_CLASS (composite_in_over_argb_const_src);
OIL_DECLARE_CLASS (composite_in_over_argb_const_mask);
       
    46 
       
/*
 * Disabled plain-C loop for composite_in_argb: every channel of src[i]
 * is combined with mask[i] through COMPOSITE_IN and repacked with ARGB.
 * It is excluded from the build by the #if 0.
 *
 * NOTE(review): this cannot simply be re-enabled.  It reuses the name
 * composite_in_argb_mmx, which is defined again below; it passes only
 * two arguments to OIL_DEFINE_IMPL_FULL where the live code passes
 * three; and the ARGB / ARGB_x / COMPOSITE_IN macros are not defined in
 * the visible part of this file.
 */
#if 0
static void
composite_in_argb_mmx (uint32_t *dest, uint32_t *src, uint8_t *mask, int n)
{
  int i;

  for(i=0;i<n;i++){
    dest[i] = ARGB(
        COMPOSITE_IN(ARGB_A(src[i]), mask[i]),
        COMPOSITE_IN(ARGB_R(src[i]), mask[i]),
        COMPOSITE_IN(ARGB_G(src[i]), mask[i]),
        COMPOSITE_IN(ARGB_B(src[i]), mask[i]));
  }
}
OIL_DEFINE_IMPL_FULL (composite_in_argb_mmx, composite_in_argb);
#endif
       
    63 
       
/*
 * This macro loads the constants:
 * mm7 = { 0, 0, 0, 0 }
 * mm6 = { 128, 128, 128, 128 }
 * mm5 = { 255, 255, 255, 255 }
 *
 * Each value listed is one 16-bit lane: the byte patterns 0x80808080 and
 * 0xffffffff are unpacked against the zeroed mm7.
 *
 * The expansion is a run of asm string literals meant to be pasted at the
 * start of an asm statement.  It overwrites %eax, so the enclosing asm
 * statement must list "eax" as clobbered.
 */
#define MMX_LOAD_CONSTANTS \
      "  pxor %%mm7, %%mm7\n" \
      "  movl $0x80808080, %%eax\n" \
      "  movd %%eax, %%mm6\n" \
      "  punpcklbw %%mm7, %%mm6\n" \
      "  movl $0xffffffff, %%eax\n" \
      "  movd %%eax, %%mm5\n" \
      "  punpcklbw %%mm7, %%mm5\n"
       
    78 
       
/*
 * a = muldiv255(a, b)
 *   a, b are unpacked
 *   destroys both registers
 *   requires mm6 set up as above
 *
 * Per 16-bit lane the sequence computes
 *   t = a * b + 128        (pmullw, then paddw with mm6)
 *   a = (t + (t >> 8)) >> 8
 * and leaves t >> 8 behind in b.
 *
 * a and b are bare register names (e.g. mm2, mm1); the macro stringifies
 * them and adds the "%%" prefix itself.
 */
#define MMX_MULDIV255(a,b) \
      "  pmullw %%" #b ", %%" #a "\n" \
      "  paddw %%mm6, %%" #a "\n" \
      "  movq %%" #a ", %%" #b "\n" \
      "  psrlw $8, %%" #b "\n" \
      "  paddw %%" #b ", %%" #a "\n" \
      "  psrlw $8, %%" #a "\n"
       
    92 
       
    93 static void
       
    94 composite_in_argb_mmx (uint32_t *dest, uint32_t *src, const uint8_t *mask, int n)
       
    95 {
       
    96 #if !defined(__WINSCW__) && !defined(__WINS__)      
       
    97   __asm__ __volatile__ (
       
    98       MMX_LOAD_CONSTANTS
       
    99       "1:\n"
       
   100       "  movd (%2), %%mm0\n"
       
   101       "  punpcklbw %%mm7, %%mm0\n"
       
   102       "  pshufw $0x00, %%mm0, %%mm1\n"
       
   103 
       
   104       "  movd (%1), %%mm2\n"
       
   105       "  punpcklbw %%mm7, %%mm2\n"
       
   106 
       
   107       MMX_MULDIV255(mm2, mm1)
       
   108 
       
   109       "  packuswb %%mm2, %%mm2\n"
       
   110       "  movd %%mm2, (%0)\n"
       
   111       "  addl $4, %0\n"
       
   112       "  addl $4, %1\n"
       
   113       "  addl $1, %2\n"
       
   114       "  decl %3\n"
       
   115       "  jnz 1b\n"
       
   116       "  emms\n"
       
   117       :"+r" (dest), "+r" (src), "+r" (mask), "+r" (n)
       
   118       :
       
   119       :"eax");
       
   120 #endif
       
   121 }
       
   122 OIL_DEFINE_IMPL_FULL (composite_in_argb_mmx, composite_in_argb, OIL_IMPL_FLAG_MMX | OIL_IMPL_FLAG_MMXEXT);
       
   123 
       
   124 /* 
       
   125  * This is a different style than the others.  Should be moved elsewhere.
       
   126  */
       
   127 static void
       
   128 composite_in_argb_mmx2 (uint32_t *dest, uint32_t *src, const uint8_t *mask, int n)
       
   129 {
       
   130 #if !defined(__WINSCW__) && !defined(__WINS__)      
       
   131   __asm__ __volatile__ (
       
   132       MMX_LOAD_CONSTANTS
       
   133       "1:\n"
       
   134       "  movl (%2), %%eax\n"
       
   135       /* if alpha == 0, write a 0 */
       
   136       "  testl $0x000000ff, %%eax\n"
       
   137       "  je 2f\n"
       
   138       /* if alpha == 0xff, write src value */
       
   139       "  cmp $0xff, %%al\n"
       
   140       "  je 3f\n"
       
   141 
       
   142       "  movd %%eax, %%mm0\n"
       
   143       "  punpcklbw %%mm7, %%mm0\n"
       
   144       "  pshufw $0x00, %%mm0, %%mm1\n"
       
   145 
       
   146       "  movd (%1), %%mm2\n"
       
   147       "  punpcklbw %%mm7, %%mm2\n"
       
   148 
       
   149       MMX_MULDIV255(mm2, mm1)
       
   150 
       
   151       "  packuswb %%mm2, %%mm2\n"
       
   152       "  movd %%mm2, (%0)\n"
       
   153       "  jmp 4f\n"
       
   154       "2:\n"
       
   155       "  movl $0, (%0)\n"
       
   156       "  jmp 4f\n"
       
   157       "3:\n"
       
   158       "  movl (%1), %%eax\n"
       
   159       "  movl %%eax, (%0)\n"
       
   160       "4:\n"
       
   161       "  addl $4, %0\n"
       
   162       "  addl $4, %1\n"
       
   163       "  addl $1, %2\n"
       
   164       "  decl %3\n"
       
   165       "  jnz 1b\n"
       
   166       "  emms\n"
       
   167       :"+r" (dest), "+r" (src), "+r" (mask), "+r" (n)
       
   168       :
       
   169       :"eax");
       
   170 #endif
       
   171 }
       
   172 OIL_DEFINE_IMPL_FULL (composite_in_argb_mmx2, composite_in_argb, OIL_IMPL_FLAG_MMX | OIL_IMPL_FLAG_MMXEXT);
       
   173 
       
   174 static void
       
   175 composite_in_argb_const_src_mmx (uint32_t *dest, uint32_t *src, const uint8_t *mask, int n)
       
   176 {
       
   177 #if !defined(__WINSCW__) && !defined(__WINS__)      
       
   178   __asm__ __volatile__ (
       
   179       MMX_LOAD_CONSTANTS
       
   180       "  movd (%1), %%mm3\n"
       
   181       "  punpcklbw %%mm7, %%mm3\n"
       
   182       "1:\n"
       
   183       "  movd (%2), %%mm0\n"
       
   184       "  punpcklbw %%mm7, %%mm0\n"
       
   185       "  pshufw $0x00, %%mm0, %%mm1\n"
       
   186 
       
   187       "  movq %%mm3, %%mm2\n"
       
   188 
       
   189       MMX_MULDIV255(mm2, mm1)
       
   190 
       
   191       "  packuswb %%mm2, %%mm2\n"
       
   192       "  movd %%mm2, (%0)\n"
       
   193       "  addl $4, %0\n"
       
   194       "  addl $1, %2\n"
       
   195       "  decl %3\n"
       
   196       "  jnz 1b\n"
       
   197       "  emms\n"
       
   198       :"+r" (dest), "+r" (src), "+r" (mask), "+r" (n)
       
   199       :
       
   200       :"eax");
       
   201 #endif
       
   202 }
       
   203 OIL_DEFINE_IMPL_FULL (composite_in_argb_const_src_mmx, composite_in_argb_const_src, OIL_IMPL_FLAG_MMX | OIL_IMPL_FLAG_MMXEXT);
       
   204 
       
   205 static void
       
   206 composite_in_argb_const_mask_mmx (uint32_t *dest, uint32_t *src, const uint8_t *mask, int n)
       
   207 {
       
   208 #if !defined(__WINSCW__) && !defined(__WINS__)      
       
   209   __asm__ __volatile__ (
       
   210       MMX_LOAD_CONSTANTS
       
   211       "  movd (%2), %%mm0\n"
       
   212       "  punpcklbw %%mm7, %%mm0\n"
       
   213       "  pshufw $0x00, %%mm0, %%mm3\n"
       
   214       "1:\n"
       
   215       "  movq %%mm3, %%mm1\n"
       
   216       "  movd (%1), %%mm2\n"
       
   217       "  punpcklbw %%mm7, %%mm2\n"
       
   218 
       
   219       MMX_MULDIV255(mm2, mm1)
       
   220 
       
   221       "  packuswb %%mm2, %%mm2\n"
       
   222       "  movd %%mm2, (%0)\n"
       
   223       "  addl $4, %0\n"
       
   224       "  addl $4, %1\n"
       
   225       "  decl %3\n"
       
   226       "  jnz 1b\n"
       
   227       "  emms\n"
       
   228       :"+r" (dest), "+r" (src), "+r" (mask), "+r" (n)
       
   229       :
       
   230       :"eax");
       
   231 #endif
       
   232 }
       
   233 OIL_DEFINE_IMPL_FULL (composite_in_argb_const_mask_mmx, composite_in_argb_const_mask, OIL_IMPL_FLAG_MMX | OIL_IMPL_FLAG_MMXEXT);
       
   234 
       
   235 static void
       
   236 composite_over_argb_mmx (uint32_t *dest, uint32_t *src, int n)
       
   237 {
       
   238 #if !defined(__WINSCW__) && !defined(__WINS__)      
       
   239   __asm__ __volatile__ (
       
   240       MMX_LOAD_CONSTANTS
       
   241       "1:\n"
       
   242       "  movl (%1), %%eax\n"
       
   243       "  testl $0xff000000, %%eax\n"
       
   244       "  jz 2f\n"
       
   245 
       
   246       "  movd %%eax, %%mm0\n"
       
   247       "  punpcklbw %%mm7, %%mm0\n"
       
   248       "  pshufw $0xff, %%mm0, %%mm1\n"
       
   249       "  pxor %%mm5, %%mm1\n"
       
   250 
       
   251       "  movd (%0), %%mm2\n"
       
   252       "  punpcklbw %%mm7, %%mm2\n"
       
   253 
       
   254       MMX_MULDIV255(mm2, mm1)
       
   255 
       
   256       "  paddw %%mm0, %%mm2\n"
       
   257       "  packuswb %%mm2, %%mm2\n"
       
   258 
       
   259       "  movd %%mm2, (%0)\n"
       
   260       "2:\n"
       
   261       "  addl $4, %0\n"
       
   262       "  addl $4, %1\n"
       
   263       "  decl %2\n"
       
   264       "  jnz 1b\n"
       
   265       "  emms\n"
       
   266       :"+r" (dest), "+r" (src), "+r" (n)
       
   267       :
       
   268       :"eax");
       
   269 #endif
       
   270 }
       
   271 OIL_DEFINE_IMPL_FULL (composite_over_argb_mmx, composite_over_argb, OIL_IMPL_FLAG_MMX | OIL_IMPL_FLAG_MMXEXT);
       
   272 
       
   273 /* unroll 2 */
       
   274 static void
       
   275 composite_over_argb_mmx_2 (uint32_t *dest, uint32_t *src, int n)
       
   276 {
       
   277 #if !defined(__WINSCW__) && !defined(__WINS__)      
       
   278   __asm__ __volatile__ (
       
   279       MMX_LOAD_CONSTANTS
       
   280 
       
   281       "  testl $0x1, %2\n"
       
   282       "  jz 2f\n"
       
   283 
       
   284       "  movl (%1), %%eax\n"
       
   285       "  testl $0xff000000, %%eax\n"
       
   286       "  jz 1f\n"
       
   287 
       
   288       "  movd %%eax, %%mm0\n"
       
   289       "  punpcklbw %%mm7, %%mm0\n"
       
   290       "  pshufw $0xff, %%mm0, %%mm1\n"
       
   291       "  pxor %%mm5, %%mm1\n"
       
   292 
       
   293       "  movd (%0), %%mm2\n"
       
   294       "  punpcklbw %%mm7, %%mm2\n"
       
   295       "  pmullw %%mm1, %%mm2\n"
       
   296       "  paddw %%mm6, %%mm2\n"
       
   297       "  movq %%mm2, %%mm1\n"
       
   298       "  psrlw $8, %%mm1\n"
       
   299       "  paddw %%mm1, %%mm2\n"
       
   300       "  psrlw $8, %%mm2\n"
       
   301 
       
   302       "  paddw %%mm0, %%mm2\n"
       
   303       "  packuswb %%mm2, %%mm2\n"
       
   304 
       
   305       "  movd %%mm2, (%0)\n"
       
   306 
       
   307       "1:\n"
       
   308       "  addl $4, %0\n"
       
   309       "  addl $4, %1\n"
       
   310 
       
   311       "2:\n"
       
   312       "  shr $1, %2\n"
       
   313       "  jz 5f\n"
       
   314       "3:\n"
       
   315       "  movl (%1), %%eax\n"
       
   316       "  orl 4(%1), %%eax\n"
       
   317       "  testl $0xff000000, %%eax\n"
       
   318       "  jz 4f\n"
       
   319 
       
   320       "  movd (%1), %%mm0\n"
       
   321       "  movd (%0), %%mm2\n"
       
   322 
       
   323       "  punpcklbw %%mm7, %%mm0\n"
       
   324       "   movd 4(%1), %%mm3\n"
       
   325 
       
   326       "  pshufw $0xff, %%mm0, %%mm1\n"
       
   327       "  punpcklbw %%mm7, %%mm2\n"
       
   328 
       
   329       "  pxor %%mm5, %%mm1\n"
       
   330       "   movd 4(%0), %%mm4\n"
       
   331 
       
   332       "  pmullw %%mm1, %%mm2\n"
       
   333       "   punpcklbw %%mm7, %%mm3\n"
       
   334 
       
   335       "  paddw %%mm6, %%mm2\n"
       
   336       "   punpcklbw %%mm7, %%mm4\n"
       
   337 
       
   338       "  movq %%mm2, %%mm1\n"
       
   339       "   pshufw $0xff, %%mm3, %%mm7\n"
       
   340 
       
   341       "  psrlw $8, %%mm1\n"
       
   342       "   pxor %%mm5, %%mm7\n"
       
   343 
       
   344       "  paddw %%mm1, %%mm2\n"
       
   345       "   pmullw %%mm7, %%mm4\n"
       
   346 
       
   347       "  psrlw $8, %%mm2\n"
       
   348       "   paddw %%mm6, %%mm4\n"
       
   349 
       
   350       "  paddw %%mm0, %%mm2\n"
       
   351       "   movq %%mm4, %%mm7\n"
       
   352 
       
   353       "  packuswb %%mm2, %%mm2\n"
       
   354       "   psrlw $8, %%mm7\n"
       
   355 
       
   356       "  movd %%mm2, (%0)\n"
       
   357       "   paddw %%mm7, %%mm4\n"
       
   358 
       
   359       "   psrlw $8, %%mm4\n"
       
   360       "   paddw %%mm3, %%mm4\n"
       
   361       "   packuswb %%mm4, %%mm4\n"
       
   362       "   movd %%mm4, 4(%0)\n"
       
   363 
       
   364       "  pxor %%mm7, %%mm7\n"
       
   365       "4:\n"
       
   366       "  addl $8, %0\n"
       
   367       "  addl $8, %1\n"
       
   368       "  decl %2\n"
       
   369       "  jnz 3b\n"
       
   370       "5:\n"
       
   371       "  emms\n"
       
   372       :"+r" (dest), "+r" (src), "+r" (n)
       
   373       :
       
   374       :"eax");
       
   375 #endif
       
   376 }
       
   377 OIL_DEFINE_IMPL_FULL (composite_over_argb_mmx_2, composite_over_argb, OIL_IMPL_FLAG_MMX | OIL_IMPL_FLAG_MMXEXT);
       
   378 
       
   379 /* replace pshufw with punpck */
       
   380 static void
       
   381 composite_over_argb_mmx_3 (uint32_t *dest, uint32_t *src, int n)
       
   382 {
       
   383 #if !defined(__WINSCW__) && !defined(__WINS__)      
       
   384   __asm__ __volatile__ (
       
   385       MMX_LOAD_CONSTANTS
       
   386       "1:\n"
       
   387       "  movl (%1), %%eax\n"
       
   388       "  testl $0xff000000, %%eax\n"
       
   389       "  jz 2f\n"
       
   390 
       
   391       "  movd %%eax, %%mm0\n"
       
   392       "  punpcklbw %%mm7, %%mm0\n"
       
   393       "  movq %%mm0, %%mm1\n"
       
   394       "  punpckhwd %%mm1, %%mm1\n"
       
   395       "  punpckhdq %%mm1, %%mm1\n"
       
   396       "  pxor %%mm5, %%mm1\n"
       
   397 
       
   398       "  movd (%0), %%mm2\n"
       
   399       "  punpcklbw %%mm7, %%mm2\n"
       
   400       "  pmullw %%mm1, %%mm2\n"
       
   401       "  paddw %%mm6, %%mm2\n"
       
   402       "  movq %%mm2, %%mm1\n"
       
   403       "  psrlw $8, %%mm1\n"
       
   404       "  paddw %%mm1, %%mm2\n"
       
   405       "  psrlw $8, %%mm2\n"
       
   406 
       
   407       "  paddw %%mm0, %%mm2\n"
       
   408       "  packuswb %%mm2, %%mm2\n"
       
   409       "  movd %%mm2, (%0)\n"
       
   410 
       
   411       "2:\n"
       
   412       "  addl $4, %0\n"
       
   413       "  addl $4, %1\n"
       
   414       "  decl %2\n"
       
   415       "  jnz 1b\n"
       
   416       "  emms\n"
       
   417       :"+r" (dest), "+r" (src), "+r" (n)
       
   418       :
       
   419       :"eax");
       
   420 #endif
       
   421 }
       
   422 OIL_DEFINE_IMPL_FULL (composite_over_argb_mmx_3, composite_over_argb, OIL_IMPL_FLAG_MMX);
       
   423 
       
   424 /* written for gromit */
       
   425 static void
       
   426 composite_over_argb_mmx_4 (uint32_t *dest, uint32_t *src, int n)
       
   427 {
       
   428 #if !defined(__WINSCW__) && !defined(__WINS__)      
       
   429   __asm__ __volatile__ ("  pxor %%mm7, %%mm7\n"   // mm7 = { 0, 0, 0, 0 }
       
   430       "  movl $0x80808080, %%eax\n"
       
   431       "  movd %%eax, %%mm6\n"  // mm6 = { 128, 128, 128, 128 }
       
   432       "  punpcklbw %%mm7, %%mm6\n"
       
   433       "  movl $0xffffffff, %%eax\n" // mm5 = { 255, 255, 255, 255 }
       
   434       "  movd %%eax, %%mm5\n"
       
   435       "  punpcklbw %%mm7, %%mm5\n"
       
   436       "  movl $0x02020202, %%eax\n"
       
   437       "  movd %%eax, %%mm4\n"
       
   438       "  punpcklbw %%mm7, %%mm4\n"
       
   439       "  paddw %%mm5, %%mm4\n" // mm5 = { 257, 257, 257, 257 }
       
   440       "1:\n"
       
   441       "  movl (%1), %%eax\n"
       
   442       "  testl $0xff000000, %%eax\n"
       
   443       "  jz 2f\n"
       
   444 
       
   445       "  movd %%eax, %%mm0\n"
       
   446       "  punpcklbw %%mm7, %%mm0\n"
       
   447       "  pshufw $0xff, %%mm0, %%mm1\n"
       
   448       "  pxor %%mm5, %%mm1\n"
       
   449 
       
   450       "  movd (%0), %%mm2\n"
       
   451       "  punpcklbw %%mm7, %%mm2\n"
       
   452       "  pmullw %%mm1, %%mm2\n"
       
   453       "  paddw %%mm6, %%mm2\n"
       
   454       "  pmulhuw %%mm4, %%mm2\n"
       
   455 
       
   456       "  paddw %%mm0, %%mm2\n"
       
   457       "  packuswb %%mm2, %%mm2\n"
       
   458 
       
   459       "  movd %%mm2, (%0)\n"
       
   460       "2:\n"
       
   461       "  addl $4, %0\n"
       
   462       "  addl $4, %1\n"
       
   463       "  subl $1, %2\n"
       
   464       "  jnz 1b\n"
       
   465       "  emms\n"
       
   466       :"+r" (dest), "+r" (src), "+r" (n)
       
   467       :
       
   468       :"eax");
       
   469 #endif
       
   470 }
       
   471 OIL_DEFINE_IMPL_FULL (composite_over_argb_mmx_4, composite_over_argb, OIL_IMPL_FLAG_MMX | OIL_IMPL_FLAG_MMXEXT);
       
   472 
       
   473 static void
       
   474 composite_over_argb_mmx_5 (uint32_t *dest, uint32_t *src, int n)
       
   475 {
       
   476 #if !defined(__WINSCW__) && !defined(__WINS__)      
       
   477   __asm__ __volatile__ ("  pxor %%mm7, %%mm7\n"   // mm7 = { 0, 0, 0, 0 }
       
   478       "  movl $0x80808080, %%eax\n"
       
   479       "  movd %%eax, %%mm6\n"  // mm6 = { 128, 128, 128, 128 }
       
   480       "  punpcklbw %%mm7, %%mm6\n"
       
   481 #if 0
       
   482       "  movl $0xffffffff, %%eax\n" // mm5 = { 255, 255, 255, 255 }
       
   483       "  movd %%eax, %%mm5\n"
       
   484       "  punpcklbw %%mm7, %%mm5\n"
       
   485 #else
       
   486       "  pcmpeqw %%mm5, %%mm5\n"
       
   487       "  psrlw $8, %%mm5\n" // mm5 = { 255, 255, 255, 255 }
       
   488 #endif
       
   489       "  movl $0x02020202, %%eax\n"
       
   490       "  movd %%eax, %%mm4\n"
       
   491       "  punpcklbw %%mm7, %%mm4\n"
       
   492       "  paddw %%mm5, %%mm4\n" // mm5 = { 257, 257, 257, 257 }
       
   493       "1:\n"
       
   494       "  movd (%1), %%mm0\n"
       
   495       "  punpcklbw %%mm7, %%mm0\n"
       
   496       "  xor %%eax, %%eax\n"
       
   497       "  pextrw $3, %%mm0, %%eax\n"
       
   498       "  test %%eax, %%eax\n"
       
   499       "  jz 2f\n"
       
   500 
       
   501       "  pshufw $0xff, %%mm0, %%mm1\n"
       
   502       "  pxor %%mm5, %%mm1\n"
       
   503 
       
   504       "  movd (%0), %%mm2\n"
       
   505       "  punpcklbw %%mm7, %%mm2\n"
       
   506       "  pmullw %%mm1, %%mm2\n"
       
   507       "  paddw %%mm6, %%mm2\n"
       
   508       "  pmulhuw %%mm4, %%mm2\n"
       
   509 
       
   510       "  paddw %%mm0, %%mm2\n"
       
   511       "  packuswb %%mm2, %%mm2\n"
       
   512 
       
   513       "  movd %%mm2, (%0)\n"
       
   514       "2:\n"
       
   515       "  addl $4, %0\n"
       
   516       "  addl $4, %1\n"
       
   517       "  subl $1, %2\n"
       
   518       "  jnz 1b\n"
       
   519       "  emms\n"
       
   520       :"+r" (dest), "+r" (src), "+r" (n)
       
   521       :
       
   522       :"eax");
       
   523 #endif
       
   524 }
       
   525 OIL_DEFINE_IMPL_FULL (composite_over_argb_mmx_5, composite_over_argb, OIL_IMPL_FLAG_MMX | OIL_IMPL_FLAG_MMXEXT);
       
   526 
       
   527 static void
       
   528 composite_over_argb_sse2 (uint32_t *dest, uint32_t *src, int n)
       
   529 {
       
   530 #if !defined(__WINSCW__) && !defined(__WINS__)      
       
   531   __asm__ __volatile__ ("  pxor %%xmm7, %%xmm7\n"   // mm7 = { 0, 0, 0, 0 }
       
   532       "  movl $0x80808080, %%eax\n"
       
   533       "  movd %%eax, %%xmm6\n"  // mm6 = { 128, 128, 128, 128 }
       
   534       "  punpcklbw %%xmm7, %%xmm6\n"
       
   535       "  movl $0xffffffff, %%eax\n" // mm5 = { 255, 255, 255, 255 }
       
   536       "  movd %%eax, %%xmm5\n"
       
   537       "  punpcklbw %%xmm7, %%xmm5\n"
       
   538       "  movl $0x02020202, %%eax\n"
       
   539       "  movd %%eax, %%xmm4\n"
       
   540       "  punpcklbw %%xmm7, %%xmm4\n"
       
   541       "  paddw %%xmm5, %%xmm4\n" // mm4 = { 255, 255, 255, 255 }
       
   542       "1:\n"
       
   543       "  movl (%1), %%eax\n"
       
   544       "  testl $0xff000000, %%eax\n"
       
   545       "  jz 2f\n"
       
   546 
       
   547       "  movd (%1), %%xmm1\n"
       
   548       "  punpcklbw %%xmm7, %%xmm1\n"
       
   549       "  pshuflw $0xff, %%xmm1, %%xmm0\n"
       
   550       "  pxor %%xmm5, %%xmm0\n"
       
   551 
       
   552       "  movd (%0), %%xmm3\n"
       
   553       "  punpcklbw %%xmm7, %%xmm3\n"
       
   554       "  pmullw %%xmm0, %%xmm3\n"
       
   555       "  paddw %%xmm6, %%xmm3\n"
       
   556       "  pmulhuw %%xmm4, %%xmm3\n"
       
   557 
       
   558       "  paddw %%xmm1, %%xmm3\n"
       
   559       "  packuswb %%xmm3, %%xmm3\n"
       
   560       "  movd %%xmm3, (%0)\n"
       
   561       "2:\n"
       
   562       "  addl $4, %0\n"
       
   563       "  addl $4, %1\n"
       
   564       "  decl %2\n"
       
   565       "  jnz 1b\n"
       
   566       :"+r" (dest), "+r" (src), "+r" (n)
       
   567       :
       
   568       :"eax");
       
   569 #endif
       
   570 }
       
   571 OIL_DEFINE_IMPL_FULL (composite_over_argb_sse2, composite_over_argb, OIL_IMPL_FLAG_SSE2);
       
   572 
       
   573 /* written for shaun */
       
   574 static void
       
   575 composite_over_argb_sse2_2 (uint32_t *dest, uint32_t *src, int n)
       
   576 {
       
   577 #if !defined(__WINSCW__) && !defined(__WINS__)      
       
   578   __asm__ __volatile__ ("  pxor %%xmm7, %%xmm7\n"   // mm7 = { 0, 0, 0, 0 }
       
   579       "  movl $0x80808080, %%eax\n"
       
   580       "  movd %%eax, %%xmm6\n"  // mm6 = { 128, 128, 128, 128 }
       
   581       "  punpcklbw %%xmm7, %%xmm6\n"
       
   582       "  punpcklwd %%xmm6, %%xmm6\n"
       
   583       "  movl $0xffffffff, %%eax\n" // mm5 = { 255, 255, 255, 255 }
       
   584       "  movd %%eax, %%xmm5\n"
       
   585       "  punpcklbw %%xmm7, %%xmm5\n"
       
   586       "  punpcklwd %%xmm5, %%xmm5\n"
       
   587       "  movl $0x02020202, %%eax\n"
       
   588       "  movd %%eax, %%xmm4\n"
       
   589       "  punpcklbw %%xmm7, %%xmm4\n"
       
   590       "  paddw %%xmm5, %%xmm4\n" // mm4 = { 257, 257, 257, 257 }
       
   591       "  punpcklwd %%xmm4, %%xmm4\n"
       
   592       :
       
   593       :
       
   594       :"eax");
       
   595 
       
   596   if (n&1) {
       
   597     __asm__ __volatile__ (
       
   598       "  movl (%1), %%eax\n"
       
   599       "  testl $0xff000000, %%eax\n"
       
   600       "  jz 1f\n"
       
   601 
       
   602       "  movd (%1), %%xmm1\n"
       
   603       "  punpcklbw %%xmm7, %%xmm1\n"
       
   604       "  pshuflw $0xff, %%xmm1, %%xmm0\n"
       
   605       "  pxor %%xmm5, %%xmm0\n"
       
   606 
       
   607       "  movd (%0), %%xmm3\n"
       
   608       "  punpcklbw %%xmm7, %%xmm3\n"
       
   609       "  pmullw %%xmm0, %%xmm3\n"
       
   610       "  paddw %%xmm6, %%xmm3\n"
       
   611       "  pmulhuw %%xmm4, %%xmm3\n"
       
   612 
       
   613       "  paddw %%xmm1, %%xmm3\n"
       
   614       "  packuswb %%xmm3, %%xmm3\n"
       
   615       "  movd %%xmm3, (%0)\n"
       
   616 
       
   617       "1:\n"
       
   618       "  addl $4, %0\n"
       
   619       "  addl $4, %1\n"
       
   620       :"+r" (dest), "+r" (src)
       
   621       :
       
   622       :"eax");
       
   623   }
       
   624   n>>=1;
       
   625 
       
   626   if (n>0){
       
   627     __asm__ __volatile__ ("\n"
       
   628       "3:\n"
       
   629 #if 0
       
   630       "  movl (%1), %%eax\n"
       
   631       "  orl 4(%1), %%eax\n"
       
   632       "  testl $0xff000000, %%eax\n"
       
   633       "  jz 4f\n"
       
   634 #endif
       
   635 
       
   636       "  movq (%1), %%xmm1\n"
       
   637       "  punpcklbw %%xmm7, %%xmm1\n"
       
   638       "  pshuflw $0xff, %%xmm1, %%xmm0\n"
       
   639       "  pshufhw $0xff, %%xmm0, %%xmm0\n"
       
   640       "  pxor %%xmm5, %%xmm0\n"
       
   641 
       
   642       "  movq (%0), %%xmm3\n"
       
   643       "  punpcklbw %%xmm7, %%xmm3\n"
       
   644       "  pmullw %%xmm0, %%xmm3\n"
       
   645       "  paddw %%xmm6, %%xmm3\n"
       
   646       "  pmulhuw %%xmm4, %%xmm3\n"
       
   647       "  paddw %%xmm1, %%xmm3\n"
       
   648       "  packuswb %%xmm3, %%xmm3\n"
       
   649       "  movq %%xmm3, (%0)\n"
       
   650       "4:\n"
       
   651       "  addl $8, %0\n"
       
   652       "  addl $8, %1\n"
       
   653       "  subl $1, %2\n"
       
   654       "  jnz 3b\n"
       
   655       :"+r" (dest), "+r" (src), "+r" (n)
       
   656       :
       
   657       :"eax");
       
   658   }
       
   659 #endif
       
   660 }
       
   661 OIL_DEFINE_IMPL_FULL (composite_over_argb_sse2_2, composite_over_argb, OIL_IMPL_FLAG_SSE2);
       
   662 
       
   663 /* written for shaun */
       
   664 static void
       
   665 composite_over_argb_sse2_3 (uint32_t *dest, uint32_t *src, int n)
       
   666 {
       
   667   int begin;
       
   668   int middle;
       
   669   int end;
       
   670 #if !defined(__WINSCW__) && !defined(__WINS__)      
       
   671   __asm__ __volatile__ ("  pxor %%xmm7, %%xmm7\n"   // mm7 = { 0, 0, 0, 0 }
       
   672       "  movl $0x80808080, %%eax\n"
       
   673       "  movd %%eax, %%xmm6\n"  // mm6 = { 128, 128, 128, 128 }
       
   674       "  punpcklbw %%xmm7, %%xmm6\n"
       
   675       "  punpcklwd %%xmm6, %%xmm6\n"
       
   676       "  movl $0xffffffff, %%eax\n" // mm5 = { 255, 255, 255, 255 }
       
   677       "  movd %%eax, %%xmm5\n"
       
   678       "  punpcklbw %%xmm7, %%xmm5\n"
       
   679       "  punpcklwd %%xmm5, %%xmm5\n"
       
   680       "  movl $0x02020202, %%eax\n"
       
   681       "  movd %%eax, %%xmm4\n"
       
   682       "  punpcklbw %%xmm7, %%xmm4\n"
       
   683       "  paddw %%xmm5, %%xmm4\n" // mm4 = { 257, 257, 257, 257 }
       
   684       "  punpcklwd %%xmm4, %%xmm4\n"
       
   685       :
       
   686       :
       
   687       :"eax");
       
   688 
       
   689   begin = 0x3 & (4 - (((unsigned long)dest & 0xf) >> 2));
       
   690   if (begin>n) {
       
   691     begin = n;
       
   692     middle = 0;
       
   693     end = 0;
       
   694   } else {
       
   695     middle = (n-begin)>>2;
       
   696     end = n - begin - middle*4;
       
   697   }
       
   698 
       
   699   if (begin>0) {
       
   700     __asm__ __volatile__ ("\n"
       
   701       "1:\n"
       
   702       "  movl (%1), %%eax\n"
       
   703       "  testl $0xff000000, %%eax\n"
       
   704       "  jz 2f\n"
       
   705 
       
   706       "  movd (%1), %%xmm1\n"
       
   707       "  punpcklbw %%xmm7, %%xmm1\n"
       
   708       "  pshuflw $0xff, %%xmm1, %%xmm0\n"
       
   709       "  pxor %%xmm5, %%xmm0\n"
       
   710 
       
   711       "  movd (%0), %%xmm3\n"
       
   712       "  punpcklbw %%xmm7, %%xmm3\n"
       
   713       "  pmullw %%xmm0, %%xmm3\n"
       
   714       "  paddw %%xmm6, %%xmm3\n"
       
   715       "  pmulhuw %%xmm4, %%xmm3\n"
       
   716 
       
   717       "  paddw %%xmm1, %%xmm3\n"
       
   718       "  packuswb %%xmm3, %%xmm3\n"
       
   719       "  movd %%xmm3, (%0)\n"
       
   720 
       
   721       "2:\n"
       
   722       "  addl $4, %0\n"
       
   723       "  addl $4, %1\n"
       
   724       "  subl $1, %2\n"
       
   725       "  jnz 1b\n"
       
   726       :"+r" (dest), "+r" (src), "+r" (begin)
       
   727       :
       
   728       :"eax");
       
   729   }
       
   730 
       
   731   if (middle>0){
       
   732     __asm__ __volatile__ ("\n"
       
   733       "1:\n"
       
   734       "  movq (%1), %%xmm1\n"
       
   735       "  movq 8(%1), %%xmm0\n"
       
   736       "  movl (%1), %%eax\n"
       
   737       "  orl 4(%1), %%eax\n"
       
   738       "  orl 8(%1), %%eax\n"
       
   739       "  orl 12(%1), %%eax\n"
       
   740       "  test $0xff000000, %%eax\n"
       
   741       "  jz 2f\n"
       
   742       "  punpcklbw %%xmm7, %%xmm1\n"
       
   743       "  punpcklbw %%xmm7, %%xmm0\n"
       
   744       "  pshuflw $0xff, %%xmm1, %%xmm1\n"
       
   745       "  pshuflw $0xff, %%xmm0, %%xmm0\n"
       
   746       "  pshufhw $0xff, %%xmm1, %%xmm1\n"
       
   747       "  pshufhw $0xff, %%xmm0, %%xmm0\n"
       
   748 
       
   749       "  pxor %%xmm5, %%xmm1\n"
       
   750       "  pxor %%xmm5, %%xmm0\n"
       
   751 
       
   752       "  movq (%0), %%xmm3\n"
       
   753       "  movq 8(%0), %%xmm2\n"
       
   754       "  punpcklbw %%xmm7, %%xmm3\n"
       
   755       "  punpcklbw %%xmm7, %%xmm2\n"
       
   756 
       
   757       "  pmullw %%xmm1, %%xmm3\n"
       
   758       "  paddw %%xmm6, %%xmm3\n"
       
   759       "  pmulhuw %%xmm4, %%xmm3\n"
       
   760       "  pmullw %%xmm0, %%xmm2\n"
       
   761       "  paddw %%xmm6, %%xmm2\n"
       
   762       "  pmulhuw %%xmm4, %%xmm2\n"
       
   763       "  packuswb %%xmm2, %%xmm3\n"
       
   764 
       
   765       "  movdqu (%1), %%xmm1\n"
       
   766       "  paddb %%xmm1, %%xmm3\n"
       
   767       "  movdqa %%xmm3, (%0)\n"
       
   768       "2:\n"
       
   769       "  addl $16, %0\n"
       
   770       "  addl $16, %1\n"
       
   771       "  subl $1, %2\n"
       
   772       "  jnz 1b\n"
       
   773       :"+r" (dest), "+r" (src), "+r" (middle)
       
   774       :
       
   775       :"eax");
       
   776   }
       
   777   if (end>0) {
       
   778     __asm__ __volatile__ ("\n"
       
   779       "1:\n"
       
   780       "  movl (%1), %%eax\n"
       
   781       "  testl $0xff000000, %%eax\n"
       
   782       "  jz 2f\n"
       
   783 
       
   784       "  movd (%1), %%xmm1\n"
       
   785       "  punpcklbw %%xmm7, %%xmm1\n"
       
   786       "  pshuflw $0xff, %%xmm1, %%xmm0\n"
       
   787       "  pxor %%xmm5, %%xmm0\n"
       
   788 
       
   789       "  movd (%0), %%xmm3\n"
       
   790       "  punpcklbw %%xmm7, %%xmm3\n"
       
   791       "  pmullw %%xmm0, %%xmm3\n"
       
   792       "  paddw %%xmm6, %%xmm3\n"
       
   793       "  pmulhuw %%xmm4, %%xmm3\n"
       
   794 
       
   795       "  paddw %%xmm1, %%xmm3\n"
       
   796       "  packuswb %%xmm3, %%xmm3\n"
       
   797       "  movd %%xmm3, (%0)\n"
       
   798 
       
   799       "2:\n"
       
   800       "  addl $4, %0\n"
       
   801       "  addl $4, %1\n"
       
   802       "  subl $1, %2\n"
       
   803       "  jnz 1b\n"
       
   804       :"+r" (dest), "+r" (src), "+r" (end)
       
   805       :
       
   806       :"eax");
       
   807   }
       
   808 #endif
       
   809 }
       
   810 OIL_DEFINE_IMPL_FULL (composite_over_argb_sse2_3, composite_over_argb, OIL_IMPL_FLAG_SSE2);
       
   811 
       
   812 
       
   813 static void
       
   814 composite_over_argb_const_src_mmx (uint32_t *dest, uint32_t *src, int n)
       
   815 {
       
   816 #if !defined(__WINSCW__) && !defined(__WINS__)      
       
   817   __asm__ __volatile__ (
       
   818       MMX_LOAD_CONSTANTS
       
   819       "  movl (%1), %%eax\n"
       
   820       "  movd %%eax, %%mm0\n"
       
   821       "  punpcklbw %%mm7, %%mm0\n"
       
   822       "  pshufw $0xff, %%mm0, %%mm3\n"
       
   823       "  pxor %%mm5, %%mm3\n"
       
   824       "1:\n"
       
   825       "  movq %%mm3, %%mm1\n"
       
   826       "  movd (%0), %%mm2\n"
       
   827       "  punpcklbw %%mm7, %%mm2\n"
       
   828 
       
   829       MMX_MULDIV255(mm2, mm1)
       
   830 
       
   831       "  paddw %%mm0, %%mm2\n"
       
   832       "  packuswb %%mm2, %%mm2\n"
       
   833 
       
   834       "  movd %%mm2, (%0)\n"
       
   835       "  addl $4, %0\n"
       
   836       "  decl %2\n"
       
   837       "  jnz 1b\n"
       
   838       "  emms\n"
       
   839       :"+r" (dest), "+r" (src), "+r" (n)
       
   840       :
       
   841       :"eax");
       
   842 #endif
       
   843 }
       
   844 OIL_DEFINE_IMPL_FULL (composite_over_argb_const_src_mmx, composite_over_argb_const_src, OIL_IMPL_FLAG_MMX | OIL_IMPL_FLAG_MMXEXT);
       
   845 
       
   846 static void
       
   847 composite_add_argb_mmx (uint32_t *dest, uint32_t *src, int n)
       
   848 {
       
   849 #if !defined(__WINSCW__) && !defined(__WINS__)      
       
   850   __asm__ __volatile__ (
       
   851       "1:\n"
       
   852       "  movd (%1), %%mm0\n"
       
   853       "  movd (%0), %%mm2\n"
       
   854       "  paddusb %%mm0, %%mm2\n"
       
   855       "  movd %%mm2, (%0)\n"
       
   856       "  addl $4, %0\n"
       
   857       "  addl $4, %1\n"
       
   858       "  decl %2\n"
       
   859       "  jnz 1b\n"
       
   860       "  emms\n"
       
   861       :"+r" (dest), "+r" (src), "+r" (n)
       
   862       :
       
   863       :"eax");
       
   864 #endif
       
   865 }
       
   866 OIL_DEFINE_IMPL_FULL (composite_add_argb_mmx, composite_add_argb, OIL_IMPL_FLAG_MMX);
       
   867 
       
   868 static void
       
   869 composite_add_argb_const_src_mmx (uint32_t *dest, uint32_t *src, int n)
       
   870 {
       
   871 #if !defined(__WINSCW__) && !defined(__WINS__)      
       
   872   __asm__ __volatile__ (
       
   873       "  movd (%1), %%mm0\n"
       
   874       "1:\n"
       
   875       "  movd (%0), %%mm2\n"
       
   876       "  paddusb %%mm0, %%mm2\n"
       
   877       "  movd %%mm2, (%0)\n"
       
   878       "  addl $4, %0\n"
       
   879       "  decl %2\n"
       
   880       "  jnz 1b\n"
       
   881       "  emms\n"
       
   882       :"+r" (dest), "+r" (src), "+r" (n)
       
   883       :
       
   884       :"eax");
       
   885 #endif
       
   886 }
       
   887 OIL_DEFINE_IMPL_FULL (composite_add_argb_const_src_mmx, composite_add_argb_const_src, OIL_IMPL_FLAG_MMX);
       
   888 
       
   889 static void
       
   890 composite_in_over_argb_mmx (uint32_t *dest, uint32_t *src, uint8_t *mask, int n)
       
   891 {
       
   892 #if !defined(__WINSCW__) && !defined(__WINS__)      
       
   893   __asm__ __volatile__ (
       
   894       MMX_LOAD_CONSTANTS
       
   895       "1:\n"
       
   896       "  movd (%2), %%mm0\n"
       
   897       "  punpcklbw %%mm7, %%mm0\n"
       
   898       "  pshufw $0x00, %%mm0, %%mm1\n"
       
   899 
       
   900       "  movd (%1), %%mm2\n"
       
   901       "  punpcklbw %%mm7, %%mm2\n"
       
   902 
       
   903       MMX_MULDIV255(mm2, mm1)
       
   904 
       
   905       "  movd (%0), %%mm0\n"
       
   906       "  punpcklbw %%mm7, %%mm0\n"
       
   907 
       
   908       "  pshufw $0xff, %%mm2, %%mm1\n"
       
   909       "  pxor %%mm5, %%mm1\n"
       
   910 
       
   911       MMX_MULDIV255(mm0, mm1)
       
   912 
       
   913       "  paddw %%mm0, %%mm2\n"
       
   914       "  packuswb %%mm2, %%mm2\n"
       
   915 
       
   916       "  movd %%mm2, (%0)\n"
       
   917       "  addl $4, %0\n"
       
   918       "  addl $4, %1\n"
       
   919       "  addl $1, %2\n"
       
   920       "  decl %3\n"
       
   921       "  jnz 1b\n"
       
   922       "  emms\n"
       
   923       :"+r" (dest), "+r" (src), "+r" (mask), "+r" (n)
       
   924       :
       
   925       :"eax");
       
   926 #endif
       
   927 }
       
   928 OIL_DEFINE_IMPL_FULL (composite_in_over_argb_mmx, composite_in_over_argb, OIL_IMPL_FLAG_MMX | OIL_IMPL_FLAG_MMXEXT);
       
   929 
       
   930 static void
       
   931 composite_in_over_argb_const_src_mmx (uint32_t *dest, uint32_t *src, uint8_t *mask, int n)
       
   932 {
       
   933 #if !defined(__WINSCW__) && !defined(__WINS__)      
       
   934   __asm__ __volatile__ (
       
   935       MMX_LOAD_CONSTANTS
       
   936 
       
   937       "  movd (%1), %%mm3\n"
       
   938       "  punpcklbw %%mm7, %%mm3\n"
       
   939       "1:\n"
       
   940       "  movd (%2), %%mm0\n"
       
   941       "  punpcklbw %%mm7, %%mm0\n"
       
   942       "  pshufw $0x00, %%mm0, %%mm1\n"
       
   943 
       
   944       "  movq %%mm3, %%mm2\n"
       
   945 
       
   946       MMX_MULDIV255(mm2, mm1)
       
   947 
       
   948       "  movd (%0), %%mm0\n"
       
   949       "  punpcklbw %%mm7, %%mm0\n"
       
   950 
       
   951       "  pshufw $0xff, %%mm2, %%mm1\n"
       
   952       "  pxor %%mm5, %%mm1\n"
       
   953 
       
   954       MMX_MULDIV255(mm0, mm1)
       
   955 
       
   956       "  paddw %%mm0, %%mm2\n"
       
   957       "  packuswb %%mm2, %%mm2\n"
       
   958 
       
   959       "  movd %%mm2, (%0)\n"
       
   960       "  addl $4, %0\n"
       
   961       "  addl $1, %2\n"
       
   962       "  decl %3\n"
       
   963       "  jnz 1b\n"
       
   964       "  emms\n"
       
   965       :"+r" (dest), "+r" (src), "+r" (mask), "+r" (n)
       
   966       :
       
   967       :"eax");
       
   968 #endif
       
   969 }
       
   970 OIL_DEFINE_IMPL_FULL (composite_in_over_argb_const_src_mmx, composite_in_over_argb_const_src, OIL_IMPL_FLAG_MMX | OIL_IMPL_FLAG_MMXEXT);
       
   971 
       
   972 static void
       
   973 composite_in_over_argb_const_mask_mmx (uint32_t *dest, uint32_t *src, uint8_t *mask, int n)
       
   974 {
       
   975 #if !defined(__WINSCW__) && !defined(__WINS__)      
       
   976   __asm__ __volatile__ (
       
   977       MMX_LOAD_CONSTANTS
       
   978       "  movd (%2), %%mm0\n"
       
   979       "  punpcklbw %%mm7, %%mm0\n"
       
   980       "  pshufw $0x00, %%mm0, %%mm3\n"
       
   981 
       
   982       "1:\n"
       
   983       "  movd (%1), %%mm2\n"
       
   984       "  punpcklbw %%mm7, %%mm2\n"
       
   985       "  movq %%mm3, %%mm1\n"
       
   986 
       
   987       MMX_MULDIV255(mm2, mm1)
       
   988 
       
   989       "  movd (%0), %%mm0\n"
       
   990       "  punpcklbw %%mm7, %%mm0\n"
       
   991 
       
   992       "  pshufw $0xff, %%mm2, %%mm1\n"
       
   993       "  pxor %%mm5, %%mm1\n"
       
   994 
       
   995       MMX_MULDIV255(mm0, mm1)
       
   996 
       
   997       "  paddw %%mm0, %%mm2\n"
       
   998       "  packuswb %%mm2, %%mm2\n"
       
   999 
       
  1000       "  movd %%mm2, (%0)\n"
       
  1001       "  addl $4, %0\n"
       
  1002       "  addl $4, %1\n"
       
  1003       "  decl %3\n"
       
  1004       "  jnz 1b\n"
       
  1005       "  emms\n"
       
  1006       :"+r" (dest), "+r" (src), "+r" (mask), "+r" (n)
       
  1007       :
       
  1008       :"eax");
       
  1009 #endif
       
  1010 }
       
  1011 OIL_DEFINE_IMPL_FULL (composite_in_over_argb_const_mask_mmx, composite_in_over_argb_const_mask, OIL_IMPL_FLAG_MMX | OIL_IMPL_FLAG_MMXEXT);
       
  1012 
       
  1013 
       
  1014 
       
  1015 #ifdef	__SYMBIAN32__
       
  1016  
       
  1017 OilFunctionImpl* __oil_function_impl_composite_in_argb_mmx() {
       
  1018 		return &_oil_function_impl_composite_in_argb_mmx;
       
  1019 }
       
  1020 #endif
       
  1021 
       
  1022 #ifdef	__SYMBIAN32__
       
  1023  
       
  1024 OilFunctionImpl* __oil_function_impl_composite_in_argb_mmx, composite_in_argb() {
       
  1025 		return &_oil_function_impl_composite_in_argb_mmx, composite_in_argb;
       
  1026 }
       
  1027 #endif
       
  1028 
       
  1029 #ifdef	__SYMBIAN32__
       
  1030  
       
  1031 OilFunctionImpl* __oil_function_impl_composite_in_argb_mmx2, composite_in_argb() {
       
  1032 		return &_oil_function_impl_composite_in_argb_mmx2, composite_in_argb;
       
  1033 }
       
  1034 #endif
       
  1035 
       
  1036 #ifdef	__SYMBIAN32__
       
  1037  
       
  1038 OilFunctionImpl* __oil_function_impl_composite_in_argb_const_src_mmx, composite_in_argb_const_src() {
       
  1039 		return &_oil_function_impl_composite_in_argb_const_src_mmx, composite_in_argb_const_src;
       
  1040 }
       
  1041 #endif
       
  1042 
       
  1043 #ifdef	__SYMBIAN32__
       
  1044  
       
  1045 OilFunctionImpl* __oil_function_impl_composite_in_argb_const_mask_mmx, composite_in_argb_const_mask() {
       
  1046 		return &_oil_function_impl_composite_in_argb_const_mask_mmx, composite_in_argb_const_mask;
       
  1047 }
       
  1048 #endif
       
  1049 
       
  1050 #ifdef	__SYMBIAN32__
       
  1051  
       
  1052 OilFunctionImpl* __oil_function_impl_composite_over_argb_mmx, composite_over_argb() {
       
  1053 		return &_oil_function_impl_composite_over_argb_mmx, composite_over_argb;
       
  1054 }
       
  1055 #endif
       
  1056 
       
  1057 #ifdef	__SYMBIAN32__
       
  1058  
       
  1059 OilFunctionImpl* __oil_function_impl_composite_over_argb_mmx_2, composite_over_argb() {
       
  1060 		return &_oil_function_impl_composite_over_argb_mmx_2, composite_over_argb;
       
  1061 }
       
  1062 #endif
       
  1063 
       
  1064 #ifdef	__SYMBIAN32__
       
  1065  
       
  1066 OilFunctionImpl* __oil_function_impl_composite_over_argb_mmx_3, composite_over_argb() {
       
  1067 		return &_oil_function_impl_composite_over_argb_mmx_3, composite_over_argb;
       
  1068 }
       
  1069 #endif
       
  1070 
       
  1071 #ifdef	__SYMBIAN32__
       
  1072  
       
  1073 OilFunctionImpl* __oil_function_impl_composite_over_argb_mmx_4, composite_over_argb() {
       
  1074 		return &_oil_function_impl_composite_over_argb_mmx_4, composite_over_argb;
       
  1075 }
       
  1076 #endif
       
  1077 
       
  1078 #ifdef	__SYMBIAN32__
       
  1079  
       
  1080 OilFunctionImpl* __oil_function_impl_composite_over_argb_mmx_5, composite_over_argb() {
       
  1081 		return &_oil_function_impl_composite_over_argb_mmx_5, composite_over_argb;
       
  1082 }
       
  1083 #endif
       
  1084 
       
  1085 #ifdef	__SYMBIAN32__
       
  1086  
       
  1087 OilFunctionImpl* __oil_function_impl_composite_over_argb_sse2, composite_over_argb() {
       
  1088 		return &_oil_function_impl_composite_over_argb_sse2, composite_over_argb;
       
  1089 }
       
  1090 #endif
       
  1091 
       
  1092 #ifdef	__SYMBIAN32__
       
  1093  
       
  1094 OilFunctionImpl* __oil_function_impl_composite_over_argb_sse2_2, composite_over_argb() {
       
  1095 		return &_oil_function_impl_composite_over_argb_sse2_2, composite_over_argb;
       
  1096 }
       
  1097 #endif
       
  1098 
       
  1099 #ifdef	__SYMBIAN32__
       
  1100  
       
  1101 OilFunctionImpl* __oil_function_impl_composite_over_argb_sse2_3, composite_over_argb() {
       
  1102 		return &_oil_function_impl_composite_over_argb_sse2_3, composite_over_argb;
       
  1103 }
       
  1104 #endif
       
  1105 
       
  1106 #ifdef	__SYMBIAN32__
       
  1107  
       
  1108 OilFunctionImpl* __oil_function_impl_composite_over_argb_const_src_mmx, composite_over_argb_const_src() {
       
  1109 		return &_oil_function_impl_composite_over_argb_const_src_mmx, composite_over_argb_const_src;
       
  1110 }
       
  1111 #endif
       
  1112 
       
  1113 #ifdef	__SYMBIAN32__
       
  1114  
       
  1115 OilFunctionImpl* __oil_function_impl_composite_add_argb_mmx, composite_add_argb() {
       
  1116 		return &_oil_function_impl_composite_add_argb_mmx, composite_add_argb;
       
  1117 }
       
  1118 #endif
       
  1119 
       
  1120 #ifdef	__SYMBIAN32__
       
  1121  
       
  1122 OilFunctionImpl* __oil_function_impl_composite_add_argb_const_src_mmx, composite_add_argb_const_src() {
       
  1123 		return &_oil_function_impl_composite_add_argb_const_src_mmx, composite_add_argb_const_src;
       
  1124 }
       
  1125 #endif
       
  1126 
       
  1127 #ifdef	__SYMBIAN32__
       
  1128  
       
  1129 OilFunctionImpl* __oil_function_impl_composite_in_over_argb_mmx, composite_in_over_argb() {
       
  1130 		return &_oil_function_impl_composite_in_over_argb_mmx, composite_in_over_argb;
       
  1131 }
       
  1132 #endif
       
  1133 
       
  1134 #ifdef	__SYMBIAN32__
       
  1135  
       
  1136 OilFunctionImpl* __oil_function_impl_composite_in_over_argb_const_src_mmx, composite_in_over_argb_const_src() {
       
  1137 		return &_oil_function_impl_composite_in_over_argb_const_src_mmx, composite_in_over_argb_const_src;
       
  1138 }
       
  1139 #endif
       
  1140 
       
  1141 #ifdef	__SYMBIAN32__
       
  1142  
       
  1143 OilFunctionImpl* __oil_function_impl_composite_in_over_argb_const_mask_mmx, composite_in_over_argb_const_mask() {
       
  1144 		return &_oil_function_impl_composite_in_over_argb_const_mask_mmx, composite_in_over_argb_const_mask;
       
  1145 }
       
  1146 #endif
       
  1147