genericopenlibs/liboil/src/i386/error8x8_i386.c
changeset 18 47c74d1534e1
equal deleted inserted replaced
0:e4d67989cc36 18:47c74d1534e1
       
     1 /*
       
     2  * LIBOIL - Library of Optimized Inner Loops
       
     3  * Copyright (c) 2003,2004 David A. Schleef <ds@schleef.org>
       
     4  * All rights reserved.
       
     5  *
       
     6  * Redistribution and use in source and binary forms, with or without
       
     7  * modification, are permitted provided that the following conditions
       
     8  * are met:
       
     9  * 1. Redistributions of source code must retain the above copyright
       
    10  *    notice, this list of conditions and the following disclaimer.
       
    11  * 2. Redistributions in binary form must reproduce the above copyright
       
    12  *    notice, this list of conditions and the following disclaimer in the
       
    13  *    documentation and/or other materials provided with the distribution.
       
    14  * 
       
    15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
       
    16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
       
    17  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
       
    18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
       
    19  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
       
    20  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
       
    21  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
       
    22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
       
    23  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
       
    24  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
       
    25  * POSSIBILITY OF SUCH DAMAGE.
       
    26  */
       
    27 //Portions Copyright (c)  2008-2009 Nokia Corporation and/or its subsidiary(-ies). All rights reserved. 
       
    28 
       
    29 #ifdef HAVE_CONFIG_H
       
    30 #include "config.h"
       
    31 #endif
       
    32 
       
    33 #include <liboil/liboilfunction.h>
       
    34 
       
    35 OIL_DECLARE_CLASS (err_intra8x8_u8);
       
    36 OIL_DECLARE_CLASS (err_inter8x8_u8);
       
    37 OIL_DECLARE_CLASS (err_inter8x8_u8_avg);
       
    38 
       
    39 static void
       
    40 err_intra8x8_u8_mmx (uint32_t *dest, uint8_t *src1, int ss1)
       
    41 {
       
    42 #if !defined(__WINSCW__) && !defined(__WINS__)      
       
    43   uint32_t  xsum;
       
    44   uint32_t  xxsum;
       
    45 
       
    46   __asm__ __volatile__ (
       
    47     "  pxor        %%mm5, %%mm5     \n\t"
       
    48     "  pxor        %%mm6, %%mm6     \n\t"
       
    49     "  pxor        %%mm7, %%mm7     \n\t"
       
    50     "  mov         $8, %%edi        \n\t"
       
    51     "1:                             \n\t"
       
    52     "  movq        (%2), %%mm0      \n\t"	/* take 8 bytes */
       
    53     "  movq        %%mm0, %%mm2     \n\t"
       
    54 
       
    55     "  punpcklbw   %%mm6, %%mm0     \n\t"
       
    56     "  punpckhbw   %%mm6, %%mm2     \n\t"
       
    57 
       
    58     "  paddw       %%mm0, %%mm5     \n\t"
       
    59     "  paddw       %%mm2, %%mm5     \n\t"
       
    60 
       
    61     "  pmaddwd     %%mm0, %%mm0     \n\t"
       
    62     "  pmaddwd     %%mm2, %%mm2     \n\t"
       
    63     
       
    64     "  paddd       %%mm0, %%mm7     \n\t"
       
    65     "  paddd       %%mm2, %%mm7     \n\t"
       
    66 
       
    67     "  add         %3, %2           \n\t"	/* Inc pointer into src data */
       
    68 
       
    69     "  dec         %%edi            \n\t"
       
    70     "  jnz 1b                       \n\t"
       
    71 
       
    72     "  movq        %%mm5, %%mm0     \n\t"
       
    73     "  psrlq       $32, %%mm5       \n\t"
       
    74     "  paddw       %%mm0, %%mm5     \n\t"
       
    75     "  movq        %%mm5, %%mm0     \n\t"
       
    76     "  psrlq       $16, %%mm5       \n\t"
       
    77     "  paddw       %%mm0, %%mm5     \n\t"
       
    78     "  movd        %%mm5, %%edi     \n\t"
       
    79     "  movswl       %%di, %%edi      \n\t"
       
    80     "  movl        %%edi, %0        \n\t"
       
    81 
       
    82     "  movq        %%mm7, %%mm0     \n\t"
       
    83     "  psrlq       $32, %%mm7       \n\t"
       
    84     "  paddd       %%mm0, %%mm7     \n\t"
       
    85     "  movd        %%mm7, %1        \n\t"
       
    86     "  emms                         \n\t"
       
    87 
       
    88      : "=r" (xsum),
       
    89        "=r" (xxsum),
       
    90        "+r" (src1) 
       
    91      : "r" (ss1)
       
    92      : "edi", "memory"
       
    93   );
       
    94 
       
    95   /* Compute population variance as mis-match metric. */
       
    96   *dest = (((xxsum<<6) - xsum*xsum)); 
       
    97 #endif
       
    98 }
       
/* Registers err_intra8x8_u8_mmx as an implementation of the err_intra8x8_u8
 * class, tagged OIL_IMPL_FLAG_MMX.  NOTE(review): the exact registration
 * semantics come from OIL_DEFINE_IMPL_FULL in liboilfunction.h. */
    99 OIL_DEFINE_IMPL_FULL (err_intra8x8_u8_mmx, err_intra8x8_u8, OIL_IMPL_FLAG_MMX);
       
   100 
       
   101 static void
       
   102 err_inter8x8_u8_mmx (uint32_t *dest, uint8_t *src1, int ss1, uint8_t *src2, int ss2)
       
   103 {
       
   104   uint32_t  xsum;
       
   105   uint32_t  xxsum;
       
   106 #if !defined(__WINSCW__) && !defined(__WINS__)      
       
   107   __asm__ __volatile__ (
       
   108     "  pxor        %%mm5, %%mm5     \n\t"
       
   109     "  pxor        %%mm6, %%mm6     \n\t"
       
   110     "  pxor        %%mm7, %%mm7     \n\t"
       
   111     "  mov         $8, %%edi        \n\t"
       
   112     "1:                             \n\t"
       
   113     "  movq        (%2), %%mm0      \n\t"	/* take 8 bytes */
       
   114     "  movq        (%3), %%mm1      \n\t"
       
   115     "  movq        %%mm0, %%mm2     \n\t"
       
   116     "  movq        %%mm1, %%mm3     \n\t"
       
   117 
       
   118     "  punpcklbw   %%mm6, %%mm0     \n\t"
       
   119     "  punpcklbw   %%mm6, %%mm1     \n\t"
       
   120     "  punpckhbw   %%mm6, %%mm2     \n\t"
       
   121     "  punpckhbw   %%mm6, %%mm3     \n\t"
       
   122 
       
   123     "  psubsw      %%mm1, %%mm0     \n\t"
       
   124     "  psubsw      %%mm3, %%mm2     \n\t"
       
   125 
       
   126     "  paddw       %%mm0, %%mm5     \n\t"
       
   127     "  paddw       %%mm2, %%mm5     \n\t"
       
   128 
       
   129     "  pmaddwd     %%mm0, %%mm0     \n\t"
       
   130     "  pmaddwd     %%mm2, %%mm2     \n\t"
       
   131     
       
   132     "  paddd       %%mm0, %%mm7     \n\t"
       
   133     "  paddd       %%mm2, %%mm7     \n\t"
       
   134 
       
   135     "  add         %4, %2           \n\t"	/* Inc pointer into src data */
       
   136     "  add         %5, %3           \n\t"	/* Inc pointer into ref data */
       
   137 
       
   138     "  dec         %%edi            \n\t"
       
   139     "  jnz 1b                       \n\t"
       
   140 
       
   141     "  movq        %%mm5, %%mm0     \n\t"
       
   142     "  psrlq       $32, %%mm5       \n\t"
       
   143     "  paddw       %%mm0, %%mm5     \n\t"
       
   144     "  movq        %%mm5, %%mm0     \n\t"
       
   145     "  psrlq       $16, %%mm5       \n\t"
       
   146     "  paddw       %%mm0, %%mm5     \n\t"
       
   147     "  movd        %%mm5, %%edi     \n\t"
       
   148     "  movswl       %%di, %%edi      \n\t"
       
   149     "  movl        %%edi, %0        \n\t"
       
   150 
       
   151     "  movq        %%mm7, %%mm0     \n\t"
       
   152     "  psrlq       $32, %%mm7       \n\t"
       
   153     "  paddd       %%mm0, %%mm7     \n\t"
       
   154     "  movd        %%mm7, %1        \n\t"
       
   155     "  emms                         \n\t"
       
   156 
       
   157      : "=m" (xsum),
       
   158        "=m" (xxsum),
       
   159        "+r" (src1), 
       
   160        "+r" (src2) 
       
   161      : "m" (ss1),
       
   162        "m" (ss2)
       
   163      : "edi", "memory"
       
   164   );
       
   165 
       
   166   /* Compute and return population variance as mis-match metric. */
       
   167   *dest = (((xxsum<<6) - xsum*xsum));
       
   168 #endif
       
   169 }
       
/* Registers err_inter8x8_u8_mmx as an implementation of the err_inter8x8_u8
 * class, tagged OIL_IMPL_FLAG_MMX.  NOTE(review): the exact registration
 * semantics come from OIL_DEFINE_IMPL_FULL in liboilfunction.h. */
   170 OIL_DEFINE_IMPL_FULL (err_inter8x8_u8_mmx, err_inter8x8_u8, OIL_IMPL_FLAG_MMX);
       
   171 
       
   172 static void
       
   173 err_inter8x8_u8_avg_mmx (uint32_t *dest, uint8_t *src1, int ss1, uint8_t *src2, uint8_t *src3, int ss2)
       
   174 {
       
   175 #if !defined(__WINSCW__) && !defined(__WINS__)      
       
   176   uint32_t xsum;
       
   177   uint32_t xxsum;
       
   178 
       
   179   __asm__ __volatile__ (
       
   180     "  pcmpeqd     %%mm4, %%mm4     \n\t"	/* fefefefefefefefe in mm4 */
       
   181     "  paddb       %%mm4, %%mm4     \n\t"
       
   182     "  pxor        %%mm5, %%mm5     \n\t"
       
   183     "  pxor        %%mm6, %%mm6     \n\t"
       
   184     "  pxor        %%mm7, %%mm7     \n\t"
       
   185     "  mov         $8, %%edi        \n\t"
       
   186     "1:                             \n\t"
       
   187     "  movq        (%2), %%mm0      \n\t"	/* take 8 bytes */
       
   188 
       
   189     "  movq        (%3), %%mm2      \n\t"
       
   190     "  movq        (%4), %%mm3      \n\t"	/* take average of mm2 and mm3 */
       
   191     "  movq        %%mm2, %%mm1     \n\t"
       
   192     "  pand        %%mm3, %%mm1     \n\t"
       
   193     "  pxor        %%mm2, %%mm3     \n\t"
       
   194     "  pand        %%mm4, %%mm3     \n\t"
       
   195     "  psrlq       $1, %%mm3        \n\t"
       
   196     "  paddb       %%mm3, %%mm1     \n\t"
       
   197 
       
   198     "  movq        %%mm0, %%mm2     \n\t"
       
   199     "  movq        %%mm1, %%mm3     \n\t"
       
   200 
       
   201     "  punpcklbw   %%mm6, %%mm0     \n\t"
       
   202     "  punpcklbw   %%mm6, %%mm1     \n\t"
       
   203     "  punpckhbw   %%mm6, %%mm2     \n\t"
       
   204     "  punpckhbw   %%mm6, %%mm3     \n\t"
       
   205 
       
   206     "  psubsw      %%mm1, %%mm0     \n\t"
       
   207     "  psubsw      %%mm3, %%mm2     \n\t"
       
   208 
       
   209     "  paddw       %%mm0, %%mm5     \n\t"
       
   210     "  paddw       %%mm2, %%mm5     \n\t"
       
   211 
       
   212     "  pmaddwd     %%mm0, %%mm0     \n\t"
       
   213     "  pmaddwd     %%mm2, %%mm2     \n\t"
       
   214     
       
   215     "  paddd       %%mm0, %%mm7     \n\t"
       
   216     "  paddd       %%mm2, %%mm7     \n\t"
       
   217 
       
   218     "  add         %5, %2           \n\t"	/* Inc pointer into src data */
       
   219     "  add         %6, %3           \n\t"	/* Inc pointer into ref data */
       
   220     "  add         %6, %4           \n\t"	/* Inc pointer into ref data */
       
   221 
       
   222     "  dec         %%edi            \n\t"
       
   223     "  jnz 1b                       \n\t"
       
   224 
       
   225     "  movq        %%mm5, %%mm0     \n\t"
       
   226     "  psrlq       $32, %%mm5       \n\t"
       
   227     "  paddw       %%mm0, %%mm5     \n\t"
       
   228     "  movq        %%mm5, %%mm0     \n\t"
       
   229     "  psrlq       $16, %%mm5       \n\t"
       
   230     "  paddw       %%mm0, %%mm5     \n\t"
       
   231     "  movd        %%mm5, %%edi     \n\t"
       
   232     "  movswl       %%di, %%edi      \n\t"
       
   233     "  movl        %%edi, %0        \n\t"
       
   234 
       
   235     "  movq        %%mm7, %%mm0     \n\t"
       
   236     "  psrlq       $32, %%mm7       \n\t"
       
   237     "  paddd       %%mm0, %%mm7     \n\t"
       
   238     "  movd        %%mm7, %1        \n\t"
       
   239     "  emms                         \n\t"
       
   240 
       
   241      : "=m" (xsum),
       
   242        "=m" (xxsum),
       
   243        "+r" (src1), 
       
   244        "+r" (src2),
       
   245        "+r" (src3) 
       
   246      : "m" (ss1),
       
   247        "m" (ss2)
       
   248      : "edi", "memory"
       
   249   );
       
   250 
       
   251   /* Compute and return population variance as mis-match metric. */
       
   252   *dest = (((xxsum<<6) - xsum*xsum));
       
   253 #endif
       
   254 }
       
   255 
       
/* Registers err_inter8x8_u8_avg_mmx as an implementation of the
 * err_inter8x8_u8_avg class, tagged OIL_IMPL_FLAG_MMX.  NOTE(review): the
 * exact registration semantics come from OIL_DEFINE_IMPL_FULL in
 * liboilfunction.h. */
   256 OIL_DEFINE_IMPL_FULL (err_inter8x8_u8_avg_mmx, err_inter8x8_u8_avg, OIL_IMPL_FLAG_MMX);
       
   257  
       
   258 #ifdef ENABLE_BROKEN_IMPLS
       
   259 static void
       
   260 err_inter8x8_u8_avg_mmxext (uint32_t *dest, uint8_t *src1, int ss1, uint8_t *src2, uint8_t *src3, int ss2)
       
   261 {
       
   262   uint32_t xsum;
       
   263   uint32_t xxsum;
       
   264 
       
   265   __asm__ __volatile__ (
       
   266     "  pxor        %%mm4, %%mm4     \n\t"
       
   267     "  pxor        %%mm5, %%mm5     \n\t"
       
   268     "  mov $0x01010101, %%edi \n\t"
       
   269     "  movd %%edi, %%mm6 \n\t"
       
   270     "  punpcklbw %%mm6, %%mm6 \n\t"
       
   271     "  pxor        %%mm7, %%mm7     \n\t"
       
   272     "  mov         $8, %%edi        \n\t"
       
   273     "1:                             \n\t"
       
   274     "  movq        (%2), %%mm0      \n\t"	/* take 8 bytes */
       
   275 
       
   276     "  movq        (%3), %%mm2      \n\t"
       
   277     "  movq        (%4), %%mm1      \n\t"	/* take average of mm2 and mm1 */
       
   278     "  movq        %%mm1, %%mm3     \n\t"
       
   279     "  pavgb       %%mm2, %%mm1     \n\t"
       
   280     "  pxor        %%mm2, %%mm3     \n\t"
       
   281     "  pand        %%mm6, %%mm3     \n\t"
       
   282     "  psubb       %%mm3, %%mm1     \n\t"
       
   283 
       
   284     "  movq        %%mm0, %%mm2     \n\t"
       
   285     "  movq        %%mm1, %%mm3     \n\t"
       
   286 
       
   287     "  punpcklbw   %%mm4, %%mm0     \n\t"
       
   288     "  punpcklbw   %%mm4, %%mm1     \n\t"
       
   289     "  punpckhbw   %%mm4, %%mm2     \n\t"
       
   290     "  punpckhbw   %%mm4, %%mm3     \n\t"
       
   291 
       
   292     "  psubsw      %%mm1, %%mm0     \n\t"
       
   293     "  psubsw      %%mm3, %%mm2     \n\t"
       
   294 
       
   295     "  paddw       %%mm0, %%mm5     \n\t"
       
   296     "  paddw       %%mm2, %%mm5     \n\t"
       
   297 
       
   298     "  pmaddwd     %%mm0, %%mm0     \n\t"
       
   299     "  pmaddwd     %%mm2, %%mm2     \n\t"
       
   300     
       
   301     "  paddd       %%mm0, %%mm7     \n\t"
       
   302     "  paddd       %%mm2, %%mm7     \n\t"
       
   303 
       
   304     "  add         %5, %2           \n\t"	/* Inc pointer into src data */
       
   305     "  add         %6, %3           \n\t"	/* Inc pointer into ref data */
       
   306     "  add         %6, %4           \n\t"	/* Inc pointer into ref data */
       
   307 
       
   308     "  dec         %%edi            \n\t"
       
   309     "  jnz 1b                       \n\t"
       
   310 
       
   311     "  movq        %%mm5, %%mm0     \n\t"
       
   312     "  psrlq       $32, %%mm5       \n\t"
       
   313     "  paddw       %%mm0, %%mm5     \n\t"
       
   314     "  movq        %%mm5, %%mm0     \n\t"
       
   315     "  psrlq       $16, %%mm5       \n\t"
       
   316     "  paddw       %%mm0, %%mm5     \n\t"
       
   317     "  movd        %%mm5, %%edi     \n\t"
       
   318     "  movswl       %%di, %%edi      \n\t"
       
   319     "  movl        %%edi, %0        \n\t"
       
   320 
       
   321     "  movq        %%mm7, %%mm0     \n\t"
       
   322     "  psrlq       $32, %%mm7       \n\t"
       
   323     "  paddd       %%mm0, %%mm7     \n\t"
       
   324     "  movd        %%mm7, %1        \n\t"
       
   325     "  emms                         \n\t"
       
   326 
       
   327      : "=m" (xsum),
       
   328        "=m" (xxsum),
       
   329        "+r" (src1), 
       
   330        "+r" (src2),
       
   331        "+r" (src3) 
       
   332      : "m" (ss1),
       
   333        "m" (ss2)
       
   334      : "edi", "memory"
       
   335   );
       
   336 
       
   337   /* Compute and return population variance as mis-match metric. */
       
   338   *dest = (((xxsum<<6) - xsum*xsum));
       
   339 }
       
   340 
       
/* Registers err_inter8x8_u8_avg_mmxext as an implementation of the
 * err_inter8x8_u8_avg class, tagged with both the MMX and MMXEXT flags.
 * NOTE(review): the exact registration semantics come from
 * OIL_DEFINE_IMPL_FULL in liboilfunction.h. */
   341 OIL_DEFINE_IMPL_FULL (err_inter8x8_u8_avg_mmxext, err_inter8x8_u8_avg, OIL_IMPL_FLAG_MMX | OIL_IMPL_FLAG_MMXEXT);
       
   342 #endif
       
   343 
       
   344 
       
   345 
       
   346 #ifdef	__SYMBIAN32__
       
   347  
       
   348 OilFunctionImpl* __oil_function_impl_err_intra8x8_u8_mmx, err_intra8x8_u8() {
       
   349 		return &_oil_function_impl_err_intra8x8_u8_mmx, err_intra8x8_u8;
       
   350 }
       
   351 #endif
       
   352 
       
   353 #ifdef	__SYMBIAN32__
       
   354  
       
   355 OilFunctionImpl* __oil_function_impl_err_inter8x8_u8_mmx, err_inter8x8_u8() {
       
   356 		return &_oil_function_impl_err_inter8x8_u8_mmx, err_inter8x8_u8;
       
   357 }
       
   358 #endif
       
   359 
       
   360 #ifdef	__SYMBIAN32__
       
   361  
       
   362 OilFunctionImpl* __oil_function_impl_err_inter8x8_u8_avg_mmx, err_inter8x8_u8_avg() {
       
   363 		return &_oil_function_impl_err_inter8x8_u8_avg_mmx, err_inter8x8_u8_avg;
       
   364 }
       
   365 #endif
       
   366 
       
   367 #ifdef	__SYMBIAN32__
       
   368  
       
   369 OilFunctionImpl* __oil_function_impl_err_inter8x8_u8_avg_mmxext, err_inter8x8_u8_avg() {
       
   370 		return &_oil_function_impl_err_inter8x8_u8_avg_mmxext, err_inter8x8_u8_avg;
       
   371 }
       
   372 #endif
       
   373