genericopenlibs/liboil/src/i386/diff8x8_i386.c
changeset 18 47c74d1534e1
equal deleted inserted replaced
0:e4d67989cc36 18:47c74d1534e1
       
     1 /*
       
     2  * LIBOIL - Library of Optimized Inner Loops
       
     3  * Copyright (c) 2003,2004 David A. Schleef <ds@schleef.org>
       
     4  * All rights reserved.
       
     5  *
       
     6  * Redistribution and use in source and binary forms, with or without
       
     7  * modification, are permitted provided that the following conditions
       
     8  * are met:
       
     9  * 1. Redistributions of source code must retain the above copyright
       
    10  *    notice, this list of conditions and the following disclaimer.
       
    11  * 2. Redistributions in binary form must reproduce the above copyright
       
    12  *    notice, this list of conditions and the following disclaimer in the
       
    13  *    documentation and/or other materials provided with the distribution.
       
    14  * 
       
    15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
       
    16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
       
    17  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
       
    18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
       
    19  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
       
    20  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
       
    21  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
       
    22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
       
    23  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
       
    24  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
       
    25  * POSSIBILITY OF SUCH DAMAGE.
       
    26  */
       
    27 //Portions Copyright (c)  2008-2009 Nokia Corporation and/or its subsidiary(-ies). All rights reserved. 
       
    28 
       
    29 #ifdef HAVE_CONFIG_H
       
    30 #include "config.h"
       
    31 #endif
       
    32 
       
    33 #include <liboil/liboilfunction.h>
       
    34 
       
    /*
     * Function classes that the MMX routines in this file are registered
     * against by the OIL_DEFINE_IMPL_FULL lines further down.  OIL_DECLARE_CLASS
     * itself comes from the liboil headers and is not shown here.
     */
    35 OIL_DECLARE_CLASS (diff8x8_s16_u8);
       
    36 OIL_DECLARE_CLASS (diff8x8_const128_s16_u8);
       
    37 OIL_DECLARE_CLASS (diff8x8_average_s16_u8);
       
    38 
       
    39 
       
    /*
     * MMX implementation of diff8x8_s16_u8: 8x8 block difference.
     *
     * For each of 8 rows, loads 8 bytes from src1 and 8 bytes from src2,
     * zero-extends them to 16 bits and stores the 8 differences
     * src1[i] - src2[i] to dest as int16_t.  After every row dest advances by
     * 16 bytes (8 values), src1 by ss1 bytes and src2 by ss2 bytes.
     *
     * dest: receives 64 int16_t values, written contiguously.
     * src1: first row of the minuend block; ss1 is its row-to-row byte offset.
     * src2: first row of the subtrahend block; ss2 is its row-to-row byte offset.
     *
     * When __WINSCW__ or __WINS__ is defined the body is compiled out and the
     * function does nothing.
     */
    static void
    diff8x8_s16_u8_mmx (int16_t *dest, uint8_t *src1, int ss1, uint8_t *src2, int ss2)
    {
    #if !defined(__WINSCW__) && !defined(__WINS__)
      /*
       * The `add` instructions below take their operand size from the pointer
       * register, so the stride memory operands must be as wide as a pointer.
       * Widening to long also sign-extends negative strides.  This assumes long
       * is pointer-sized (true for the ILP32 and LP64 ABIs).  The operands stay
       * in memory ("m") so no additional general-purpose registers are needed.
       */
      long stride1 = ss1;
      long stride2 = ss2;

      __asm__ __volatile__ (
        "  pxor        %%mm7, %%mm7     \n\t" /* mm7 = 0, used for zero-extension */

    #define LOOP \
        "  movq        (%0), %%mm0      \n\t" /* mm0 = 8 bytes of src1 */ \
        "  movq        (%1), %%mm1      \n\t" /* mm1 = 8 bytes of src2 */ \
        "  movq        %%mm0, %%mm2     \n\t" /* dup to prepare for up conversion */ \
        "  movq        %%mm1, %%mm3     \n\t" /* dup to prepare for up conversion */ \
        /* convert from UINT8 to INT16 */ \
        "  punpcklbw   %%mm7, %%mm0     \n\t" /* mm0 = INT16(src1), low 4 bytes */ \
        "  punpcklbw   %%mm7, %%mm1     \n\t" /* mm1 = INT16(src2), low 4 bytes */ \
        "  punpckhbw   %%mm7, %%mm2     \n\t" /* mm2 = INT16(src1), high 4 bytes */ \
        "  punpckhbw   %%mm7, %%mm3     \n\t" /* mm3 = INT16(src2), high 4 bytes */ \
        /* start calculation */ \
        "  psubw       %%mm1, %%mm0     \n\t" /* mm0 = src1 - src2 (low half) */ \
        "  psubw       %%mm3, %%mm2     \n\t" /* mm2 = src1 - src2 (high half) */ \
        "  movq        %%mm0,  (%2)     \n\t" /* write answer out */ \
        "  movq        %%mm2, 8(%2)     \n\t" /* write answer out */ \
        /* Increment pointers */ \
        "  add         $16, %2          \n\t" \
        "  add         %3, %0           \n\t" \
        "  add         %4, %1           \n\t"

        /* One expansion per row of the 8x8 block. */
        LOOP
        LOOP
        LOOP
        LOOP
        LOOP
        LOOP
        LOOP
        LOOP
    #undef LOOP

        "  emms                         \n\t" /* leave MMX state */

         : "+r" (src1),
           "+r" (src2),
           "+r" (dest)
         : "m" (stride1),
           "m" (stride2)
         : "memory"
      );
    #endif
    }
       
    /*
     * Registers diff8x8_s16_u8_mmx as an implementation of the diff8x8_s16_u8
     * class with the OIL_IMPL_FLAG_MMX flag.  NOTE(review): the macro is defined
     * in the liboil headers, not in this file; presumably the flag restricts the
     * implementation to MMX-capable CPUs -- confirm against liboilfunction.h.
     */
    88 OIL_DEFINE_IMPL_FULL (diff8x8_s16_u8_mmx, diff8x8_s16_u8, OIL_IMPL_FLAG_MMX);
       
    89 
       
    /*
     * MMX implementation of diff8x8_const128_s16_u8: 8x8 block minus 128.
     *
     * For each of 8 rows, loads 8 bytes from src1, zero-extends them to 16 bits,
     * subtracts 128 from each and stores the 8 results to dest as int16_t.
     * After every row dest advances by 16 bytes (8 values) and src1 by ss1
     * bytes.
     *
     * dest: receives 64 int16_t values, written contiguously.
     * src1: first row of the source block; ss1 is its row-to-row byte offset.
     *
     * When __WINSCW__ or __WINS__ is defined the body is compiled out and the
     * function does nothing.
     */
    static void
    diff8x8_const128_s16_u8_mmx (int16_t *dest, uint8_t *src1, int ss1)
    {
    #if !defined(__WINSCW__) && !defined(__WINS__)
      /* Four 16-bit lanes each holding 128; loaded once into mm1. */
      const int16_t tmp[4] = { 0x0080, 0x0080, 0x0080, 0x0080 };

      __asm__ __volatile__ (
        "  pxor        %%mm7, %%mm7     \n\t" /* mm7 = 0, used for zero-extension */
        "  movq        (%3), %%mm1  \n\t"     /* mm1 = 128 in every 16-bit lane */

    #define LOOP \
        "  movq        (%0), %%mm0      \n\t" /* mm0 = 8 bytes of src1 */ \
        "  movq        %%mm0, %%mm2     \n\t" /* dup to prepare for up conversion */ \
        /* convert from UINT8 to INT16 */ \
        "  punpcklbw   %%mm7, %%mm0     \n\t" /* mm0 = INT16(src1), low 4 bytes */ \
        "  punpckhbw   %%mm7, %%mm2     \n\t" /* mm2 = INT16(src1), high 4 bytes */ \
        /* start calculation */ \
        "  psubw       %%mm1, %%mm0     \n\t" /* mm0 = src1 - 128 (low half) */ \
        "  psubw       %%mm1, %%mm2     \n\t" /* mm2 = src1 - 128 (high half) */ \
        "  movq        %%mm0,  (%1)     \n\t" /* write answer out */ \
        "  movq        %%mm2, 8(%1)     \n\t" /* write answer out */ \
        /* Increment pointers */ \
        "  add         $16, %1           \n\t" \
        "  add         %2, %0           \n\t"

        /* One expansion per row of the 8x8 block. */
        LOOP
        LOOP
        LOOP
        LOOP
        LOOP
        LOOP
        LOOP
        LOOP
    #undef LOOP

        "  emms                         \n\t" /* leave MMX state */

         : "+r" (src1),
           "+r" (dest)
           /*
            * The stride is widened to long so that its register has the same
            * width as the pointer register it is added to; a plain int operand
            * yields a 32-bit register and an operand-size mismatch on 64-bit
            * targets.  Assumes long is pointer-sized (ILP32 / LP64 ABIs).
            */
         : "r" ((long) ss1),
           "r" (tmp)
         : "memory"
      );
    #endif
    }
       
   /*
    * Registers diff8x8_const128_s16_u8_mmx as an implementation of the
    * diff8x8_const128_s16_u8 class with the OIL_IMPL_FLAG_MMX flag.
    * NOTE(review): the macro is defined in the liboil headers, not in this
    * file; presumably the flag restricts the implementation to MMX-capable
    * CPUs -- confirm against liboilfunction.h.
    */
   135 OIL_DEFINE_IMPL_FULL (diff8x8_const128_s16_u8_mmx, diff8x8_const128_s16_u8, OIL_IMPL_FLAG_MMX);
       
   136 
       
   /*
    * MMX implementation of diff8x8_average_s16_u8: 8x8 block minus the average
    * of two other blocks.
    *
    * For each of 8 rows, loads 8 bytes from src1, src2 and src3, zero-extends
    * them to 16 bits and stores src1[i] - ((src2[i] + src3[i]) >> 1) to dest
    * as int16_t.  After every row dest advances by 16 bytes (8 values), src1
    * by ss1 bytes, and both src2 and src3 by ss2 bytes (src3 has no stride
    * parameter of its own).
    *
    * dest: receives 64 int16_t values, written contiguously.
    * src1: first row of the minuend block; ss1 is its row-to-row byte offset.
    * src2, src3: first rows of the two blocks that are averaged; ss2 is the
    *             row-to-row byte offset applied to both.
    *
    * When __WINSCW__ or __WINS__ is defined the body is compiled out and the
    * function does nothing.
    */
   static void
   diff8x8_average_s16_u8_mmx (int16_t *dest, uint8_t *src1, int ss1, uint8_t *src2, int ss2, uint8_t *src3)
   {
   #if !defined(__WINSCW__) && !defined(__WINS__)
     /*
      * The `add` instructions below take their operand size from the pointer
      * register, so the stride memory operands must be as wide as a pointer.
      * Widening to long also sign-extends negative strides.  This assumes long
      * is pointer-sized (true for the ILP32 and LP64 ABIs).  The operands stay
      * in memory ("m") because four general-purpose registers are already
      * taken by the pointers.
      */
     long stride1 = ss1;
     long stride2 = ss2;

     __asm__ __volatile__ (
       "  pxor        %%mm7, %%mm7     \n\t" /* mm7 = 0, used for zero-extension */

   #define LOOP \
       "  movq        (%0), %%mm0      \n\t" /* mm0 = 8 bytes of src1 */ \
       "  movq        (%1), %%mm1      \n\t" /* mm1 = 8 bytes of src2 */ \
       "  movq        (%2), %%mm4      \n\t" /* mm4 = 8 bytes of src3 */ \
       "  movq        %%mm0, %%mm2     \n\t" /* dup to prepare for up conversion */ \
       "  movq        %%mm1, %%mm3     \n\t" /* dup to prepare for up conversion */ \
       "  movq        %%mm4, %%mm5     \n\t" /* dup to prepare for up conversion */ \
       /* convert from UINT8 to INT16 */ \
       "  punpcklbw   %%mm7, %%mm0     \n\t" /* mm0 = INT16(src1), low 4 bytes */ \
       "  punpcklbw   %%mm7, %%mm1     \n\t" /* mm1 = INT16(src2), low 4 bytes */ \
       "  punpcklbw   %%mm7, %%mm4     \n\t" /* mm4 = INT16(src3), low 4 bytes */ \
       "  punpckhbw   %%mm7, %%mm2     \n\t" /* mm2 = INT16(src1), high 4 bytes */ \
       "  punpckhbw   %%mm7, %%mm3     \n\t" /* mm3 = INT16(src2), high 4 bytes */ \
       "  punpckhbw   %%mm7, %%mm5     \n\t" /* mm5 = INT16(src3), high 4 bytes */ \
       /* average src2 and src3 */ \
       "  paddw       %%mm4, %%mm1     \n\t" /* mm1 = src2 + src3 (low half) */ \
       "  paddw       %%mm5, %%mm3     \n\t" /* mm3 = src2 + src3 (high half) */ \
       "  psrlw       $1, %%mm1        \n\t" /* mm1 = (src2 + src3) / 2 */ \
       "  psrlw       $1, %%mm3        \n\t" /* mm3 = (src2 + src3) / 2 */ \
       "  psubw       %%mm1, %%mm0     \n\t" /* mm0 = src1 - ((src2 + src3) / 2) */ \
       "  psubw       %%mm3, %%mm2     \n\t" /* mm2 = src1 - ((src2 + src3) / 2) */ \
       "  movq        %%mm0,  (%3)     \n\t" /* write answer out */ \
       "  movq        %%mm2, 8(%3)     \n\t" /* write answer out */ \
       /* Increment pointers; src2 and src3 share the ss2 stride */ \
       "  add         $16, %3           \n\t" \
       "  add         %4, %0           \n\t" \
       "  add         %5, %1           \n\t" \
       "  add         %5, %2           \n\t"

       /* One expansion per row of the 8x8 block. */
       LOOP
       LOOP
       LOOP
       LOOP
       LOOP
       LOOP
       LOOP
       LOOP
   #undef LOOP

       "  emms                         \n\t" /* leave MMX state */

        : "+r" (src1),
          "+r" (src2),
          "+r" (src3),
          "+r" (dest)
        : "m" (stride1),
          "m" (stride2)
        : "memory"
     );
   #endif
   }
       
   /*
    * Registers diff8x8_average_s16_u8_mmx as an implementation of the
    * diff8x8_average_s16_u8 class with the OIL_IMPL_FLAG_MMX flag.
    * NOTE(review): the macro is defined in the liboil headers, not in this
    * file; presumably the flag restricts the implementation to MMX-capable
    * CPUs -- confirm against liboilfunction.h.
    */
   195 OIL_DEFINE_IMPL_FULL (diff8x8_average_s16_u8_mmx, diff8x8_average_s16_u8, OIL_IMPL_FLAG_MMX);
       
   196 
       
   197 
       
   198 
       
   #ifdef __SYMBIAN32__
   /*
    * Symbian-only accessor returning the address of the implementation object
    * for diff8x8_s16_u8_mmx.
    *
    * The previous text had both OIL_DEFINE_IMPL_FULL arguments ("impl, class")
    * pasted into the identifier, which is not a valid function definition.
    * NOTE(review): _oil_function_impl_diff8x8_s16_u8_mmx is presumably the
    * object emitted by the OIL_DEFINE_IMPL_FULL registration in this file --
    * confirm against the macro in liboilfunction.h.
    */
   OilFunctionImpl *
   __oil_function_impl_diff8x8_s16_u8_mmx (void)
   {
     return &_oil_function_impl_diff8x8_s16_u8_mmx;
   }
   #endif
       
   205 
       
   #ifdef __SYMBIAN32__
   /*
    * Symbian-only accessor returning the address of the implementation object
    * for diff8x8_const128_s16_u8_mmx.
    *
    * The previous text had both OIL_DEFINE_IMPL_FULL arguments ("impl, class")
    * pasted into the identifier, which is not a valid function definition.
    * NOTE(review): _oil_function_impl_diff8x8_const128_s16_u8_mmx is presumably
    * the object emitted by the OIL_DEFINE_IMPL_FULL registration in this file
    * -- confirm against the macro in liboilfunction.h.
    */
   OilFunctionImpl *
   __oil_function_impl_diff8x8_const128_s16_u8_mmx (void)
   {
     return &_oil_function_impl_diff8x8_const128_s16_u8_mmx;
   }
   #endif
       
   212 
       
   #ifdef __SYMBIAN32__
   /*
    * Symbian-only accessor returning the address of the implementation object
    * for diff8x8_average_s16_u8_mmx.
    *
    * The previous text had both OIL_DEFINE_IMPL_FULL arguments ("impl, class")
    * pasted into the identifier, which is not a valid function definition.
    * NOTE(review): _oil_function_impl_diff8x8_average_s16_u8_mmx is presumably
    * the object emitted by the OIL_DEFINE_IMPL_FULL registration in this file
    * -- confirm against the macro in liboilfunction.h.
    */
   OilFunctionImpl *
   __oil_function_impl_diff8x8_average_s16_u8_mmx (void)
   {
     return &_oil_function_impl_diff8x8_average_s16_u8_mmx;
   }
   #endif
       
   219